1*> \brief <b> DGESVD computes the singular value decomposition (SVD) for GE matrices</b>
2*
3*  =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6*            http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
9*> Download DGESVD + dependencies
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgesvd.f">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgesvd.f">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgesvd.f">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18*  Definition:
19*  ===========
20*
21*       SUBROUTINE DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
22*                          WORK, LWORK, INFO )
23*
24*       .. Scalar Arguments ..
25*       CHARACTER          JOBU, JOBVT
26*       INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N
27*       ..
28*       .. Array Arguments ..
29*       DOUBLE PRECISION   A( LDA, * ), S( * ), U( LDU, * ),
30*      $                   VT( LDVT, * ), WORK( * )
31*       ..
32*
33*
34*> \par Purpose:
35*  =============
36*>
37*> \verbatim
38*>
39*> DGESVD computes the singular value decomposition (SVD) of a real
40*> M-by-N matrix A, optionally computing the left and/or right singular
41*> vectors. The SVD is written
42*>
43*>      A = U * SIGMA * transpose(V)
44*>
45*> where SIGMA is an M-by-N matrix which is zero except for its
46*> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
47*> V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA
48*> are the singular values of A; they are real and non-negative, and
49*> are returned in descending order.  The first min(m,n) columns of
50*> U and V are the left and right singular vectors of A.
51*>
52*> Note that the routine returns V**T, not V.
53*> \endverbatim
54*
55*  Arguments:
56*  ==========
57*
58*> \param[in] JOBU
59*> \verbatim
60*>          JOBU is CHARACTER*1
61*>          Specifies options for computing all or part of the matrix U:
62*>          = 'A':  all M columns of U are returned in array U:
63*>          = 'S':  the first min(m,n) columns of U (the left singular
64*>                  vectors) are returned in the array U;
65*>          = 'O':  the first min(m,n) columns of U (the left singular
66*>                  vectors) are overwritten on the array A;
67*>          = 'N':  no columns of U (no left singular vectors) are
68*>                  computed.
69*> \endverbatim
70*>
71*> \param[in] JOBVT
72*> \verbatim
73*>          JOBVT is CHARACTER*1
74*>          Specifies options for computing all or part of the matrix
75*>          V**T:
76*>          = 'A':  all N rows of V**T are returned in the array VT;
77*>          = 'S':  the first min(m,n) rows of V**T (the right singular
78*>                  vectors) are returned in the array VT;
79*>          = 'O':  the first min(m,n) rows of V**T (the right singular
80*>                  vectors) are overwritten on the array A;
81*>          = 'N':  no rows of V**T (no right singular vectors) are
82*>                  computed.
83*>
84*>          JOBVT and JOBU cannot both be 'O'.
85*> \endverbatim
86*>
87*> \param[in] M
88*> \verbatim
89*>          M is INTEGER
90*>          The number of rows of the input matrix A.  M >= 0.
91*> \endverbatim
92*>
93*> \param[in] N
94*> \verbatim
95*>          N is INTEGER
96*>          The number of columns of the input matrix A.  N >= 0.
97*> \endverbatim
98*>
99*> \param[in,out] A
100*> \verbatim
101*>          A is DOUBLE PRECISION array, dimension (LDA,N)
102*>          On entry, the M-by-N matrix A.
103*>          On exit,
104*>          if JOBU = 'O',  A is overwritten with the first min(m,n)
105*>                          columns of U (the left singular vectors,
106*>                          stored columnwise);
107*>          if JOBVT = 'O', A is overwritten with the first min(m,n)
108*>                          rows of V**T (the right singular vectors,
109*>                          stored rowwise);
110*>          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
111*>                          are destroyed.
112*> \endverbatim
113*>
114*> \param[in] LDA
115*> \verbatim
116*>          LDA is INTEGER
117*>          The leading dimension of the array A.  LDA >= max(1,M).
118*> \endverbatim
119*>
120*> \param[out] S
121*> \verbatim
122*>          S is DOUBLE PRECISION array, dimension (min(M,N))
123*>          The singular values of A, sorted so that S(i) >= S(i+1).
124*> \endverbatim
125*>
126*> \param[out] U
127*> \verbatim
128*>          U is DOUBLE PRECISION array, dimension (LDU,UCOL)
129*>          (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
130*>          If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
131*>          if JOBU = 'S', U contains the first min(m,n) columns of U
132*>          (the left singular vectors, stored columnwise);
133*>          if JOBU = 'N' or 'O', U is not referenced.
134*> \endverbatim
135*>
136*> \param[in] LDU
137*> \verbatim
138*>          LDU is INTEGER
139*>          The leading dimension of the array U.  LDU >= 1; if
140*>          JOBU = 'S' or 'A', LDU >= M.
141*> \endverbatim
142*>
143*> \param[out] VT
144*> \verbatim
145*>          VT is DOUBLE PRECISION array, dimension (LDVT,N)
146*>          If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
147*>          V**T;
148*>          if JOBVT = 'S', VT contains the first min(m,n) rows of
149*>          V**T (the right singular vectors, stored rowwise);
150*>          if JOBVT = 'N' or 'O', VT is not referenced.
151*> \endverbatim
152*>
153*> \param[in] LDVT
154*> \verbatim
155*>          LDVT is INTEGER
156*>          The leading dimension of the array VT.  LDVT >= 1; if
157*>          JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
158*> \endverbatim
159*>
160*> \param[out] WORK
161*> \verbatim
162*>          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
163*>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
164*>          if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
165*>          superdiagonal elements of an upper bidiagonal matrix B
166*>          whose diagonal is in S (not necessarily sorted). B
167*>          satisfies A = U * B * VT, so it has the same singular values
168*>          as A, and singular vectors related by U and VT.
169*> \endverbatim
170*>
171*> \param[in] LWORK
172*> \verbatim
173*>          LWORK is INTEGER
174*>          The dimension of the array WORK.
175*>          LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code):
176*>             - PATH 1  (M much larger than N, JOBU='N')
177*>             - PATH 1t (N much larger than M, JOBVT='N')
178*>          LWORK >= MAX(1,3*MIN(M,N) + MAX(M,N),5*MIN(M,N)) for the other paths
179*>          For good performance, LWORK should generally be larger.
180*>
181*>          If LWORK = -1, then a workspace query is assumed; the routine
182*>          only calculates the optimal size of the WORK array, returns
183*>          this value as the first entry of the WORK array, and no error
184*>          message related to LWORK is issued by XERBLA.
185*> \endverbatim
186*>
187*> \param[out] INFO
188*> \verbatim
189*>          INFO is INTEGER
190*>          = 0:  successful exit.
191*>          < 0:  if INFO = -i, the i-th argument had an illegal value.
192*>          > 0:  if DBDSQR did not converge, INFO specifies how many
193*>                superdiagonals of an intermediate bidiagonal form B
194*>                did not converge to zero. See the description of WORK
195*>                above for details.
196*> \endverbatim
197*
198*  Authors:
199*  ========
200*
201*> \author Univ. of Tennessee
202*> \author Univ. of California Berkeley
203*> \author Univ. of Colorado Denver
204*> \author NAG Ltd.
205*
206*> \date April 2012
207*
208*> \ingroup doubleGEsing
209*
210*  =====================================================================
211      SUBROUTINE DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU,
212     $                   VT, LDVT, WORK, LWORK, INFO )
213*
214*  -- LAPACK driver routine (version 3.7.0) --
215*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
216*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
217*     April 2012
218*
219*     .. Scalar Arguments ..
220      CHARACTER          JOBU, JOBVT
221      INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N
222*     ..
223*     .. Array Arguments ..
224      DOUBLE PRECISION   A( LDA, * ), S( * ), U( LDU, * ),
225     $                   VT( LDVT, * ), WORK( * )
226*     ..
227*
228*  =====================================================================
229*
230*     .. Parameters ..
231      DOUBLE PRECISION   ZERO, ONE
232      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
233*     ..
234*     .. Local Scalars ..
235      LOGICAL            LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
236     $                   WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS
237      INTEGER            BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL,
238     $                   ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU,
239     $                   MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU,
240     $                   NRVT, WRKBL
241      INTEGER            LWORK_DGEQRF, LWORK_DORGQR_N, LWORK_DORGQR_M,
242     $                   LWORK_DGEBRD, LWORK_DORGBR_P, LWORK_DORGBR_Q,
243     $                   LWORK_DGELQF, LWORK_DORGLQ_N, LWORK_DORGLQ_M
244      DOUBLE PRECISION   ANRM, BIGNUM, EPS, SMLNUM
245*     ..
246*     .. Local Arrays ..
247      DOUBLE PRECISION   DUM( 1 )
248*     ..
249*     .. External Subroutines ..
250      EXTERNAL           DBDSQR, DGEBRD, DGELQF, DGEMM, DGEQRF, DLACPY,
251     $                   DLASCL, DLASET, DORGBR, DORGLQ, DORGQR, DORMBR,
252     $                   XERBLA
253*     ..
254*     .. External Functions ..
255      LOGICAL            LSAME
256      INTEGER            ILAENV
257      DOUBLE PRECISION   DLAMCH, DLANGE
258      EXTERNAL           LSAME, ILAENV, DLAMCH, DLANGE
259*     ..
260*     .. Intrinsic Functions ..
261      INTRINSIC          MAX, MIN, SQRT
262*     ..
263*     .. Executable Statements ..
264*
265*     Test the input arguments
266*
267      INFO = 0
268      MINMN = MIN( M, N )
269      WNTUA = LSAME( JOBU, 'A' )
270      WNTUS = LSAME( JOBU, 'S' )
271      WNTUAS = WNTUA .OR. WNTUS
272      WNTUO = LSAME( JOBU, 'O' )
273      WNTUN = LSAME( JOBU, 'N' )
274      WNTVA = LSAME( JOBVT, 'A' )
275      WNTVS = LSAME( JOBVT, 'S' )
276      WNTVAS = WNTVA .OR. WNTVS
277      WNTVO = LSAME( JOBVT, 'O' )
278      WNTVN = LSAME( JOBVT, 'N' )
279      LQUERY = ( LWORK.EQ.-1 )
280*
281      IF( .NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN
282         INFO = -1
283      ELSE IF( .NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR.
284     $         ( WNTVO .AND. WNTUO ) ) THEN
285         INFO = -2
286      ELSE IF( M.LT.0 ) THEN
287         INFO = -3
288      ELSE IF( N.LT.0 ) THEN
289         INFO = -4
290      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
291         INFO = -6
292      ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN
293         INFO = -9
294      ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR.
295     $         ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN
296         INFO = -11
297      END IF
298*
299*     Compute workspace
300*      (Note: Comments in the code beginning "Workspace:" describe the
301*       minimal amount of workspace needed at that point in the code,
302*       as well as the preferred amount for good performance.
303*       NB refers to the optimal block size for the immediately
304*       following subroutine, as returned by ILAENV.)
305*
306      IF( INFO.EQ.0 ) THEN
307         MINWRK = 1
308         MAXWRK = 1
309         IF( M.GE.N .AND. MINMN.GT.0 ) THEN
310*
311*           Compute space needed for DBDSQR
312*
313            MNTHR = ILAENV( 6, 'DGESVD', JOBU // JOBVT, M, N, 0, 0 )
314            BDSPAC = 5*N
315*           Compute space needed for DGEQRF
316            CALL DGEQRF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR )
317            LWORK_DGEQRF = INT( DUM(1) )
318*           Compute space needed for DORGQR
319            CALL DORGQR( M, N, N, A, LDA, DUM(1), DUM(1), -1, IERR )
320            LWORK_DORGQR_N = INT( DUM(1) )
321            CALL DORGQR( M, M, N, A, LDA, DUM(1), DUM(1), -1, IERR )
322            LWORK_DORGQR_M = INT( DUM(1) )
323*           Compute space needed for DGEBRD
324            CALL DGEBRD( N, N, A, LDA, S, DUM(1), DUM(1),
325     $                   DUM(1), DUM(1), -1, IERR )
326            LWORK_DGEBRD = INT( DUM(1) )
327*           Compute space needed for DORGBR P
328            CALL DORGBR( 'P', N, N, N, A, LDA, DUM(1),
329     $                   DUM(1), -1, IERR )
330            LWORK_DORGBR_P = INT( DUM(1) )
331*           Compute space needed for DORGBR Q
332            CALL DORGBR( 'Q', N, N, N, A, LDA, DUM(1),
333     $                   DUM(1), -1, IERR )
334            LWORK_DORGBR_Q = INT( DUM(1) )
335*
336            IF( M.GE.MNTHR ) THEN
337               IF( WNTUN ) THEN
338*
339*                 Path 1 (M much larger than N, JOBU='N')
340*
341                  MAXWRK = N + LWORK_DGEQRF
342                  MAXWRK = MAX( MAXWRK, 3*N + LWORK_DGEBRD )
343                  IF( WNTVO .OR. WNTVAS )
344     $               MAXWRK = MAX( MAXWRK, 3*N + LWORK_DORGBR_P )
345                  MAXWRK = MAX( MAXWRK, BDSPAC )
346                  MINWRK = MAX( 4*N, BDSPAC )
347               ELSE IF( WNTUO .AND. WNTVN ) THEN
348*
349*                 Path 2 (M much larger than N, JOBU='O', JOBVT='N')
350*
351                  WRKBL = N + LWORK_DGEQRF
352                  WRKBL = MAX( WRKBL, N + LWORK_DORGQR_N )
353                  WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD )
354                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q )
355                  WRKBL = MAX( WRKBL, BDSPAC )
356                  MAXWRK = MAX( N*N + WRKBL, N*N + M*N + N )
357                  MINWRK = MAX( 3*N + M, BDSPAC )
358               ELSE IF( WNTUO .AND. WNTVAS ) THEN
359*
360*                 Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
361*                 'A')
362*
363                  WRKBL = N + LWORK_DGEQRF
364                  WRKBL = MAX( WRKBL, N + LWORK_DORGQR_N )
365                  WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD )
366                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q )
367                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_P )
368                  WRKBL = MAX( WRKBL, BDSPAC )
369                  MAXWRK = MAX( N*N + WRKBL, N*N + M*N + N )
370                  MINWRK = MAX( 3*N + M, BDSPAC )
371               ELSE IF( WNTUS .AND. WNTVN ) THEN
372*
373*                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')
374*
375                  WRKBL = N + LWORK_DGEQRF
376                  WRKBL = MAX( WRKBL, N + LWORK_DORGQR_N )
377                  WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD )
378                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q )
379                  WRKBL = MAX( WRKBL, BDSPAC )
380                  MAXWRK = N*N + WRKBL
381                  MINWRK = MAX( 3*N + M, BDSPAC )
382               ELSE IF( WNTUS .AND. WNTVO ) THEN
383*
384*                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')
385*
386                  WRKBL = N + LWORK_DGEQRF
387                  WRKBL = MAX( WRKBL, N + LWORK_DORGQR_N )
388                  WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD )
389                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q )
390                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_P )
391                  WRKBL = MAX( WRKBL, BDSPAC )
392                  MAXWRK = 2*N*N + WRKBL
393                  MINWRK = MAX( 3*N + M, BDSPAC )
394               ELSE IF( WNTUS .AND. WNTVAS ) THEN
395*
396*                 Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
397*                 'A')
398*
399                  WRKBL = N + LWORK_DGEQRF
400                  WRKBL = MAX( WRKBL, N + LWORK_DORGQR_N )
401                  WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD )
402                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q )
403                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_P )
404                  WRKBL = MAX( WRKBL, BDSPAC )
405                  MAXWRK = N*N + WRKBL
406                  MINWRK = MAX( 3*N + M, BDSPAC )
407               ELSE IF( WNTUA .AND. WNTVN ) THEN
408*
409*                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')
410*
411                  WRKBL = N + LWORK_DGEQRF
412                  WRKBL = MAX( WRKBL, N + LWORK_DORGQR_M )
413                  WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD )
414                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q )
415                  WRKBL = MAX( WRKBL, BDSPAC )
416                  MAXWRK = N*N + WRKBL
417                  MINWRK = MAX( 3*N + M, BDSPAC )
418               ELSE IF( WNTUA .AND. WNTVO ) THEN
419*
420*                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')
421*
422                  WRKBL = N + LWORK_DGEQRF
423                  WRKBL = MAX( WRKBL, N + LWORK_DORGQR_M )
424                  WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD )
425                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q )
426                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_P )
427                  WRKBL = MAX( WRKBL, BDSPAC )
428                  MAXWRK = 2*N*N + WRKBL
429                  MINWRK = MAX( 3*N + M, BDSPAC )
430               ELSE IF( WNTUA .AND. WNTVAS ) THEN
431*
432*                 Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
433*                 'A')
434*
435                  WRKBL = N + LWORK_DGEQRF
436                  WRKBL = MAX( WRKBL, N + LWORK_DORGQR_M )
437                  WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD )
438                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q )
439                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_P )
440                  WRKBL = MAX( WRKBL, BDSPAC )
441                  MAXWRK = N*N + WRKBL
442                  MINWRK = MAX( 3*N + M, BDSPAC )
443               END IF
444            ELSE
445*
446*              Path 10 (M at least N, but not much larger)
447*
448               CALL DGEBRD( M, N, A, LDA, S, DUM(1), DUM(1),
449     $                   DUM(1), DUM(1), -1, IERR )
450               LWORK_DGEBRD = INT( DUM(1) )
451               MAXWRK = 3*N + LWORK_DGEBRD
452               IF( WNTUS .OR. WNTUO ) THEN
453                  CALL DORGBR( 'Q', M, N, N, A, LDA, DUM(1),
454     $                   DUM(1), -1, IERR )
455                  LWORK_DORGBR_Q = INT( DUM(1) )
456                  MAXWRK = MAX( MAXWRK, 3*N + LWORK_DORGBR_Q )
457               END IF
458               IF( WNTUA ) THEN
459                  CALL DORGBR( 'Q', M, M, N, A, LDA, DUM(1),
460     $                   DUM(1), -1, IERR )
461                  LWORK_DORGBR_Q = INT( DUM(1) )
462                  MAXWRK = MAX( MAXWRK, 3*N + LWORK_DORGBR_Q )
463               END IF
464               IF( .NOT.WNTVN ) THEN
465                 MAXWRK = MAX( MAXWRK, 3*N + LWORK_DORGBR_P )
466               END IF
467               MAXWRK = MAX( MAXWRK, BDSPAC )
468               MINWRK = MAX( 3*N + M, BDSPAC )
469            END IF
470         ELSE IF( MINMN.GT.0 ) THEN
471*
472*           Compute space needed for DBDSQR
473*
474            MNTHR = ILAENV( 6, 'DGESVD', JOBU // JOBVT, M, N, 0, 0 )
475            BDSPAC = 5*M
476*           Compute space needed for DGELQF
477            CALL DGELQF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR )
478            LWORK_DGELQF = INT( DUM(1) )
479*           Compute space needed for DORGLQ
480            CALL DORGLQ( N, N, M, DUM(1), N, DUM(1), DUM(1), -1, IERR )
481            LWORK_DORGLQ_N = INT( DUM(1) )
482            CALL DORGLQ( M, N, M, A, LDA, DUM(1), DUM(1), -1, IERR )
483            LWORK_DORGLQ_M = INT( DUM(1) )
484*           Compute space needed for DGEBRD
485            CALL DGEBRD( M, M, A, LDA, S, DUM(1), DUM(1),
486     $                   DUM(1), DUM(1), -1, IERR )
487            LWORK_DGEBRD = INT( DUM(1) )
488*            Compute space needed for DORGBR P
489            CALL DORGBR( 'P', M, M, M, A, N, DUM(1),
490     $                   DUM(1), -1, IERR )
491            LWORK_DORGBR_P = INT( DUM(1) )
492*           Compute space needed for DORGBR Q
493            CALL DORGBR( 'Q', M, M, M, A, N, DUM(1),
494     $                   DUM(1), -1, IERR )
495            LWORK_DORGBR_Q = INT( DUM(1) )
496            IF( N.GE.MNTHR ) THEN
497               IF( WNTVN ) THEN
498*
499*                 Path 1t(N much larger than M, JOBVT='N')
500*
501                  MAXWRK = M + LWORK_DGELQF
502                  MAXWRK = MAX( MAXWRK, 3*M + LWORK_DGEBRD )
503                  IF( WNTUO .OR. WNTUAS )
504     $               MAXWRK = MAX( MAXWRK, 3*M + LWORK_DORGBR_Q )
505                  MAXWRK = MAX( MAXWRK, BDSPAC )
506                  MINWRK = MAX( 4*M, BDSPAC )
507               ELSE IF( WNTVO .AND. WNTUN ) THEN
508*
509*                 Path 2t(N much larger than M, JOBU='N', JOBVT='O')
510*
511                  WRKBL = M + LWORK_DGELQF
512                  WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_M )
513                  WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD )
514                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P )
515                  WRKBL = MAX( WRKBL, BDSPAC )
516                  MAXWRK = MAX( M*M + WRKBL, M*M + M*N + M )
517                  MINWRK = MAX( 3*M + N, BDSPAC )
518               ELSE IF( WNTVO .AND. WNTUAS ) THEN
519*
520*                 Path 3t(N much larger than M, JOBU='S' or 'A',
521*                 JOBVT='O')
522*
523                  WRKBL = M + LWORK_DGELQF
524                  WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_M )
525                  WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD )
526                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P )
527                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_Q )
528                  WRKBL = MAX( WRKBL, BDSPAC )
529                  MAXWRK = MAX( M*M + WRKBL, M*M + M*N + M )
530                  MINWRK = MAX( 3*M + N, BDSPAC )
531               ELSE IF( WNTVS .AND. WNTUN ) THEN
532*
533*                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')
534*
535                  WRKBL = M + LWORK_DGELQF
536                  WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_M )
537                  WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD )
538                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P )
539                  WRKBL = MAX( WRKBL, BDSPAC )
540                  MAXWRK = M*M + WRKBL
541                  MINWRK = MAX( 3*M + N, BDSPAC )
542               ELSE IF( WNTVS .AND. WNTUO ) THEN
543*
544*                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')
545*
546                  WRKBL = M + LWORK_DGELQF
547                  WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_M )
548                  WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD )
549                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P )
550                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_Q )
551                  WRKBL = MAX( WRKBL, BDSPAC )
552                  MAXWRK = 2*M*M + WRKBL
553                  MINWRK = MAX( 3*M + N, BDSPAC )
554               ELSE IF( WNTVS .AND. WNTUAS ) THEN
555*
556*                 Path 6t(N much larger than M, JOBU='S' or 'A',
557*                 JOBVT='S')
558*
559                  WRKBL = M + LWORK_DGELQF
560                  WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_M )
561                  WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD )
562                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P )
563                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_Q )
564                  WRKBL = MAX( WRKBL, BDSPAC )
565                  MAXWRK = M*M + WRKBL
566                  MINWRK = MAX( 3*M + N, BDSPAC )
567               ELSE IF( WNTVA .AND. WNTUN ) THEN
568*
569*                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')
570*
571                  WRKBL = M + LWORK_DGELQF
572                  WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_N )
573                  WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD )
574                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P )
575                  WRKBL = MAX( WRKBL, BDSPAC )
576                  MAXWRK = M*M + WRKBL
577                  MINWRK = MAX( 3*M + N, BDSPAC )
578               ELSE IF( WNTVA .AND. WNTUO ) THEN
579*
580*                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')
581*
582                  WRKBL = M + LWORK_DGELQF
583                  WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_N )
584                  WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD )
585                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P )
586                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_Q )
587                  WRKBL = MAX( WRKBL, BDSPAC )
588                  MAXWRK = 2*M*M + WRKBL
589                  MINWRK = MAX( 3*M + N, BDSPAC )
590               ELSE IF( WNTVA .AND. WNTUAS ) THEN
591*
592*                 Path 9t(N much larger than M, JOBU='S' or 'A',
593*                 JOBVT='A')
594*
595                  WRKBL = M + LWORK_DGELQF
596                  WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_N )
597                  WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD )
598                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P )
599                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_Q )
600                  WRKBL = MAX( WRKBL, BDSPAC )
601                  MAXWRK = M*M + WRKBL
602                  MINWRK = MAX( 3*M + N, BDSPAC )
603               END IF
604            ELSE
605*
606*              Path 10t(N greater than M, but not much larger)
607*
608               CALL DGEBRD( M, N, A, LDA, S, DUM(1), DUM(1),
609     $                   DUM(1), DUM(1), -1, IERR )
610               LWORK_DGEBRD = INT( DUM(1) )
611               MAXWRK = 3*M + LWORK_DGEBRD
612               IF( WNTVS .OR. WNTVO ) THEN
613*                Compute space needed for DORGBR P
614                 CALL DORGBR( 'P', M, N, M, A, N, DUM(1),
615     $                   DUM(1), -1, IERR )
616                 LWORK_DORGBR_P = INT( DUM(1) )
617                 MAXWRK = MAX( MAXWRK, 3*M + LWORK_DORGBR_P )
618               END IF
619               IF( WNTVA ) THEN
620                 CALL DORGBR( 'P', N, N, M, A, N, DUM(1),
621     $                   DUM(1), -1, IERR )
622                 LWORK_DORGBR_P = INT( DUM(1) )
623                 MAXWRK = MAX( MAXWRK, 3*M + LWORK_DORGBR_P )
624               END IF
625               IF( .NOT.WNTUN ) THEN
626                  MAXWRK = MAX( MAXWRK, 3*M + LWORK_DORGBR_Q )
627               END IF
628               MAXWRK = MAX( MAXWRK, BDSPAC )
629               MINWRK = MAX( 3*M + N, BDSPAC )
630            END IF
631         END IF
632         MAXWRK = MAX( MAXWRK, MINWRK )
633         WORK( 1 ) = MAXWRK
634*
635         IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
636            INFO = -13
637         END IF
638      END IF
639*
640      IF( INFO.NE.0 ) THEN
641         CALL XERBLA( 'DGESVD', -INFO )
642         RETURN
643      ELSE IF( LQUERY ) THEN
644         RETURN
645      END IF
646*
647*     Quick return if possible
648*
649      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
650         RETURN
651      END IF
652*
653*     Get machine constants
654*
655      EPS = DLAMCH( 'P' )
656      SMLNUM = SQRT( DLAMCH( 'S' ) ) / EPS
657      BIGNUM = ONE / SMLNUM
658*
659*     Scale A if max element outside range [SMLNUM,BIGNUM]
660*
661      ANRM = DLANGE( 'M', M, N, A, LDA, DUM )
662      ISCL = 0
663      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
664         ISCL = 1
665         CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR )
666      ELSE IF( ANRM.GT.BIGNUM ) THEN
667         ISCL = 1
668         CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR )
669      END IF
670*
671      IF( M.GE.N ) THEN
672*
673*        A has at least as many rows as columns. If A has sufficiently
674*        more rows than columns, first reduce using the QR
675*        decomposition (if sufficient workspace available)
676*
677         IF( M.GE.MNTHR ) THEN
678*
679            IF( WNTUN ) THEN
680*
681*              Path 1 (M much larger than N, JOBU='N')
682*              No left singular vectors to be computed
683*
684               ITAU = 1
685               IWORK = ITAU + N
686*
687*              Compute A=Q*R
688*              (Workspace: need 2*N, prefer N + N*NB)
689*
690               CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
691     $                      LWORK-IWORK+1, IERR )
692*
693*              Zero out below R
694*
695               IF( N .GT. 1 ) THEN
696                  CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
697     $                         LDA )
698               END IF
699               IE = 1
700               ITAUQ = IE + N
701               ITAUP = ITAUQ + N
702               IWORK = ITAUP + N
703*
704*              Bidiagonalize R in A
705*              (Workspace: need 4*N, prefer 3*N + 2*N*NB)
706*
707               CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
708     $                      WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
709     $                      IERR )
710               NCVT = 0
711               IF( WNTVO .OR. WNTVAS ) THEN
712*
713*                 If right singular vectors desired, generate P'.
714*                 (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
715*
716                  CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
717     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
718                  NCVT = N
719               END IF
720               IWORK = IE + N
721*
722*              Perform bidiagonal QR iteration, computing right
723*              singular vectors of A in A if desired
724*              (Workspace: need BDSPAC)
725*
726               CALL DBDSQR( 'U', N, NCVT, 0, 0, S, WORK( IE ), A, LDA,
727     $                      DUM, 1, DUM, 1, WORK( IWORK ), INFO )
728*
729*              If right singular vectors desired in VT, copy them there
730*
731               IF( WNTVAS )
732     $            CALL DLACPY( 'F', N, N, A, LDA, VT, LDVT )
733*
734            ELSE IF( WNTUO .AND. WNTVN ) THEN
735*
736*              Path 2 (M much larger than N, JOBU='O', JOBVT='N')
737*              N left singular vectors to be overwritten on A and
738*              no right singular vectors to be computed
739*
740               IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
741*
742*                 Sufficient workspace for a fast algorithm
743*
744                  IR = 1
745                  IF( LWORK.GE.MAX( WRKBL, LDA*N + N ) + LDA*N ) THEN
746*
747*                    WORK(IU) is LDA by N, WORK(IR) is LDA by N
748*
749                     LDWRKU = LDA
750                     LDWRKR = LDA
751                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N + N ) + N*N ) THEN
752*
753*                    WORK(IU) is LDA by N, WORK(IR) is N by N
754*
755                     LDWRKU = LDA
756                     LDWRKR = N
757                  ELSE
758*
759*                    WORK(IU) is LDWRKU by N, WORK(IR) is N by N
760*
761                     LDWRKU = ( LWORK-N*N-N ) / N
762                     LDWRKR = N
763                  END IF
764                  ITAU = IR + LDWRKR*N
765                  IWORK = ITAU + N
766*
767*                 Compute A=Q*R
768*                 (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
769*
770                  CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
771     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
772*
773*                 Copy R to WORK(IR) and zero out below it
774*
775                  CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
776                  CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IR+1 ),
777     $                         LDWRKR )
778*
779*                 Generate Q in A
780*                 (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
781*
782                  CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
783     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
784                  IE = ITAU
785                  ITAUQ = IE + N
786                  ITAUP = ITAUQ + N
787                  IWORK = ITAUP + N
788*
789*                 Bidiagonalize R in WORK(IR)
790*                 (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
791*
792                  CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ),
793     $                         WORK( ITAUQ ), WORK( ITAUP ),
794     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
795*
796*                 Generate left vectors bidiagonalizing R
797*                 (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
798*
799                  CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
800     $                         WORK( ITAUQ ), WORK( IWORK ),
801     $                         LWORK-IWORK+1, IERR )
802                  IWORK = IE + N
803*
804*                 Perform bidiagonal QR iteration, computing left
805*                 singular vectors of R in WORK(IR)
806*                 (Workspace: need N*N + BDSPAC)
807*
808                  CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, 1,
809     $                         WORK( IR ), LDWRKR, DUM, 1,
810     $                         WORK( IWORK ), INFO )
811                  IU = IE + N
812*
813*                 Multiply Q in A by left singular vectors of R in
814*                 WORK(IR), storing result in WORK(IU) and copying to A
815*                 (Workspace: need N*N + 2*N, prefer N*N + M*N + N)
816*
817                  DO 10 I = 1, M, LDWRKU
818                     CHUNK = MIN( M-I+1, LDWRKU )
819                     CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
820     $                           LDA, WORK( IR ), LDWRKR, ZERO,
821     $                           WORK( IU ), LDWRKU )
822                     CALL DLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
823     $                            A( I, 1 ), LDA )
824   10             CONTINUE
825*
826               ELSE
827*
828*                 Insufficient workspace for a fast algorithm
829*
830                  IE = 1
831                  ITAUQ = IE + N
832                  ITAUP = ITAUQ + N
833                  IWORK = ITAUP + N
834*
835*                 Bidiagonalize A
836*                 (Workspace: need 3*N + M, prefer 3*N + (M + N)*NB)
837*
838                  CALL DGEBRD( M, N, A, LDA, S, WORK( IE ),
839     $                         WORK( ITAUQ ), WORK( ITAUP ),
840     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
841*
842*                 Generate left vectors bidiagonalizing A
843*                 (Workspace: need 4*N, prefer 3*N + N*NB)
844*
845                  CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
846     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
847                  IWORK = IE + N
848*
849*                 Perform bidiagonal QR iteration, computing left
850*                 singular vectors of A in A
851*                 (Workspace: need BDSPAC)
852*
853                  CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, 1,
854     $                         A, LDA, DUM, 1, WORK( IWORK ), INFO )
855*
856               END IF
857*
858            ELSE IF( WNTUO .AND. WNTVAS ) THEN
859*
860*              Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
861*              N left singular vectors to be overwritten on A and
862*              N right singular vectors to be computed in VT
863*
864               IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
865*
866*                 Sufficient workspace for a fast algorithm
867*
868                  IR = 1
869                  IF( LWORK.GE.MAX( WRKBL, LDA*N + N ) + LDA*N ) THEN
870*
871*                    WORK(IU) is LDA by N and WORK(IR) is LDA by N
872*
873                     LDWRKU = LDA
874                     LDWRKR = LDA
875                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N + N ) + N*N ) THEN
876*
877*                    WORK(IU) is LDA by N and WORK(IR) is N by N
878*
879                     LDWRKU = LDA
880                     LDWRKR = N
881                  ELSE
882*
883*                    WORK(IU) is LDWRKU by N and WORK(IR) is N by N
884*
885                     LDWRKU = ( LWORK-N*N-N ) / N
886                     LDWRKR = N
887                  END IF
888                  ITAU = IR + LDWRKR*N
889                  IWORK = ITAU + N
890*
891*                 Compute A=Q*R
892*                 (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
893*
894                  CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
895     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
896*
897*                 Copy R to VT, zeroing out below it
898*
899                  CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
900                  IF( N.GT.1 )
901     $               CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
902     $                            VT( 2, 1 ), LDVT )
903*
904*                 Generate Q in A
905*                 (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
906*
907                  CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
908     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
909                  IE = ITAU
910                  ITAUQ = IE + N
911                  ITAUP = ITAUQ + N
912                  IWORK = ITAUP + N
913*
914*                 Bidiagonalize R in VT, copying result to WORK(IR)
915*                 (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
916*
917                  CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
918     $                         WORK( ITAUQ ), WORK( ITAUP ),
919     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
920                  CALL DLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR )
921*
922*                 Generate left vectors bidiagonalizing R in WORK(IR)
923*                 (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
924*
925                  CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
926     $                         WORK( ITAUQ ), WORK( IWORK ),
927     $                         LWORK-IWORK+1, IERR )
928*
929*                 Generate right vectors bidiagonalizing R in VT
930*                 (Workspace: need N*N + 4*N-1, prefer N*N + 3*N + (N-1)*NB)
931*
932                  CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
933     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
934                  IWORK = IE + N
935*
936*                 Perform bidiagonal QR iteration, computing left
937*                 singular vectors of R in WORK(IR) and computing right
938*                 singular vectors of R in VT
939*                 (Workspace: need N*N + BDSPAC)
940*
941                  CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, LDVT,
942     $                         WORK( IR ), LDWRKR, DUM, 1,
943     $                         WORK( IWORK ), INFO )
944                  IU = IE + N
945*
946*                 Multiply Q in A by left singular vectors of R in
947*                 WORK(IR), storing result in WORK(IU) and copying to A
948*                 (Workspace: need N*N + 2*N, prefer N*N + M*N + N)
949*
950                  DO 20 I = 1, M, LDWRKU
951                     CHUNK = MIN( M-I+1, LDWRKU )
952                     CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
953     $                           LDA, WORK( IR ), LDWRKR, ZERO,
954     $                           WORK( IU ), LDWRKU )
955                     CALL DLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
956     $                            A( I, 1 ), LDA )
957   20             CONTINUE
958*
959               ELSE
960*
961*                 Insufficient workspace for a fast algorithm
962*
963                  ITAU = 1
964                  IWORK = ITAU + N
965*
966*                 Compute A=Q*R
967*                 (Workspace: need 2*N, prefer N + N*NB)
968*
969                  CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
970     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
971*
972*                 Copy R to VT, zeroing out below it
973*
974                  CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
975                  IF( N.GT.1 )
976     $               CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
977     $                            VT( 2, 1 ), LDVT )
978*
979*                 Generate Q in A
980*                 (Workspace: need 2*N, prefer N + N*NB)
981*
982                  CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
983     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
984                  IE = ITAU
985                  ITAUQ = IE + N
986                  ITAUP = ITAUQ + N
987                  IWORK = ITAUP + N
988*
989*                 Bidiagonalize R in VT
990*                 (Workspace: need 4*N, prefer 3*N + 2*N*NB)
991*
992                  CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
993     $                         WORK( ITAUQ ), WORK( ITAUP ),
994     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
995*
996*                 Multiply Q in A by left vectors bidiagonalizing R
997*                 (Workspace: need 3*N + M, prefer 3*N + M*NB)
998*
999                  CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
1000     $                         WORK( ITAUQ ), A, LDA, WORK( IWORK ),
1001     $                         LWORK-IWORK+1, IERR )
1002*
1003*                 Generate right vectors bidiagonalizing R in VT
1004*                 (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1005*
1006                  CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1007     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
1008                  IWORK = IE + N
1009*
1010*                 Perform bidiagonal QR iteration, computing left
1011*                 singular vectors of A in A and computing right
1012*                 singular vectors of A in VT
1013*                 (Workspace: need BDSPAC)
1014*
1015                  CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, LDVT,
1016     $                         A, LDA, DUM, 1, WORK( IWORK ), INFO )
1017*
1018               END IF
1019*
1020            ELSE IF( WNTUS ) THEN
1021*
1022               IF( WNTVN ) THEN
1023*
1024*                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')
1025*                 N left singular vectors to be computed in U and
1026*                 no right singular vectors to be computed
1027*
1028                  IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
1029*
1030*                    Sufficient workspace for a fast algorithm
1031*
1032                     IR = 1
1033                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
1034*
1035*                       WORK(IR) is LDA by N
1036*
1037                        LDWRKR = LDA
1038                     ELSE
1039*
1040*                       WORK(IR) is N by N
1041*
1042                        LDWRKR = N
1043                     END IF
1044                     ITAU = IR + LDWRKR*N
1045                     IWORK = ITAU + N
1046*
1047*                    Compute A=Q*R
1048*                    (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1049*
1050                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1051     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1052*
1053*                    Copy R to WORK(IR), zeroing out below it
1054*
1055                     CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ),
1056     $                            LDWRKR )
1057                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1058     $                            WORK( IR+1 ), LDWRKR )
1059*
1060*                    Generate Q in A
1061*                    (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1062*
1063                     CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
1064     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1065                     IE = ITAU
1066                     ITAUQ = IE + N
1067                     ITAUP = ITAUQ + N
1068                     IWORK = ITAUP + N
1069*
1070*                    Bidiagonalize R in WORK(IR)
1071*                    (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
1072*
1073                     CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S,
1074     $                            WORK( IE ), WORK( ITAUQ ),
1075     $                            WORK( ITAUP ), WORK( IWORK ),
1076     $                            LWORK-IWORK+1, IERR )
1077*
1078*                    Generate left vectors bidiagonalizing R in WORK(IR)
1079*                    (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
1080*
1081                     CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
1082     $                            WORK( ITAUQ ), WORK( IWORK ),
1083     $                            LWORK-IWORK+1, IERR )
1084                     IWORK = IE + N
1085*
1086*                    Perform bidiagonal QR iteration, computing left
1087*                    singular vectors of R in WORK(IR)
1088*                    (Workspace: need N*N + BDSPAC)
1089*
1090                     CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
1091     $                            1, WORK( IR ), LDWRKR, DUM, 1,
1092     $                            WORK( IWORK ), INFO )
1093*
1094*                    Multiply Q in A by left singular vectors of R in
1095*                    WORK(IR), storing result in U
1096*                    (Workspace: need N*N)
1097*
1098                     CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
1099     $                           WORK( IR ), LDWRKR, ZERO, U, LDU )
1100*
1101                  ELSE
1102*
1103*                    Insufficient workspace for a fast algorithm
1104*
1105                     ITAU = 1
1106                     IWORK = ITAU + N
1107*
1108*                    Compute A=Q*R, copying result to U
1109*                    (Workspace: need 2*N, prefer N + N*NB)
1110*
1111                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1112     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1113                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1114*
1115*                    Generate Q in U
1116*                    (Workspace: need 2*N, prefer N + N*NB)
1117*
1118                     CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
1119     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1120                     IE = ITAU
1121                     ITAUQ = IE + N
1122                     ITAUP = ITAUQ + N
1123                     IWORK = ITAUP + N
1124*
1125*                    Zero out below R in A
1126*
1127                     IF( N .GT. 1 ) THEN
1128                        CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1129     $                               A( 2, 1 ), LDA )
1130                     END IF
1131*
1132*                    Bidiagonalize R in A
1133*                    (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1134*
1135                     CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1136     $                            WORK( ITAUQ ), WORK( ITAUP ),
1137     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1138*
1139*                    Multiply Q in U by left vectors bidiagonalizing R
1140*                    (Workspace: need 3*N + M, prefer 3*N + M*NB)
1141*
1142                     CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1143     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1144     $                            LWORK-IWORK+1, IERR )
1145                     IWORK = IE + N
1146*
1147*                    Perform bidiagonal QR iteration, computing left
1148*                    singular vectors of A in U
1149*                    (Workspace: need BDSPAC)
1150*
1151                     CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
1152     $                            1, U, LDU, DUM, 1, WORK( IWORK ),
1153     $                            INFO )
1154*
1155                  END IF
1156*
1157               ELSE IF( WNTVO ) THEN
1158*
1159*                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1160*                 N left singular vectors to be computed in U and
1161*                 N right singular vectors to be overwritten on A
1162*
1163                  IF( LWORK.GE.2*N*N+MAX( 4*N, BDSPAC ) ) THEN
1164*
1165*                    Sufficient workspace for a fast algorithm
1166*
1167                     IU = 1
1168                     IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
1169*
1170*                       WORK(IU) is LDA by N and WORK(IR) is LDA by N
1171*
1172                        LDWRKU = LDA
1173                        IR = IU + LDWRKU*N
1174                        LDWRKR = LDA
1175                     ELSE IF( LWORK.GE.WRKBL+( LDA + N )*N ) THEN
1176*
1177*                       WORK(IU) is LDA by N and WORK(IR) is N by N
1178*
1179                        LDWRKU = LDA
1180                        IR = IU + LDWRKU*N
1181                        LDWRKR = N
1182                     ELSE
1183*
1184*                       WORK(IU) is N by N and WORK(IR) is N by N
1185*
1186                        LDWRKU = N
1187                        IR = IU + LDWRKU*N
1188                        LDWRKR = N
1189                     END IF
1190                     ITAU = IR + LDWRKR*N
1191                     IWORK = ITAU + N
1192*
1193*                    Compute A=Q*R
1194*                    (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB)
1195*
1196                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1197     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1198*
1199*                    Copy R to WORK(IU), zeroing out below it
1200*
1201                     CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1202     $                            LDWRKU )
1203                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1204     $                            WORK( IU+1 ), LDWRKU )
1205*
1206*                    Generate Q in A
1207*                    (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB)
1208*
1209                     CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
1210     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1211                     IE = ITAU
1212                     ITAUQ = IE + N
1213                     ITAUP = ITAUQ + N
1214                     IWORK = ITAUP + N
1215*
1216*                    Bidiagonalize R in WORK(IU), copying result to
1217*                    WORK(IR)
1218*                    (Workspace: need 2*N*N + 4*N,
1219*                                prefer 2*N*N+3*N+2*N*NB)
1220*
1221                     CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1222     $                            WORK( IE ), WORK( ITAUQ ),
1223     $                            WORK( ITAUP ), WORK( IWORK ),
1224     $                            LWORK-IWORK+1, IERR )
1225                     CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU,
1226     $                            WORK( IR ), LDWRKR )
1227*
1228*                    Generate left bidiagonalizing vectors in WORK(IU)
1229*                    (Workspace: need 2*N*N + 4*N, prefer 2*N*N + 3*N + N*NB)
1230*
1231                     CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1232     $                            WORK( ITAUQ ), WORK( IWORK ),
1233     $                            LWORK-IWORK+1, IERR )
1234*
1235*                    Generate right bidiagonalizing vectors in WORK(IR)
1236*                    (Workspace: need 2*N*N + 4*N-1,
1237*                                prefer 2*N*N+3*N+(N-1)*NB)
1238*
1239                     CALL DORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
1240     $                            WORK( ITAUP ), WORK( IWORK ),
1241     $                            LWORK-IWORK+1, IERR )
1242                     IWORK = IE + N
1243*
1244*                    Perform bidiagonal QR iteration, computing left
1245*                    singular vectors of R in WORK(IU) and computing
1246*                    right singular vectors of R in WORK(IR)
1247*                    (Workspace: need 2*N*N + BDSPAC)
1248*
1249                     CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
1250     $                            WORK( IR ), LDWRKR, WORK( IU ),
1251     $                            LDWRKU, DUM, 1, WORK( IWORK ), INFO )
1252*
1253*                    Multiply Q in A by left singular vectors of R in
1254*                    WORK(IU), storing result in U
1255*                    (Workspace: need N*N)
1256*
1257                     CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
1258     $                           WORK( IU ), LDWRKU, ZERO, U, LDU )
1259*
1260*                    Copy right singular vectors of R to A
1261*                    (Workspace: need N*N)
1262*
1263                     CALL DLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
1264     $                            LDA )
1265*
1266                  ELSE
1267*
1268*                    Insufficient workspace for a fast algorithm
1269*
1270                     ITAU = 1
1271                     IWORK = ITAU + N
1272*
1273*                    Compute A=Q*R, copying result to U
1274*                    (Workspace: need 2*N, prefer N + N*NB)
1275*
1276                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1277     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1278                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1279*
1280*                    Generate Q in U
1281*                    (Workspace: need 2*N, prefer N + N*NB)
1282*
1283                     CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
1284     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1285                     IE = ITAU
1286                     ITAUQ = IE + N
1287                     ITAUP = ITAUQ + N
1288                     IWORK = ITAUP + N
1289*
1290*                    Zero out below R in A
1291*
1292                     IF( N .GT. 1 ) THEN
1293                        CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1294     $                               A( 2, 1 ), LDA )
1295                     END IF
1296*
1297*                    Bidiagonalize R in A
1298*                    (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1299*
1300                     CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1301     $                            WORK( ITAUQ ), WORK( ITAUP ),
1302     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1303*
1304*                    Multiply Q in U by left vectors bidiagonalizing R
1305*                    (Workspace: need 3*N + M, prefer 3*N + M*NB)
1306*
1307                     CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1308     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1309     $                            LWORK-IWORK+1, IERR )
1310*
1311*                    Generate right vectors bidiagonalizing R in A
1312*                    (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1313*
1314                     CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1315     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1316                     IWORK = IE + N
1317*
1318*                    Perform bidiagonal QR iteration, computing left
1319*                    singular vectors of A in U and computing right
1320*                    singular vectors of A in A
1321*                    (Workspace: need BDSPAC)
1322*
1323                     CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
1324     $                            LDA, U, LDU, DUM, 1, WORK( IWORK ),
1325     $                            INFO )
1326*
1327                  END IF
1328*
1329               ELSE IF( WNTVAS ) THEN
1330*
1331*                 Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1332*                         or 'A')
1333*                 N left singular vectors to be computed in U and
1334*                 N right singular vectors to be computed in VT
1335*
1336                  IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
1337*
1338*                    Sufficient workspace for a fast algorithm
1339*
1340                     IU = 1
1341                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
1342*
1343*                       WORK(IU) is LDA by N
1344*
1345                        LDWRKU = LDA
1346                     ELSE
1347*
1348*                       WORK(IU) is N by N
1349*
1350                        LDWRKU = N
1351                     END IF
1352                     ITAU = IU + LDWRKU*N
1353                     IWORK = ITAU + N
1354*
1355*                    Compute A=Q*R
1356*                    (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1357*
1358                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1359     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1360*
1361*                    Copy R to WORK(IU), zeroing out below it
1362*
1363                     CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1364     $                            LDWRKU )
1365                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1366     $                            WORK( IU+1 ), LDWRKU )
1367*
1368*                    Generate Q in A
1369*                    (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1370*
1371                     CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
1372     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1373                     IE = ITAU
1374                     ITAUQ = IE + N
1375                     ITAUP = ITAUQ + N
1376                     IWORK = ITAUP + N
1377*
1378*                    Bidiagonalize R in WORK(IU), copying result to VT
1379*                    (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
1380*
1381                     CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1382     $                            WORK( IE ), WORK( ITAUQ ),
1383     $                            WORK( ITAUP ), WORK( IWORK ),
1384     $                            LWORK-IWORK+1, IERR )
1385                     CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
1386     $                            LDVT )
1387*
1388*                    Generate left bidiagonalizing vectors in WORK(IU)
1389*                    (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
1390*
1391                     CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1392     $                            WORK( ITAUQ ), WORK( IWORK ),
1393     $                            LWORK-IWORK+1, IERR )
1394*
1395*                    Generate right bidiagonalizing vectors in VT
1396*                    (Workspace: need N*N + 4*N-1,
1397*                                prefer N*N+3*N+(N-1)*NB)
1398*
1399                     CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1400     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1401                     IWORK = IE + N
1402*
1403*                    Perform bidiagonal QR iteration, computing left
1404*                    singular vectors of R in WORK(IU) and computing
1405*                    right singular vectors of R in VT
1406*                    (Workspace: need N*N + BDSPAC)
1407*
1408                     CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
1409     $                            LDVT, WORK( IU ), LDWRKU, DUM, 1,
1410     $                            WORK( IWORK ), INFO )
1411*
1412*                    Multiply Q in A by left singular vectors of R in
1413*                    WORK(IU), storing result in U
1414*                    (Workspace: need N*N)
1415*
1416                     CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
1417     $                           WORK( IU ), LDWRKU, ZERO, U, LDU )
1418*
1419                  ELSE
1420*
1421*                    Insufficient workspace for a fast algorithm
1422*
1423                     ITAU = 1
1424                     IWORK = ITAU + N
1425*
1426*                    Compute A=Q*R, copying result to U
1427*                    (Workspace: need 2*N, prefer N + N*NB)
1428*
1429                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1430     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1431                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1432*
1433*                    Generate Q in U
1434*                    (Workspace: need 2*N, prefer N + N*NB)
1435*
1436                     CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
1437     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1438*
1439*                    Copy R to VT, zeroing out below it
1440*
1441                     CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
1442                     IF( N.GT.1 )
1443     $                  CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1444     $                               VT( 2, 1 ), LDVT )
1445                     IE = ITAU
1446                     ITAUQ = IE + N
1447                     ITAUP = ITAUQ + N
1448                     IWORK = ITAUP + N
1449*
1450*                    Bidiagonalize R in VT
1451*                    (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1452*
1453                     CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
1454     $                            WORK( ITAUQ ), WORK( ITAUP ),
1455     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1456*
1457*                    Multiply Q in U by left bidiagonalizing vectors
1458*                    in VT
1459*                    (Workspace: need 3*N + M, prefer 3*N + M*NB)
1460*
1461                     CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
1462     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1463     $                            LWORK-IWORK+1, IERR )
1464*
1465*                    Generate right bidiagonalizing vectors in VT
1466*                    (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1467*
1468                     CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1469     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1470                     IWORK = IE + N
1471*
1472*                    Perform bidiagonal QR iteration, computing left
1473*                    singular vectors of A in U and computing right
1474*                    singular vectors of A in VT
1475*                    (Workspace: need BDSPAC)
1476*
1477                     CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
1478     $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
1479     $                            INFO )
1480*
1481                  END IF
1482*
1483               END IF
1484*
1485            ELSE IF( WNTUA ) THEN
1486*
1487               IF( WNTVN ) THEN
1488*
1489*                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1490*                 M left singular vectors to be computed in U and
1491*                 no right singular vectors to be computed
1492*
1493                  IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1494*
1495*                    Sufficient workspace for a fast algorithm
1496*
1497                     IR = 1
1498                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
1499*
1500*                       WORK(IR) is LDA by N
1501*
1502                        LDWRKR = LDA
1503                     ELSE
1504*
1505*                       WORK(IR) is N by N
1506*
1507                        LDWRKR = N
1508                     END IF
1509                     ITAU = IR + LDWRKR*N
1510                     IWORK = ITAU + N
1511*
1512*                    Compute A=Q*R, copying result to U
1513*                    (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1514*
1515                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1516     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1517                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1518*
1519*                    Copy R to WORK(IR), zeroing out below it
1520*
1521                     CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ),
1522     $                            LDWRKR )
1523                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1524     $                            WORK( IR+1 ), LDWRKR )
1525*
1526*                    Generate Q in U
1527*                    (Workspace: need N*N + N + M, prefer N*N + N + M*NB)
1528*
1529                     CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1530     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1531                     IE = ITAU
1532                     ITAUQ = IE + N
1533                     ITAUP = ITAUQ + N
1534                     IWORK = ITAUP + N
1535*
1536*                    Bidiagonalize R in WORK(IR)
1537*                    (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
1538*
1539                     CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S,
1540     $                            WORK( IE ), WORK( ITAUQ ),
1541     $                            WORK( ITAUP ), WORK( IWORK ),
1542     $                            LWORK-IWORK+1, IERR )
1543*
1544*                    Generate left bidiagonalizing vectors in WORK(IR)
1545*                    (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
1546*
1547                     CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
1548     $                            WORK( ITAUQ ), WORK( IWORK ),
1549     $                            LWORK-IWORK+1, IERR )
1550                     IWORK = IE + N
1551*
1552*                    Perform bidiagonal QR iteration, computing left
1553*                    singular vectors of R in WORK(IR)
1554*                    (Workspace: need N*N + BDSPAC)
1555*
1556                     CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
1557     $                            1, WORK( IR ), LDWRKR, DUM, 1,
1558     $                            WORK( IWORK ), INFO )
1559*
1560*                    Multiply Q in U by left singular vectors of R in
1561*                    WORK(IR), storing result in A
1562*                    (Workspace: need N*N)
1563*
1564                     CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
1565     $                           WORK( IR ), LDWRKR, ZERO, A, LDA )
1566*
1567*                    Copy left singular vectors of A from A to U
1568*
1569                     CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
1570*
1571                  ELSE
1572*
1573*                    Insufficient workspace for a fast algorithm
1574*
1575                     ITAU = 1
1576                     IWORK = ITAU + N
1577*
1578*                    Compute A=Q*R, copying result to U
1579*                    (Workspace: need 2*N, prefer N + N*NB)
1580*
1581                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1582     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1583                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1584*
1585*                    Generate Q in U
1586*                    (Workspace: need N + M, prefer N + M*NB)
1587*
1588                     CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1589     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1590                     IE = ITAU
1591                     ITAUQ = IE + N
1592                     ITAUP = ITAUQ + N
1593                     IWORK = ITAUP + N
1594*
1595*                    Zero out below R in A
1596*
1597                     IF( N .GT. 1 ) THEN
1598                        CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1599     $                                A( 2, 1 ), LDA )
1600                     END IF
1601*
1602*                    Bidiagonalize R in A
1603*                    (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1604*
1605                     CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1606     $                            WORK( ITAUQ ), WORK( ITAUP ),
1607     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1608*
1609*                    Multiply Q in U by left bidiagonalizing vectors
1610*                    in A
1611*                    (Workspace: need 3*N + M, prefer 3*N + M*NB)
1612*
1613                     CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1614     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1615     $                            LWORK-IWORK+1, IERR )
1616                     IWORK = IE + N
1617*
1618*                    Perform bidiagonal QR iteration, computing left
1619*                    singular vectors of A in U
1620*                    (Workspace: need BDSPAC)
1621*
1622                     CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
1623     $                            1, U, LDU, DUM, 1, WORK( IWORK ),
1624     $                            INFO )
1625*
1626                  END IF
1627*
1628               ELSE IF( WNTVO ) THEN
1629*
1630*                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1631*                 M left singular vectors to be computed in U and
1632*                 N right singular vectors to be overwritten on A
1633*
1634                  IF( LWORK.GE.2*N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1635*
1636*                    Sufficient workspace for a fast algorithm
1637*
1638                     IU = 1
1639                     IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
1640*
1641*                       WORK(IU) is LDA by N and WORK(IR) is LDA by N
1642*
1643                        LDWRKU = LDA
1644                        IR = IU + LDWRKU*N
1645                        LDWRKR = LDA
1646                     ELSE IF( LWORK.GE.WRKBL+( LDA + N )*N ) THEN
1647*
1648*                       WORK(IU) is LDA by N and WORK(IR) is N by N
1649*
1650                        LDWRKU = LDA
1651                        IR = IU + LDWRKU*N
1652                        LDWRKR = N
1653                     ELSE
1654*
1655*                       WORK(IU) is N by N and WORK(IR) is N by N
1656*
1657                        LDWRKU = N
1658                        IR = IU + LDWRKU*N
1659                        LDWRKR = N
1660                     END IF
1661                     ITAU = IR + LDWRKR*N
1662                     IWORK = ITAU + N
1663*
1664*                    Compute A=Q*R, copying result to U
1665*                    (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB)
1666*
1667                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1668     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1669                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1670*
1671*                    Generate Q in U
1672*                    (Workspace: need 2*N*N + N + M, prefer 2*N*N + N + M*NB)
1673*
1674                     CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1675     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1676*
1677*                    Copy R to WORK(IU), zeroing out below it
1678*
1679                     CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1680     $                            LDWRKU )
1681                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1682     $                            WORK( IU+1 ), LDWRKU )
1683                     IE = ITAU
1684                     ITAUQ = IE + N
1685                     ITAUP = ITAUQ + N
1686                     IWORK = ITAUP + N
1687*
1688*                    Bidiagonalize R in WORK(IU), copying result to
1689*                    WORK(IR)
1690*                    (Workspace: need 2*N*N + 4*N,
1691*                                prefer 2*N*N+3*N+2*N*NB)
1692*
1693                     CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1694     $                            WORK( IE ), WORK( ITAUQ ),
1695     $                            WORK( ITAUP ), WORK( IWORK ),
1696     $                            LWORK-IWORK+1, IERR )
1697                     CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU,
1698     $                            WORK( IR ), LDWRKR )
1699*
1700*                    Generate left bidiagonalizing vectors in WORK(IU)
1701*                    (Workspace: need 2*N*N + 4*N, prefer 2*N*N + 3*N + N*NB)
1702*
1703                     CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1704     $                            WORK( ITAUQ ), WORK( IWORK ),
1705     $                            LWORK-IWORK+1, IERR )
1706*
1707*                    Generate right bidiagonalizing vectors in WORK(IR)
1708*                    (Workspace: need 2*N*N + 4*N-1,
1709*                                prefer 2*N*N+3*N+(N-1)*NB)
1710*
1711                     CALL DORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
1712     $                            WORK( ITAUP ), WORK( IWORK ),
1713     $                            LWORK-IWORK+1, IERR )
1714                     IWORK = IE + N
1715*
1716*                    Perform bidiagonal QR iteration, computing left
1717*                    singular vectors of R in WORK(IU) and computing
1718*                    right singular vectors of R in WORK(IR)
1719*                    (Workspace: need 2*N*N + BDSPAC)
1720*
1721                     CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
1722     $                            WORK( IR ), LDWRKR, WORK( IU ),
1723     $                            LDWRKU, DUM, 1, WORK( IWORK ), INFO )
1724*
1725*                    Multiply Q in U by left singular vectors of R in
1726*                    WORK(IU), storing result in A
1727*                    (Workspace: need N*N)
1728*
1729                     CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
1730     $                           WORK( IU ), LDWRKU, ZERO, A, LDA )
1731*
1732*                    Copy left singular vectors of A from A to U
1733*
1734                     CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
1735*
1736*                    Copy right singular vectors of R from WORK(IR) to A
1737*
1738                     CALL DLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
1739     $                            LDA )
1740*
1741                  ELSE
1742*
1743*                    Insufficient workspace for a fast algorithm
1744*
1745                     ITAU = 1
1746                     IWORK = ITAU + N
1747*
1748*                    Compute A=Q*R, copying result to U
1749*                    (Workspace: need 2*N, prefer N + N*NB)
1750*
1751                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1752     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1753                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1754*
1755*                    Generate Q in U
1756*                    (Workspace: need N + M, prefer N + M*NB)
1757*
1758                     CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1759     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1760                     IE = ITAU
1761                     ITAUQ = IE + N
1762                     ITAUP = ITAUQ + N
1763                     IWORK = ITAUP + N
1764*
1765*                    Zero out below R in A
1766*
1767                     IF( N .GT. 1 ) THEN
1768                        CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1769     $                                A( 2, 1 ), LDA )
1770                     END IF
1771*
1772*                    Bidiagonalize R in A
1773*                    (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1774*
1775                     CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1776     $                            WORK( ITAUQ ), WORK( ITAUP ),
1777     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1778*
1779*                    Multiply Q in U by left bidiagonalizing vectors
1780*                    in A
1781*                    (Workspace: need 3*N + M, prefer 3*N + M*NB)
1782*
1783                     CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1784     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1785     $                            LWORK-IWORK+1, IERR )
1786*
1787*                    Generate right bidiagonalizing vectors in A
1788*                    (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1789*
1790                     CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1791     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1792                     IWORK = IE + N
1793*
1794*                    Perform bidiagonal QR iteration, computing left
1795*                    singular vectors of A in U and computing right
1796*                    singular vectors of A in A
1797*                    (Workspace: need BDSPAC)
1798*
1799                     CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
1800     $                            LDA, U, LDU, DUM, 1, WORK( IWORK ),
1801     $                            INFO )
1802*
1803                  END IF
1804*
1805               ELSE IF( WNTVAS ) THEN
1806*
1807*                 Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1808*                         or 'A')
1809*                 M left singular vectors to be computed in U and
1810*                 N right singular vectors to be computed in VT
1811*
1812                  IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1813*
1814*                    Sufficient workspace for a fast algorithm
1815*
1816                     IU = 1
1817                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
1818*
1819*                       WORK(IU) is LDA by N
1820*
1821                        LDWRKU = LDA
1822                     ELSE
1823*
1824*                       WORK(IU) is N by N
1825*
1826                        LDWRKU = N
1827                     END IF
1828                     ITAU = IU + LDWRKU*N
1829                     IWORK = ITAU + N
1830*
1831*                    Compute A=Q*R, copying result to U
1832*                    (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1833*
1834                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1835     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1836                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1837*
1838*                    Generate Q in U
1839*                    (Workspace: need N*N + N + M, prefer N*N + N + M*NB)
1840*
1841                     CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1842     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1843*
1844*                    Copy R to WORK(IU), zeroing out below it
1845*
1846                     CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1847     $                            LDWRKU )
1848                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1849     $                            WORK( IU+1 ), LDWRKU )
1850                     IE = ITAU
1851                     ITAUQ = IE + N
1852                     ITAUP = ITAUQ + N
1853                     IWORK = ITAUP + N
1854*
1855*                    Bidiagonalize R in WORK(IU), copying result to VT
1856*                    (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
1857*
1858                     CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1859     $                            WORK( IE ), WORK( ITAUQ ),
1860     $                            WORK( ITAUP ), WORK( IWORK ),
1861     $                            LWORK-IWORK+1, IERR )
1862                     CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
1863     $                            LDVT )
1864*
1865*                    Generate left bidiagonalizing vectors in WORK(IU)
1866*                    (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
1867*
1868                     CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1869     $                            WORK( ITAUQ ), WORK( IWORK ),
1870     $                            LWORK-IWORK+1, IERR )
1871*
1872*                    Generate right bidiagonalizing vectors in VT
1873*                    (Workspace: need N*N + 4*N-1,
1874*                                prefer N*N+3*N+(N-1)*NB)
1875*
1876                     CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1877     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1878                     IWORK = IE + N
1879*
1880*                    Perform bidiagonal QR iteration, computing left
1881*                    singular vectors of R in WORK(IU) and computing
1882*                    right singular vectors of R in VT
1883*                    (Workspace: need N*N + BDSPAC)
1884*
1885                     CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
1886     $                            LDVT, WORK( IU ), LDWRKU, DUM, 1,
1887     $                            WORK( IWORK ), INFO )
1888*
1889*                    Multiply Q in U by left singular vectors of R in
1890*                    WORK(IU), storing result in A
1891*                    (Workspace: need N*N)
1892*
1893                     CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
1894     $                           WORK( IU ), LDWRKU, ZERO, A, LDA )
1895*
1896*                    Copy left singular vectors of A from A to U
1897*
1898                     CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
1899*
1900                  ELSE
1901*
1902*                    Insufficient workspace for a fast algorithm
1903*
1904                     ITAU = 1
1905                     IWORK = ITAU + N
1906*
1907*                    Compute A=Q*R, copying result to U
1908*                    (Workspace: need 2*N, prefer N + N*NB)
1909*
1910                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1911     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1912                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1913*
1914*                    Generate Q in U
1915*                    (Workspace: need N + M, prefer N + M*NB)
1916*
1917                     CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1918     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1919*
1920*                    Copy R from A to VT, zeroing out below it
1921*
1922                     CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
1923                     IF( N.GT.1 )
1924     $                  CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1925     $                               VT( 2, 1 ), LDVT )
1926                     IE = ITAU
1927                     ITAUQ = IE + N
1928                     ITAUP = ITAUQ + N
1929                     IWORK = ITAUP + N
1930*
1931*                    Bidiagonalize R in VT
1932*                    (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1933*
1934                     CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
1935     $                            WORK( ITAUQ ), WORK( ITAUP ),
1936     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1937*
1938*                    Multiply Q in U by left bidiagonalizing vectors
1939*                    in VT
1940*                    (Workspace: need 3*N + M, prefer 3*N + M*NB)
1941*
1942                     CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
1943     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1944     $                            LWORK-IWORK+1, IERR )
1945*
1946*                    Generate right bidiagonalizing vectors in VT
1947*                    (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1948*
1949                     CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1950     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1951                     IWORK = IE + N
1952*
1953*                    Perform bidiagonal QR iteration, computing left
1954*                    singular vectors of A in U and computing right
1955*                    singular vectors of A in VT
1956*                    (Workspace: need BDSPAC)
1957*
1958                     CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
1959     $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
1960     $                            INFO )
1961*
1962                  END IF
1963*
1964               END IF
1965*
1966            END IF
1967*
1968         ELSE
1969*
1970*           M .LT. MNTHR
1971*
1972*           Path 10 (M at least N, but not much larger)
1973*           Reduce to bidiagonal form without QR decomposition
1974*
1975            IE = 1
1976            ITAUQ = IE + N
1977            ITAUP = ITAUQ + N
1978            IWORK = ITAUP + N
1979*
1980*           Bidiagonalize A
1981*           (Workspace: need 3*N + M, prefer 3*N + (M + N)*NB)
1982*
1983            CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
1984     $                   WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
1985     $                   IERR )
1986            IF( WNTUAS ) THEN
1987*
1988*              If left singular vectors desired in U, copy result to U
1989*              and generate left bidiagonalizing vectors in U
1990*              (Workspace: need 3*N + NCU, prefer 3*N + NCU*NB)
1991*
1992               CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1993               IF( WNTUS )
1994     $            NCU = N
1995               IF( WNTUA )
1996     $            NCU = M
1997               CALL DORGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ),
1998     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
1999            END IF
2000            IF( WNTVAS ) THEN
2001*
2002*              If right singular vectors desired in VT, copy result to
2003*              VT and generate right bidiagonalizing vectors in VT
2004*              (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
2005*
2006               CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
2007               CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
2008     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
2009            END IF
2010            IF( WNTUO ) THEN
2011*
2012*              If left singular vectors desired in A, generate left
2013*              bidiagonalizing vectors in A
2014*              (Workspace: need 4*N, prefer 3*N + N*NB)
2015*
2016               CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
2017     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
2018            END IF
2019            IF( WNTVO ) THEN
2020*
2021*              If right singular vectors desired in A, generate right
2022*              bidiagonalizing vectors in A
2023*              (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
2024*
2025               CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
2026     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
2027            END IF
2028            IWORK = IE + N
2029            IF( WNTUAS .OR. WNTUO )
2030     $         NRU = M
2031            IF( WNTUN )
2032     $         NRU = 0
2033            IF( WNTVAS .OR. WNTVO )
2034     $         NCVT = N
2035            IF( WNTVN )
2036     $         NCVT = 0
2037            IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
2038*
2039*              Perform bidiagonal QR iteration, if desired, computing
2040*              left singular vectors in U and computing right singular
2041*              vectors in VT
2042*              (Workspace: need BDSPAC)
2043*
2044               CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
2045     $                      LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
2046            ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
2047*
2048*              Perform bidiagonal QR iteration, if desired, computing
2049*              left singular vectors in U and computing right singular
2050*              vectors in A
2051*              (Workspace: need BDSPAC)
2052*
2053               CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
2054     $                      U, LDU, DUM, 1, WORK( IWORK ), INFO )
2055            ELSE
2056*
2057*              Perform bidiagonal QR iteration, if desired, computing
2058*              left singular vectors in A and computing right singular
2059*              vectors in VT
2060*              (Workspace: need BDSPAC)
2061*
2062               CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
2063     $                      LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
2064            END IF
2065*
2066         END IF
2067*
2068      ELSE
2069*
2070*        A has more columns than rows. If A has sufficiently more
2071*        columns than rows, first reduce using the LQ decomposition (if
2072*        sufficient workspace available)
2073*
2074         IF( N.GE.MNTHR ) THEN
2075*
2076            IF( WNTVN ) THEN
2077*
2078*              Path 1t(N much larger than M, JOBVT='N')
2079*              No right singular vectors to be computed
2080*
2081               ITAU = 1
2082               IWORK = ITAU + M
2083*
2084*              Compute A=L*Q
2085*              (Workspace: need 2*M, prefer M + M*NB)
2086*
2087               CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
2088     $                      LWORK-IWORK+1, IERR )
2089*
2090*              Zero out above L
2091*
2092               CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA )
2093               IE = 1
2094               ITAUQ = IE + M
2095               ITAUP = ITAUQ + M
2096               IWORK = ITAUP + M
2097*
2098*              Bidiagonalize L in A
2099*              (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2100*
2101               CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
2102     $                      WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
2103     $                      IERR )
2104               IF( WNTUO .OR. WNTUAS ) THEN
2105*
2106*                 If left singular vectors desired, generate Q
2107*                 (Workspace: need 4*M, prefer 3*M + M*NB)
2108*
2109                  CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
2110     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2111               END IF
2112               IWORK = IE + M
2113               NRU = 0
2114               IF( WNTUO .OR. WNTUAS )
2115     $            NRU = M
2116*
2117*              Perform bidiagonal QR iteration, computing left singular
2118*              vectors of A in A if desired
2119*              (Workspace: need BDSPAC)
2120*
2121               CALL DBDSQR( 'U', M, 0, NRU, 0, S, WORK( IE ), DUM, 1, A,
2122     $                      LDA, DUM, 1, WORK( IWORK ), INFO )
2123*
2124*              If left singular vectors desired in U, copy them there
2125*
2126               IF( WNTUAS )
2127     $            CALL DLACPY( 'F', M, M, A, LDA, U, LDU )
2128*
2129            ELSE IF( WNTVO .AND. WNTUN ) THEN
2130*
2131*              Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2132*              M right singular vectors to be overwritten on A and
2133*              no left singular vectors to be computed
2134*
2135               IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2136*
2137*                 Sufficient workspace for a fast algorithm
2138*
2139                  IR = 1
2140                  IF( LWORK.GE.MAX( WRKBL, LDA*N + M ) + LDA*M ) THEN
2141*
2142*                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
2143*
2144                     LDWRKU = LDA
2145                     CHUNK = N
2146                     LDWRKR = LDA
2147                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N + M ) + M*M ) THEN
2148*
2149*                    WORK(IU) is LDA by N and WORK(IR) is M by M
2150*
2151                     LDWRKU = LDA
2152                     CHUNK = N
2153                     LDWRKR = M
2154                  ELSE
2155*
2156*                    WORK(IU) is M by CHUNK and WORK(IR) is M by M
2157*
2158                     LDWRKU = M
2159                     CHUNK = ( LWORK-M*M-M ) / M
2160                     LDWRKR = M
2161                  END IF
2162                  ITAU = IR + LDWRKR*M
2163                  IWORK = ITAU + M
2164*
2165*                 Compute A=L*Q
2166*                 (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2167*
2168                  CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2169     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2170*
2171*                 Copy L to WORK(IR) and zero out above it
2172*
2173                  CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR )
2174                  CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2175     $                         WORK( IR+LDWRKR ), LDWRKR )
2176*
2177*                 Generate Q in A
2178*                 (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2179*
2180                  CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2181     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2182                  IE = ITAU
2183                  ITAUQ = IE + M
2184                  ITAUP = ITAUQ + M
2185                  IWORK = ITAUP + M
2186*
2187*                 Bidiagonalize L in WORK(IR)
2188*                 (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2189*
2190                  CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S, WORK( IE ),
2191     $                         WORK( ITAUQ ), WORK( ITAUP ),
2192     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2193*
2194*                 Generate right vectors bidiagonalizing L
2195*                 (Workspace: need M*M + 4*M-1, prefer M*M + 3*M + (M-1)*NB)
2196*
2197                  CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2198     $                         WORK( ITAUP ), WORK( IWORK ),
2199     $                         LWORK-IWORK+1, IERR )
2200                  IWORK = IE + M
2201*
2202*                 Perform bidiagonal QR iteration, computing right
2203*                 singular vectors of L in WORK(IR)
2204*                 (Workspace: need M*M + BDSPAC)
2205*
2206                  CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
2207     $                         WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2208     $                         WORK( IWORK ), INFO )
2209                  IU = IE + M
2210*
2211*                 Multiply right singular vectors of L in WORK(IR) by Q
2212*                 in A, storing result in WORK(IU) and copying to A
2213*                 (Workspace: need M*M + 2*M, prefer M*M + M*N + M)
2214*
2215                  DO 30 I = 1, N, CHUNK
2216                     BLK = MIN( N-I+1, CHUNK )
2217                     CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ),
2218     $                           LDWRKR, A( 1, I ), LDA, ZERO,
2219     $                           WORK( IU ), LDWRKU )
2220                     CALL DLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
2221     $                            A( 1, I ), LDA )
2222   30             CONTINUE
2223*
2224               ELSE
2225*
2226*                 Insufficient workspace for a fast algorithm
2227*
2228                  IE = 1
2229                  ITAUQ = IE + M
2230                  ITAUP = ITAUQ + M
2231                  IWORK = ITAUP + M
2232*
2233*                 Bidiagonalize A
2234*                 (Workspace: need 3*M + N, prefer 3*M + (M + N)*NB)
2235*
2236                  CALL DGEBRD( M, N, A, LDA, S, WORK( IE ),
2237     $                         WORK( ITAUQ ), WORK( ITAUP ),
2238     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2239*
2240*                 Generate right vectors bidiagonalizing A
2241*                 (Workspace: need 4*M, prefer 3*M + M*NB)
2242*
2243                  CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
2244     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2245                  IWORK = IE + M
2246*
2247*                 Perform bidiagonal QR iteration, computing right
2248*                 singular vectors of A in A
2249*                 (Workspace: need BDSPAC)
2250*
2251                  CALL DBDSQR( 'L', M, N, 0, 0, S, WORK( IE ), A, LDA,
2252     $                         DUM, 1, DUM, 1, WORK( IWORK ), INFO )
2253*
2254               END IF
2255*
2256            ELSE IF( WNTVO .AND. WNTUAS ) THEN
2257*
2258*              Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2259*              M right singular vectors to be overwritten on A and
2260*              M left singular vectors to be computed in U
2261*
2262               IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2263*
2264*                 Sufficient workspace for a fast algorithm
2265*
2266                  IR = 1
2267                  IF( LWORK.GE.MAX( WRKBL, LDA*N + M ) + LDA*M ) THEN
2268*
2269*                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
2270*
2271                     LDWRKU = LDA
2272                     CHUNK = N
2273                     LDWRKR = LDA
2274                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N + M ) + M*M ) THEN
2275*
2276*                    WORK(IU) is LDA by N and WORK(IR) is M by M
2277*
2278                     LDWRKU = LDA
2279                     CHUNK = N
2280                     LDWRKR = M
2281                  ELSE
2282*
2283*                    WORK(IU) is M by CHUNK and WORK(IR) is M by M
2284*
2285                     LDWRKU = M
2286                     CHUNK = ( LWORK-M*M-M ) / M
2287                     LDWRKR = M
2288                  END IF
2289                  ITAU = IR + LDWRKR*M
2290                  IWORK = ITAU + M
2291*
2292*                 Compute A=L*Q
2293*                 (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2294*
2295                  CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2296     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2297*
2298*                 Copy L to U, zeroing about above it
2299*
2300                  CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
2301                  CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
2302     $                         LDU )
2303*
2304*                 Generate Q in A
2305*                 (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2306*
2307                  CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2308     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2309                  IE = ITAU
2310                  ITAUQ = IE + M
2311                  ITAUP = ITAUQ + M
2312                  IWORK = ITAUP + M
2313*
2314*                 Bidiagonalize L in U, copying result to WORK(IR)
2315*                 (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2316*
2317                  CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
2318     $                         WORK( ITAUQ ), WORK( ITAUP ),
2319     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2320                  CALL DLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR )
2321*
2322*                 Generate right vectors bidiagonalizing L in WORK(IR)
2323*                 (Workspace: need M*M + 4*M-1, prefer M*M + 3*M + (M-1)*NB)
2324*
2325                  CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2326     $                         WORK( ITAUP ), WORK( IWORK ),
2327     $                         LWORK-IWORK+1, IERR )
2328*
2329*                 Generate left vectors bidiagonalizing L in U
2330*                 (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB)
2331*
2332                  CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2333     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2334                  IWORK = IE + M
2335*
2336*                 Perform bidiagonal QR iteration, computing left
2337*                 singular vectors of L in U, and computing right
2338*                 singular vectors of L in WORK(IR)
2339*                 (Workspace: need M*M + BDSPAC)
2340*
2341                  CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2342     $                         WORK( IR ), LDWRKR, U, LDU, DUM, 1,
2343     $                         WORK( IWORK ), INFO )
2344                  IU = IE + M
2345*
2346*                 Multiply right singular vectors of L in WORK(IR) by Q
2347*                 in A, storing result in WORK(IU) and copying to A
2348*                 (Workspace: need M*M + 2*M, prefer M*M + M*N + M))
2349*
2350                  DO 40 I = 1, N, CHUNK
2351                     BLK = MIN( N-I+1, CHUNK )
2352                     CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ),
2353     $                           LDWRKR, A( 1, I ), LDA, ZERO,
2354     $                           WORK( IU ), LDWRKU )
2355                     CALL DLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
2356     $                            A( 1, I ), LDA )
2357   40             CONTINUE
2358*
2359               ELSE
2360*
2361*                 Insufficient workspace for a fast algorithm
2362*
2363                  ITAU = 1
2364                  IWORK = ITAU + M
2365*
2366*                 Compute A=L*Q
2367*                 (Workspace: need 2*M, prefer M + M*NB)
2368*
2369                  CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2370     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2371*
2372*                 Copy L to U, zeroing out above it
2373*
2374                  CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
2375                  CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
2376     $                         LDU )
2377*
2378*                 Generate Q in A
2379*                 (Workspace: need 2*M, prefer M + M*NB)
2380*
2381                  CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2382     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2383                  IE = ITAU
2384                  ITAUQ = IE + M
2385                  ITAUP = ITAUQ + M
2386                  IWORK = ITAUP + M
2387*
2388*                 Bidiagonalize L in U
2389*                 (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2390*
2391                  CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
2392     $                         WORK( ITAUQ ), WORK( ITAUP ),
2393     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2394*
2395*                 Multiply right vectors bidiagonalizing L by Q in A
2396*                 (Workspace: need 3*M + N, prefer 3*M + N*NB)
2397*
2398                  CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
2399     $                         WORK( ITAUP ), A, LDA, WORK( IWORK ),
2400     $                         LWORK-IWORK+1, IERR )
2401*
2402*                 Generate left vectors bidiagonalizing L in U
2403*                 (Workspace: need 4*M, prefer 3*M + M*NB)
2404*
2405                  CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2406     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2407                  IWORK = IE + M
2408*
2409*                 Perform bidiagonal QR iteration, computing left
2410*                 singular vectors of A in U and computing right
2411*                 singular vectors of A in A
2412*                 (Workspace: need BDSPAC)
2413*
2414                  CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), A, LDA,
2415     $                         U, LDU, DUM, 1, WORK( IWORK ), INFO )
2416*
2417               END IF
2418*
2419            ELSE IF( WNTVS ) THEN
2420*
2421               IF( WNTUN ) THEN
2422*
2423*                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2424*                 M right singular vectors to be computed in VT and
2425*                 no left singular vectors to be computed
2426*
2427                  IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2428*
2429*                    Sufficient workspace for a fast algorithm
2430*
2431                     IR = 1
2432                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
2433*
2434*                       WORK(IR) is LDA by M
2435*
2436                        LDWRKR = LDA
2437                     ELSE
2438*
2439*                       WORK(IR) is M by M
2440*
2441                        LDWRKR = M
2442                     END IF
2443                     ITAU = IR + LDWRKR*M
2444                     IWORK = ITAU + M
2445*
2446*                    Compute A=L*Q
2447*                    (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2448*
2449                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2450     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2451*
2452*                    Copy L to WORK(IR), zeroing out above it
2453*
2454                     CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ),
2455     $                            LDWRKR )
2456                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2457     $                            WORK( IR+LDWRKR ), LDWRKR )
2458*
2459*                    Generate Q in A
2460*                    (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2461*
2462                     CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2463     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2464                     IE = ITAU
2465                     ITAUQ = IE + M
2466                     ITAUP = ITAUQ + M
2467                     IWORK = ITAUP + M
2468*
2469*                    Bidiagonalize L in WORK(IR)
2470*                    (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2471*
2472                     CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S,
2473     $                            WORK( IE ), WORK( ITAUQ ),
2474     $                            WORK( ITAUP ), WORK( IWORK ),
2475     $                            LWORK-IWORK+1, IERR )
2476*
2477*                    Generate right vectors bidiagonalizing L in
2478*                    WORK(IR)
2479*                    (Workspace: need M*M + 4*M, prefer M*M + 3*M + (M-1)*NB)
2480*
2481                     CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2482     $                            WORK( ITAUP ), WORK( IWORK ),
2483     $                            LWORK-IWORK+1, IERR )
2484                     IWORK = IE + M
2485*
2486*                    Perform bidiagonal QR iteration, computing right
2487*                    singular vectors of L in WORK(IR)
2488*                    (Workspace: need M*M + BDSPAC)
2489*
2490                     CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
2491     $                            WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2492     $                            WORK( IWORK ), INFO )
2493*
2494*                    Multiply right singular vectors of L in WORK(IR) by
2495*                    Q in A, storing result in VT
2496*                    (Workspace: need M*M)
2497*
2498                     CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ),
2499     $                           LDWRKR, A, LDA, ZERO, VT, LDVT )
2500*
2501                  ELSE
2502*
2503*                    Insufficient workspace for a fast algorithm
2504*
2505                     ITAU = 1
2506                     IWORK = ITAU + M
2507*
2508*                    Compute A=L*Q
2509*                    (Workspace: need 2*M, prefer M + M*NB)
2510*
2511                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2512     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2513*
2514*                    Copy result to VT
2515*
2516                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2517*
2518*                    Generate Q in VT
2519*                    (Workspace: need 2*M, prefer M + M*NB)
2520*
2521                     CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2522     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2523                     IE = ITAU
2524                     ITAUQ = IE + M
2525                     ITAUP = ITAUQ + M
2526                     IWORK = ITAUP + M
2527*
2528*                    Zero out above L in A
2529*
2530                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
2531     $                            LDA )
2532*
2533*                    Bidiagonalize L in A
2534*                    (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2535*
2536                     CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
2537     $                            WORK( ITAUQ ), WORK( ITAUP ),
2538     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2539*
2540*                    Multiply right vectors bidiagonalizing L by Q in VT
2541*                    (Workspace: need 3*M + N, prefer 3*M + N*NB)
2542*
2543                     CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
2544     $                            WORK( ITAUP ), VT, LDVT,
2545     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2546                     IWORK = IE + M
2547*
2548*                    Perform bidiagonal QR iteration, computing right
2549*                    singular vectors of A in VT
2550*                    (Workspace: need BDSPAC)
2551*
2552                     CALL DBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT,
2553     $                            LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
2554     $                            INFO )
2555*
2556                  END IF
2557*
2558               ELSE IF( WNTUO ) THEN
2559*
2560*                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2561*                 M right singular vectors to be computed in VT and
2562*                 M left singular vectors to be overwritten on A
2563*
2564                  IF( LWORK.GE.2*M*M+MAX( 4*M, BDSPAC ) ) THEN
2565*
2566*                    Sufficient workspace for a fast algorithm
2567*
2568                     IU = 1
2569                     IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
2570*
2571*                       WORK(IU) is LDA by M and WORK(IR) is LDA by M
2572*
2573                        LDWRKU = LDA
2574                        IR = IU + LDWRKU*M
2575                        LDWRKR = LDA
2576                     ELSE IF( LWORK.GE.WRKBL+( LDA + M )*M ) THEN
2577*
2578*                       WORK(IU) is LDA by M and WORK(IR) is M by M
2579*
2580                        LDWRKU = LDA
2581                        IR = IU + LDWRKU*M
2582                        LDWRKR = M
2583                     ELSE
2584*
2585*                       WORK(IU) is M by M and WORK(IR) is M by M
2586*
2587                        LDWRKU = M
2588                        IR = IU + LDWRKU*M
2589                        LDWRKR = M
2590                     END IF
2591                     ITAU = IR + LDWRKR*M
2592                     IWORK = ITAU + M
2593*
2594*                    Compute A=L*Q
2595*                    (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB)
2596*
2597                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2598     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2599*
2600*                    Copy L to WORK(IU), zeroing out below it
2601*
2602                     CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
2603     $                            LDWRKU )
2604                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2605     $                            WORK( IU+LDWRKU ), LDWRKU )
2606*
2607*                    Generate Q in A
2608*                    (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB)
2609*
2610                     CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2611     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2612                     IE = ITAU
2613                     ITAUQ = IE + M
2614                     ITAUP = ITAUQ + M
2615                     IWORK = ITAUP + M
2616*
2617*                    Bidiagonalize L in WORK(IU), copying result to
2618*                    WORK(IR)
2619*                    (Workspace: need 2*M*M + 4*M,
2620*                                prefer 2*M*M+3*M+2*M*NB)
2621*
2622                     CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
2623     $                            WORK( IE ), WORK( ITAUQ ),
2624     $                            WORK( ITAUP ), WORK( IWORK ),
2625     $                            LWORK-IWORK+1, IERR )
2626                     CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU,
2627     $                            WORK( IR ), LDWRKR )
2628*
2629*                    Generate right bidiagonalizing vectors in WORK(IU)
2630*                    (Workspace: need 2*M*M + 4*M-1,
2631*                                prefer 2*M*M+3*M+(M-1)*NB)
2632*
2633                     CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
2634     $                            WORK( ITAUP ), WORK( IWORK ),
2635     $                            LWORK-IWORK+1, IERR )
2636*
2637*                    Generate left bidiagonalizing vectors in WORK(IR)
2638*                    (Workspace: need 2*M*M + 4*M, prefer 2*M*M + 3*M + M*NB)
2639*
2640                     CALL DORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
2641     $                            WORK( ITAUQ ), WORK( IWORK ),
2642     $                            LWORK-IWORK+1, IERR )
2643                     IWORK = IE + M
2644*
2645*                    Perform bidiagonal QR iteration, computing left
2646*                    singular vectors of L in WORK(IR) and computing
2647*                    right singular vectors of L in WORK(IU)
2648*                    (Workspace: need 2*M*M + BDSPAC)
2649*
2650                     CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2651     $                            WORK( IU ), LDWRKU, WORK( IR ),
2652     $                            LDWRKR, DUM, 1, WORK( IWORK ), INFO )
2653*
2654*                    Multiply right singular vectors of L in WORK(IU) by
2655*                    Q in A, storing result in VT
2656*                    (Workspace: need M*M)
2657*
2658                     CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
2659     $                           LDWRKU, A, LDA, ZERO, VT, LDVT )
2660*
2661*                    Copy left singular vectors of L to A
2662*                    (Workspace: need M*M)
2663*
2664                     CALL DLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
2665     $                            LDA )
2666*
2667                  ELSE
2668*
2669*                    Insufficient workspace for a fast algorithm
2670*
2671                     ITAU = 1
2672                     IWORK = ITAU + M
2673*
2674*                    Compute A=L*Q, copying result to VT
2675*                    (Workspace: need 2*M, prefer M + M*NB)
2676*
2677                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2678     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2679                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2680*
2681*                    Generate Q in VT
2682*                    (Workspace: need 2*M, prefer M + M*NB)
2683*
2684                     CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2685     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2686                     IE = ITAU
2687                     ITAUQ = IE + M
2688                     ITAUP = ITAUQ + M
2689                     IWORK = ITAUP + M
2690*
2691*                    Zero out above L in A
2692*
2693                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
2694     $                            LDA )
2695*
2696*                    Bidiagonalize L in A
2697*                    (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2698*
2699                     CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
2700     $                            WORK( ITAUQ ), WORK( ITAUP ),
2701     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2702*
2703*                    Multiply right vectors bidiagonalizing L by Q in VT
2704*                    (Workspace: need 3*M + N, prefer 3*M + N*NB)
2705*
2706                     CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
2707     $                            WORK( ITAUP ), VT, LDVT,
2708     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2709*
2710*                    Generate left bidiagonalizing vectors of L in A
2711*                    (Workspace: need 4*M, prefer 3*M + M*NB)
2712*
2713                     CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
2714     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2715                     IWORK = IE + M
2716*
2717*                    Perform bidiagonal QR iteration, compute left
2718*                    singular vectors of A in A and compute right
2719*                    singular vectors of A in VT
2720*                    (Workspace: need BDSPAC)
2721*
2722                     CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
2723     $                            LDVT, A, LDA, DUM, 1, WORK( IWORK ),
2724     $                            INFO )
2725*
2726                  END IF
2727*
2728               ELSE IF( WNTUAS ) THEN
2729*
2730*                 Path 6t(N much larger than M, JOBU='S' or 'A',
2731*                         JOBVT='S')
2732*                 M right singular vectors to be computed in VT and
2733*                 M left singular vectors to be computed in U
2734*
2735                  IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2736*
2737*                    Sufficient workspace for a fast algorithm
2738*
2739                     IU = 1
2740                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
2741*
2742*                       WORK(IU) is LDA by N
2743*
2744                        LDWRKU = LDA
2745                     ELSE
2746*
2747*                       WORK(IU) is LDA by M
2748*
2749                        LDWRKU = M
2750                     END IF
2751                     ITAU = IU + LDWRKU*M
2752                     IWORK = ITAU + M
2753*
2754*                    Compute A=L*Q
2755*                    (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2756*
2757                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2758     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2759*
2760*                    Copy L to WORK(IU), zeroing out above it
2761*
2762                     CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
2763     $                            LDWRKU )
2764                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2765     $                            WORK( IU+LDWRKU ), LDWRKU )
2766*
2767*                    Generate Q in A
2768*                    (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2769*
2770                     CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2771     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2772                     IE = ITAU
2773                     ITAUQ = IE + M
2774                     ITAUP = ITAUQ + M
2775                     IWORK = ITAUP + M
2776*
2777*                    Bidiagonalize L in WORK(IU), copying result to U
2778*                    (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2779*
2780                     CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
2781     $                            WORK( IE ), WORK( ITAUQ ),
2782     $                            WORK( ITAUP ), WORK( IWORK ),
2783     $                            LWORK-IWORK+1, IERR )
2784                     CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
2785     $                            LDU )
2786*
2787*                    Generate right bidiagonalizing vectors in WORK(IU)
2788*                    (Workspace: need M*M + 4*M-1,
2789*                                prefer M*M+3*M+(M-1)*NB)
2790*
2791                     CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
2792     $                            WORK( ITAUP ), WORK( IWORK ),
2793     $                            LWORK-IWORK+1, IERR )
2794*
2795*                    Generate left bidiagonalizing vectors in U
2796*                    (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB)
2797*
2798                     CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2799     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2800                     IWORK = IE + M
2801*
2802*                    Perform bidiagonal QR iteration, computing left
2803*                    singular vectors of L in U and computing right
2804*                    singular vectors of L in WORK(IU)
2805*                    (Workspace: need M*M + BDSPAC)
2806*
2807                     CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2808     $                            WORK( IU ), LDWRKU, U, LDU, DUM, 1,
2809     $                            WORK( IWORK ), INFO )
2810*
2811*                    Multiply right singular vectors of L in WORK(IU) by
2812*                    Q in A, storing result in VT
2813*                    (Workspace: need M*M)
2814*
2815                     CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
2816     $                           LDWRKU, A, LDA, ZERO, VT, LDVT )
2817*
2818                  ELSE
2819*
2820*                    Insufficient workspace for a fast algorithm
2821*
2822                     ITAU = 1
2823                     IWORK = ITAU + M
2824*
2825*                    Compute A=L*Q, copying result to VT
2826*                    (Workspace: need 2*M, prefer M + M*NB)
2827*
2828                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2829     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2830                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2831*
2832*                    Generate Q in VT
2833*                    (Workspace: need 2*M, prefer M + M*NB)
2834*
2835                     CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2836     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2837*
2838*                    Copy L to U, zeroing out above it
2839*
2840                     CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
2841                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
2842     $                            LDU )
2843                     IE = ITAU
2844                     ITAUQ = IE + M
2845                     ITAUP = ITAUQ + M
2846                     IWORK = ITAUP + M
2847*
2848*                    Bidiagonalize L in U
2849*                    (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2850*
2851                     CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
2852     $                            WORK( ITAUQ ), WORK( ITAUP ),
2853     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2854*
2855*                    Multiply right bidiagonalizing vectors in U by Q
2856*                    in VT
2857*                    (Workspace: need 3*M + N, prefer 3*M + N*NB)
2858*
2859                     CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
2860     $                            WORK( ITAUP ), VT, LDVT,
2861     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2862*
2863*                    Generate left bidiagonalizing vectors in U
2864*                    (Workspace: need 4*M, prefer 3*M + M*NB)
2865*
2866                     CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2867     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2868                     IWORK = IE + M
2869*
2870*                    Perform bidiagonal QR iteration, computing left
2871*                    singular vectors of A in U and computing right
2872*                    singular vectors of A in VT
2873*                    (Workspace: need BDSPAC)
2874*
2875                     CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
2876     $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
2877     $                            INFO )
2878*
2879                  END IF
2880*
2881               END IF
2882*
2883            ELSE IF( WNTVA ) THEN
2884*
2885               IF( WNTUN ) THEN
2886*
2887*                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')
2888*                 N right singular vectors to be computed in VT and
2889*                 no left singular vectors to be computed
2890*
2891                  IF( LWORK.GE.M*M+MAX( N + M, 4*M, BDSPAC ) ) THEN
2892*
2893*                    Sufficient workspace for a fast algorithm
2894*
2895                     IR = 1
2896                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
2897*
2898*                       WORK(IR) is LDA by M
2899*
2900                        LDWRKR = LDA
2901                     ELSE
2902*
2903*                       WORK(IR) is M by M
2904*
2905                        LDWRKR = M
2906                     END IF
2907                     ITAU = IR + LDWRKR*M
2908                     IWORK = ITAU + M
2909*
2910*                    Compute A=L*Q, copying result to VT
2911*                    (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2912*
2913                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2914     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2915                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2916*
2917*                    Copy L to WORK(IR), zeroing out above it
2918*
2919                     CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ),
2920     $                            LDWRKR )
2921                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2922     $                            WORK( IR+LDWRKR ), LDWRKR )
2923*
2924*                    Generate Q in VT
2925*                    (Workspace: need M*M + M + N, prefer M*M + M + N*NB)
2926*
2927                     CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
2928     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2929                     IE = ITAU
2930                     ITAUQ = IE + M
2931                     ITAUP = ITAUQ + M
2932                     IWORK = ITAUP + M
2933*
2934*                    Bidiagonalize L in WORK(IR)
2935*                    (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2936*
2937                     CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S,
2938     $                            WORK( IE ), WORK( ITAUQ ),
2939     $                            WORK( ITAUP ), WORK( IWORK ),
2940     $                            LWORK-IWORK+1, IERR )
2941*
2942*                    Generate right bidiagonalizing vectors in WORK(IR)
2943*                    (Workspace: need M*M + 4*M-1,
2944*                                prefer M*M+3*M+(M-1)*NB)
2945*
2946                     CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2947     $                            WORK( ITAUP ), WORK( IWORK ),
2948     $                            LWORK-IWORK+1, IERR )
2949                     IWORK = IE + M
2950*
2951*                    Perform bidiagonal QR iteration, computing right
2952*                    singular vectors of L in WORK(IR)
2953*                    (Workspace: need M*M + BDSPAC)
2954*
2955                     CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
2956     $                            WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2957     $                            WORK( IWORK ), INFO )
2958*
2959*                    Multiply right singular vectors of L in WORK(IR) by
2960*                    Q in VT, storing result in A
2961*                    (Workspace: need M*M)
2962*
2963                     CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ),
2964     $                           LDWRKR, VT, LDVT, ZERO, A, LDA )
2965*
2966*                    Copy right singular vectors of A from A to VT
2967*
2968                     CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
2969*
2970                  ELSE
2971*
2972*                    Insufficient workspace for a fast algorithm
2973*
2974                     ITAU = 1
2975                     IWORK = ITAU + M
2976*
2977*                    Compute A=L*Q, copying result to VT
2978*                    (Workspace: need 2*M, prefer M + M*NB)
2979*
2980                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2981     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2982                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2983*
2984*                    Generate Q in VT
2985*                    (Workspace: need M + N, prefer M + N*NB)
2986*
2987                     CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
2988     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2989                     IE = ITAU
2990                     ITAUQ = IE + M
2991                     ITAUP = ITAUQ + M
2992                     IWORK = ITAUP + M
2993*
2994*                    Zero out above L in A
2995*
2996                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
2997     $                            LDA )
2998*
2999*                    Bidiagonalize L in A
3000*                    (Workspace: need 4*M, prefer 3*M + 2*M*NB)
3001*
3002                     CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
3003     $                            WORK( ITAUQ ), WORK( ITAUP ),
3004     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3005*
3006*                    Multiply right bidiagonalizing vectors in A by Q
3007*                    in VT
3008*                    (Workspace: need 3*M + N, prefer 3*M + N*NB)
3009*
3010                     CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
3011     $                            WORK( ITAUP ), VT, LDVT,
3012     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3013                     IWORK = IE + M
3014*
3015*                    Perform bidiagonal QR iteration, computing right
3016*                    singular vectors of A in VT
3017*                    (Workspace: need BDSPAC)
3018*
3019                     CALL DBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT,
3020     $                            LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
3021     $                            INFO )
3022*
3023                  END IF
3024*
3025               ELSE IF( WNTUO ) THEN
3026*
3027*                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')
3028*                 N right singular vectors to be computed in VT and
3029*                 M left singular vectors to be overwritten on A
3030*
3031                  IF( LWORK.GE.2*M*M+MAX( N + M, 4*M, BDSPAC ) ) THEN
3032*
3033*                    Sufficient workspace for a fast algorithm
3034*
3035                     IU = 1
3036                     IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
3037*
3038*                       WORK(IU) is LDA by M and WORK(IR) is LDA by M
3039*
3040                        LDWRKU = LDA
3041                        IR = IU + LDWRKU*M
3042                        LDWRKR = LDA
3043                     ELSE IF( LWORK.GE.WRKBL+( LDA + M )*M ) THEN
3044*
3045*                       WORK(IU) is LDA by M and WORK(IR) is M by M
3046*
3047                        LDWRKU = LDA
3048                        IR = IU + LDWRKU*M
3049                        LDWRKR = M
3050                     ELSE
3051*
3052*                       WORK(IU) is M by M and WORK(IR) is M by M
3053*
3054                        LDWRKU = M
3055                        IR = IU + LDWRKU*M
3056                        LDWRKR = M
3057                     END IF
3058                     ITAU = IR + LDWRKR*M
3059                     IWORK = ITAU + M
3060*
3061*                    Compute A=L*Q, copying result to VT
3062*                    (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB)
3063*
3064                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
3065     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3066                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3067*
3068*                    Generate Q in VT
3069*                    (Workspace: need 2*M*M + M + N, prefer 2*M*M + M + N*NB)
3070*
3071                     CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3072     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3073*
3074*                    Copy L to WORK(IU), zeroing out above it
3075*
3076                     CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
3077     $                            LDWRKU )
3078                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
3079     $                            WORK( IU+LDWRKU ), LDWRKU )
3080                     IE = ITAU
3081                     ITAUQ = IE + M
3082                     ITAUP = ITAUQ + M
3083                     IWORK = ITAUP + M
3084*
3085*                    Bidiagonalize L in WORK(IU), copying result to
3086*                    WORK(IR)
3087*                    (Workspace: need 2*M*M + 4*M,
3088*                                prefer 2*M*M+3*M+2*M*NB)
3089*
3090                     CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
3091     $                            WORK( IE ), WORK( ITAUQ ),
3092     $                            WORK( ITAUP ), WORK( IWORK ),
3093     $                            LWORK-IWORK+1, IERR )
3094                     CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU,
3095     $                            WORK( IR ), LDWRKR )
3096*
3097*                    Generate right bidiagonalizing vectors in WORK(IU)
3098*                    (Workspace: need 2*M*M + 4*M-1,
3099*                                prefer 2*M*M+3*M+(M-1)*NB)
3100*
3101                     CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
3102     $                            WORK( ITAUP ), WORK( IWORK ),
3103     $                            LWORK-IWORK+1, IERR )
3104*
3105*                    Generate left bidiagonalizing vectors in WORK(IR)
3106*                    (Workspace: need 2*M*M + 4*M, prefer 2*M*M + 3*M + M*NB)
3107*
3108                     CALL DORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
3109     $                            WORK( ITAUQ ), WORK( IWORK ),
3110     $                            LWORK-IWORK+1, IERR )
3111                     IWORK = IE + M
3112*
3113*                    Perform bidiagonal QR iteration, computing left
3114*                    singular vectors of L in WORK(IR) and computing
3115*                    right singular vectors of L in WORK(IU)
3116*                    (Workspace: need 2*M*M + BDSPAC)
3117*
3118                     CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
3119     $                            WORK( IU ), LDWRKU, WORK( IR ),
3120     $                            LDWRKR, DUM, 1, WORK( IWORK ), INFO )
3121*
3122*                    Multiply right singular vectors of L in WORK(IU) by
3123*                    Q in VT, storing result in A
3124*                    (Workspace: need M*M)
3125*
3126                     CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
3127     $                           LDWRKU, VT, LDVT, ZERO, A, LDA )
3128*
3129*                    Copy right singular vectors of A from A to VT
3130*
3131                     CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
3132*
3133*                    Copy left singular vectors of A from WORK(IR) to A
3134*
3135                     CALL DLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
3136     $                            LDA )
3137*
3138                  ELSE
3139*
3140*                    Insufficient workspace for a fast algorithm
3141*
3142                     ITAU = 1
3143                     IWORK = ITAU + M
3144*
3145*                    Compute A=L*Q, copying result to VT
3146*                    (Workspace: need 2*M, prefer M + M*NB)
3147*
3148                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
3149     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3150                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3151*
3152*                    Generate Q in VT
3153*                    (Workspace: need M + N, prefer M + N*NB)
3154*
3155                     CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3156     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3157                     IE = ITAU
3158                     ITAUQ = IE + M
3159                     ITAUP = ITAUQ + M
3160                     IWORK = ITAUP + M
3161*
3162*                    Zero out above L in A
3163*
3164                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
3165     $                            LDA )
3166*
3167*                    Bidiagonalize L in A
3168*                    (Workspace: need 4*M, prefer 3*M + 2*M*NB)
3169*
3170                     CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
3171     $                            WORK( ITAUQ ), WORK( ITAUP ),
3172     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3173*
3174*                    Multiply right bidiagonalizing vectors in A by Q
3175*                    in VT
3176*                    (Workspace: need 3*M + N, prefer 3*M + N*NB)
3177*
3178                     CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
3179     $                            WORK( ITAUP ), VT, LDVT,
3180     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3181*
3182*                    Generate left bidiagonalizing vectors in A
3183*                    (Workspace: need 4*M, prefer 3*M + M*NB)
3184*
3185                     CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
3186     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3187                     IWORK = IE + M
3188*
3189*                    Perform bidiagonal QR iteration, computing left
3190*                    singular vectors of A in A and computing right
3191*                    singular vectors of A in VT
3192*                    (Workspace: need BDSPAC)
3193*
3194                     CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
3195     $                            LDVT, A, LDA, DUM, 1, WORK( IWORK ),
3196     $                            INFO )
3197*
3198                  END IF
3199*
3200               ELSE IF( WNTUAS ) THEN
3201*
3202*                 Path 9t(N much larger than M, JOBU='S' or 'A',
3203*                         JOBVT='A')
3204*                 N right singular vectors to be computed in VT and
3205*                 M left singular vectors to be computed in U
3206*
3207                  IF( LWORK.GE.M*M+MAX( N + M, 4*M, BDSPAC ) ) THEN
3208*
3209*                    Sufficient workspace for a fast algorithm
3210*
3211                     IU = 1
3212                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
3213*
3214*                       WORK(IU) is LDA by M
3215*
3216                        LDWRKU = LDA
3217                     ELSE
3218*
3219*                       WORK(IU) is M by M
3220*
3221                        LDWRKU = M
3222                     END IF
3223                     ITAU = IU + LDWRKU*M
3224                     IWORK = ITAU + M
3225*
3226*                    Compute A=L*Q, copying result to VT
3227*                    (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
3228*
3229                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
3230     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3231                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3232*
3233*                    Generate Q in VT
3234*                    (Workspace: need M*M + M + N, prefer M*M + M + N*NB)
3235*
3236                     CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3237     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3238*
3239*                    Copy L to WORK(IU), zeroing out above it
3240*
3241                     CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
3242     $                            LDWRKU )
3243                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
3244     $                            WORK( IU+LDWRKU ), LDWRKU )
3245                     IE = ITAU
3246                     ITAUQ = IE + M
3247                     ITAUP = ITAUQ + M
3248                     IWORK = ITAUP + M
3249*
3250*                    Bidiagonalize L in WORK(IU), copying result to U
3251*                    (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
3252*
3253                     CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
3254     $                            WORK( IE ), WORK( ITAUQ ),
3255     $                            WORK( ITAUP ), WORK( IWORK ),
3256     $                            LWORK-IWORK+1, IERR )
3257                     CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
3258     $                            LDU )
3259*
3260*                    Generate right bidiagonalizing vectors in WORK(IU)
3261*                    (Workspace: need M*M + 4*M, prefer M*M + 3*M + (M-1)*NB)
3262*
3263                     CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
3264     $                            WORK( ITAUP ), WORK( IWORK ),
3265     $                            LWORK-IWORK+1, IERR )
3266*
3267*                    Generate left bidiagonalizing vectors in U
3268*                    (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB)
3269*
3270                     CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3271     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3272                     IWORK = IE + M
3273*
3274*                    Perform bidiagonal QR iteration, computing left
3275*                    singular vectors of L in U and computing right
3276*                    singular vectors of L in WORK(IU)
3277*                    (Workspace: need M*M + BDSPAC)
3278*
3279                     CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
3280     $                            WORK( IU ), LDWRKU, U, LDU, DUM, 1,
3281     $                            WORK( IWORK ), INFO )
3282*
3283*                    Multiply right singular vectors of L in WORK(IU) by
3284*                    Q in VT, storing result in A
3285*                    (Workspace: need M*M)
3286*
3287                     CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
3288     $                           LDWRKU, VT, LDVT, ZERO, A, LDA )
3289*
3290*                    Copy right singular vectors of A from A to VT
3291*
3292                     CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
3293*
3294                  ELSE
3295*
3296*                    Insufficient workspace for a fast algorithm
3297*
3298                     ITAU = 1
3299                     IWORK = ITAU + M
3300*
3301*                    Compute A=L*Q, copying result to VT
3302*                    (Workspace: need 2*M, prefer M + M*NB)
3303*
3304                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
3305     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3306                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3307*
3308*                    Generate Q in VT
3309*                    (Workspace: need M + N, prefer M + N*NB)
3310*
3311                     CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3312     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3313*
3314*                    Copy L to U, zeroing out above it
3315*
3316                     CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
3317                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
3318     $                            LDU )
3319                     IE = ITAU
3320                     ITAUQ = IE + M
3321                     ITAUP = ITAUQ + M
3322                     IWORK = ITAUP + M
3323*
3324*                    Bidiagonalize L in U
3325*                    (Workspace: need 4*M, prefer 3*M + 2*M*NB)
3326*
3327                     CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
3328     $                            WORK( ITAUQ ), WORK( ITAUP ),
3329     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3330*
3331*                    Multiply right bidiagonalizing vectors in U by Q
3332*                    in VT
3333*                    (Workspace: need 3*M + N, prefer 3*M + N*NB)
3334*
3335                     CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
3336     $                            WORK( ITAUP ), VT, LDVT,
3337     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3338*
3339*                    Generate left bidiagonalizing vectors in U
3340*                    (Workspace: need 4*M, prefer 3*M + M*NB)
3341*
3342                     CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3343     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3344                     IWORK = IE + M
3345*
3346*                    Perform bidiagonal QR iteration, computing left
3347*                    singular vectors of A in U and computing right
3348*                    singular vectors of A in VT
3349*                    (Workspace: need BDSPAC)
3350*
3351                     CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
3352     $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
3353     $                            INFO )
3354*
3355                  END IF
3356*
3357               END IF
3358*
3359            END IF
3360*
3361         ELSE
3362*
3363*           N .LT. MNTHR
3364*
3365*           Path 10t(N greater than M, but not much larger)
3366*           Reduce to bidiagonal form without LQ decomposition
3367*
3368            IE = 1
3369            ITAUQ = IE + M
3370            ITAUP = ITAUQ + M
3371            IWORK = ITAUP + M
3372*
3373*           Bidiagonalize A
3374*           (Workspace: need 3*M + N, prefer 3*M + (M + N)*NB)
3375*
3376            CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
3377     $                   WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
3378     $                   IERR )
3379            IF( WNTUAS ) THEN
3380*
3381*              If left singular vectors desired in U, copy result to U
3382*              and generate left bidiagonalizing vectors in U
3383*              (Workspace: need 4*M-1, prefer 3*M + (M-1)*NB)
3384*
3385               CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
3386               CALL DORGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ),
3387     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3388            END IF
3389            IF( WNTVAS ) THEN
3390*
3391*              If right singular vectors desired in VT, copy result to
3392*              VT and generate right bidiagonalizing vectors in VT
3393*              (Workspace: need 3*M + NRVT, prefer 3*M + NRVT*NB)
3394*
3395               CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3396               IF( WNTVA )
3397     $            NRVT = N
3398               IF( WNTVS )
3399     $            NRVT = M
3400               CALL DORGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ),
3401     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3402            END IF
3403            IF( WNTUO ) THEN
3404*
3405*              If left singular vectors desired in A, generate left
3406*              bidiagonalizing vectors in A
3407*              (Workspace: need 4*M-1, prefer 3*M + (M-1)*NB)
3408*
3409               CALL DORGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ),
3410     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3411            END IF
3412            IF( WNTVO ) THEN
3413*
3414*              If right singular vectors desired in A, generate right
3415*              bidiagonalizing vectors in A
3416*              (Workspace: need 4*M, prefer 3*M + M*NB)
3417*
3418               CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
3419     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3420            END IF
3421            IWORK = IE + M
3422            IF( WNTUAS .OR. WNTUO )
3423     $         NRU = M
3424            IF( WNTUN )
3425     $         NRU = 0
3426            IF( WNTVAS .OR. WNTVO )
3427     $         NCVT = N
3428            IF( WNTVN )
3429     $         NCVT = 0
3430            IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
3431*
3432*              Perform bidiagonal QR iteration, if desired, computing
3433*              left singular vectors in U and computing right singular
3434*              vectors in VT
3435*              (Workspace: need BDSPAC)
3436*
3437               CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
3438     $                      LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
3439            ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
3440*
3441*              Perform bidiagonal QR iteration, if desired, computing
3442*              left singular vectors in U and computing right singular
3443*              vectors in A
3444*              (Workspace: need BDSPAC)
3445*
3446               CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
3447     $                      U, LDU, DUM, 1, WORK( IWORK ), INFO )
3448            ELSE
3449*
3450*              Perform bidiagonal QR iteration, if desired, computing
3451*              left singular vectors in A and computing right singular
3452*              vectors in VT
3453*              (Workspace: need BDSPAC)
3454*
3455               CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
3456     $                      LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
3457            END IF
3458*
3459         END IF
3460*
3461      END IF
3462*
3463*     If DBDSQR failed to converge, copy unconverged superdiagonals
3464*     to WORK( 2:MINMN )
3465*
3466      IF( INFO.NE.0 ) THEN
3467         IF( IE.GT.2 ) THEN
3468            DO 50 I = 1, MINMN - 1
3469               WORK( I+1 ) = WORK( I+IE-1 )
3470   50       CONTINUE
3471         END IF
3472         IF( IE.LT.2 ) THEN
3473            DO 60 I = MINMN - 1, 1, -1
3474               WORK( I+1 ) = WORK( I+IE-1 )
3475   60       CONTINUE
3476         END IF
3477      END IF
3478*
3479*     Undo scaling if necessary
3480*
3481      IF( ISCL.EQ.1 ) THEN
3482         IF( ANRM.GT.BIGNUM )
3483     $      CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
3484     $                   IERR )
3485         IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM )
3486     $      CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1, WORK( 2 ),
3487     $                   MINMN, IERR )
3488         IF( ANRM.LT.SMLNUM )
3489     $      CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
3490     $                   IERR )
3491         IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM )
3492     $      CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1, WORK( 2 ),
3493     $                   MINMN, IERR )
3494      END IF
3495*
3496*     Return optimal workspace in WORK(1)
3497*
3498      WORK( 1 ) = MAXWRK
3499*
3500      RETURN
3501*
3502*     End of DGESVD
3503*
3504      END
3505