1*> \brief <b> DGESVD computes the singular value decomposition (SVD) for GE matrices</b>
2*
3*  =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6*            http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
9*> Download DGESVD + dependencies
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgesvd.f">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgesvd.f">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgesvd.f">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18*  Definition:
19*  ===========
20*
21*       SUBROUTINE DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
22*                          WORK, LWORK, INFO )
23*
24*       .. Scalar Arguments ..
25*       CHARACTER          JOBU, JOBVT
26*       INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N
27*       ..
28*       .. Array Arguments ..
29*       DOUBLE PRECISION   A( LDA, * ), S( * ), U( LDU, * ),
30*      $                   VT( LDVT, * ), WORK( * )
31*       ..
32*
33*
34*> \par Purpose:
35*  =============
36*>
37*> \verbatim
38*>
39*> DGESVD computes the singular value decomposition (SVD) of a real
40*> M-by-N matrix A, optionally computing the left and/or right singular
41*> vectors. The SVD is written
42*>
43*>      A = U * SIGMA * transpose(V)
44*>
45*> where SIGMA is an M-by-N matrix which is zero except for its
46*> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
47*> V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA
48*> are the singular values of A; they are real and non-negative, and
49*> are returned in descending order.  The first min(m,n) columns of
50*> U and V are the left and right singular vectors of A.
51*>
52*> Note that the routine returns V**T, not V.
53*> \endverbatim
54*
55*  Arguments:
56*  ==========
57*
58*> \param[in] JOBU
59*> \verbatim
60*>          JOBU is CHARACTER*1
61*>          Specifies options for computing all or part of the matrix U:
62*>          = 'A':  all M columns of U are returned in array U:
63*>          = 'S':  the first min(m,n) columns of U (the left singular
64*>                  vectors) are returned in the array U;
65*>          = 'O':  the first min(m,n) columns of U (the left singular
66*>                  vectors) are overwritten on the array A;
67*>          = 'N':  no columns of U (no left singular vectors) are
68*>                  computed.
69*> \endverbatim
70*>
71*> \param[in] JOBVT
72*> \verbatim
73*>          JOBVT is CHARACTER*1
74*>          Specifies options for computing all or part of the matrix
75*>          V**T:
76*>          = 'A':  all N rows of V**T are returned in the array VT;
77*>          = 'S':  the first min(m,n) rows of V**T (the right singular
78*>                  vectors) are returned in the array VT;
79*>          = 'O':  the first min(m,n) rows of V**T (the right singular
80*>                  vectors) are overwritten on the array A;
81*>          = 'N':  no rows of V**T (no right singular vectors) are
82*>                  computed.
83*>
84*>          JOBVT and JOBU cannot both be 'O'.
85*> \endverbatim
86*>
87*> \param[in] M
88*> \verbatim
89*>          M is INTEGER
90*>          The number of rows of the input matrix A.  M >= 0.
91*> \endverbatim
92*>
93*> \param[in] N
94*> \verbatim
95*>          N is INTEGER
96*>          The number of columns of the input matrix A.  N >= 0.
97*> \endverbatim
98*>
99*> \param[in,out] A
100*> \verbatim
101*>          A is DOUBLE PRECISION array, dimension (LDA,N)
102*>          On entry, the M-by-N matrix A.
103*>          On exit,
104*>          if JOBU = 'O',  A is overwritten with the first min(m,n)
105*>                          columns of U (the left singular vectors,
106*>                          stored columnwise);
107*>          if JOBVT = 'O', A is overwritten with the first min(m,n)
108*>                          rows of V**T (the right singular vectors,
109*>                          stored rowwise);
110*>          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
111*>                          are destroyed.
112*> \endverbatim
113*>
114*> \param[in] LDA
115*> \verbatim
116*>          LDA is INTEGER
117*>          The leading dimension of the array A.  LDA >= max(1,M).
118*> \endverbatim
119*>
120*> \param[out] S
121*> \verbatim
122*>          S is DOUBLE PRECISION array, dimension (min(M,N))
123*>          The singular values of A, sorted so that S(i) >= S(i+1).
124*> \endverbatim
125*>
126*> \param[out] U
127*> \verbatim
128*>          U is DOUBLE PRECISION array, dimension (LDU,UCOL)
129*>          (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
130*>          If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
131*>          if JOBU = 'S', U contains the first min(m,n) columns of U
132*>          (the left singular vectors, stored columnwise);
133*>          if JOBU = 'N' or 'O', U is not referenced.
134*> \endverbatim
135*>
136*> \param[in] LDU
137*> \verbatim
138*>          LDU is INTEGER
139*>          The leading dimension of the array U.  LDU >= 1; if
140*>          JOBU = 'S' or 'A', LDU >= M.
141*> \endverbatim
142*>
143*> \param[out] VT
144*> \verbatim
145*>          VT is DOUBLE PRECISION array, dimension (LDVT,N)
146*>          If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
147*>          V**T;
148*>          if JOBVT = 'S', VT contains the first min(m,n) rows of
149*>          V**T (the right singular vectors, stored rowwise);
150*>          if JOBVT = 'N' or 'O', VT is not referenced.
151*> \endverbatim
152*>
153*> \param[in] LDVT
154*> \verbatim
155*>          LDVT is INTEGER
156*>          The leading dimension of the array VT.  LDVT >= 1; if
157*>          JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
158*> \endverbatim
159*>
160*> \param[out] WORK
161*> \verbatim
162*>          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
163*>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
164*>          if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
165*>          superdiagonal elements of an upper bidiagonal matrix B
166*>          whose diagonal is in S (not necessarily sorted). B
167*>          satisfies A = U * B * VT, so it has the same singular values
168*>          as A, and singular vectors related by U and VT.
169*> \endverbatim
170*>
171*> \param[in] LWORK
172*> \verbatim
173*>          LWORK is INTEGER
174*>          The dimension of the array WORK.
175*>          LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code):
176*>             - PATH 1  (M much larger than N, JOBU='N')
177*>             - PATH 1t (N much larger than M, JOBVT='N')
178*>          LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) for the other paths
179*>          For good performance, LWORK should generally be larger.
180*>
181*>          If LWORK = -1, then a workspace query is assumed; the routine
182*>          only calculates the optimal size of the WORK array, returns
183*>          this value as the first entry of the WORK array, and no error
184*>          message related to LWORK is issued by XERBLA.
185*> \endverbatim
186*>
187*> \param[out] INFO
188*> \verbatim
189*>          INFO is INTEGER
190*>          = 0:  successful exit.
191*>          < 0:  if INFO = -i, the i-th argument had an illegal value.
192*>          > 0:  if DBDSQR did not converge, INFO specifies how many
193*>                superdiagonals of an intermediate bidiagonal form B
194*>                did not converge to zero. See the description of WORK
195*>                above for details.
196*> \endverbatim
197*
198*  Authors:
199*  ========
200*
201*> \author Univ. of Tennessee
202*> \author Univ. of California Berkeley
203*> \author Univ. of Colorado Denver
204*> \author NAG Ltd.
205*
206*> \date April 2012
207*
208*> \ingroup doubleGEsing
209*
210*  =====================================================================
211      SUBROUTINE DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU,
212     $                   VT, LDVT, WORK, LWORK, INFO )
213*
214*  -- LAPACK driver routine (version 3.4.1) --
215*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
216*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
217*     April 2012
218*
219*     .. Scalar Arguments ..
220      CHARACTER          JOBU, JOBVT
221      INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N
222*     ..
223*     .. Array Arguments ..
224      DOUBLE PRECISION   A( LDA, * ), S( * ), U( LDU, * ),
225     $                   VT( LDVT, * ), WORK( * )
226*     ..
227*
228*  =====================================================================
229*
230*     .. Parameters ..
231      DOUBLE PRECISION   ZERO, ONE
232      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
233*     ..
234*     .. Local Scalars ..
235      LOGICAL            LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
236     $                   WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS
237      INTEGER            BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL,
238     $                   ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU,
239     $                   MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU,
240     $                   NRVT, WRKBL
241      INTEGER            LWORK_DGEQRF, LWORK_DORGQR_N, LWORK_DORGQR_M,
242     $                   LWORK_DGEBRD, LWORK_DORGBR_P, LWORK_DORGBR_Q,
243     $                   LWORK_DGELQF, LWORK_DORGLQ_N, LWORK_DORGLQ_M
244      DOUBLE PRECISION   ANRM, BIGNUM, EPS, SMLNUM
245*     ..
246*     .. Local Arrays ..
247      DOUBLE PRECISION   DUM( 1 )
248*     ..
249*     .. External Subroutines ..
250      EXTERNAL           DBDSQR, DGEBRD, DGELQF, DGEMM, DGEQRF, DLACPY,
251     $                   DLASCL, DLASET, DORGBR, DORGLQ, DORGQR, DORMBR,
252     $                   XERBLA
253*     ..
254*     .. External Functions ..
255      LOGICAL            LSAME
256      INTEGER            ILAENV
257      DOUBLE PRECISION   DLAMCH, DLANGE
258      EXTERNAL           LSAME, ILAENV, DLAMCH, DLANGE
259*     ..
260*     .. Intrinsic Functions ..
261      INTRINSIC          MAX, MIN, SQRT
262*     ..
263*     .. Executable Statements ..
264*
265*     Test the input arguments
266*
267      INFO = 0
268      MINMN = MIN( M, N )
269      WNTUA = LSAME( JOBU, 'A' )
270      WNTUS = LSAME( JOBU, 'S' )
271      WNTUAS = WNTUA .OR. WNTUS
272      WNTUO = LSAME( JOBU, 'O' )
273      WNTUN = LSAME( JOBU, 'N' )
274      WNTVA = LSAME( JOBVT, 'A' )
275      WNTVS = LSAME( JOBVT, 'S' )
276      WNTVAS = WNTVA .OR. WNTVS
277      WNTVO = LSAME( JOBVT, 'O' )
278      WNTVN = LSAME( JOBVT, 'N' )
279      LQUERY = ( LWORK.EQ.-1 )
280*
281      IF( .NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN
282         INFO = -1
283      ELSE IF( .NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR.
284     $         ( WNTVO .AND. WNTUO ) ) THEN
285         INFO = -2
286      ELSE IF( M.LT.0 ) THEN
287         INFO = -3
288      ELSE IF( N.LT.0 ) THEN
289         INFO = -4
290      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
291         INFO = -6
292      ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN
293         INFO = -9
294      ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR.
295     $         ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN
296         INFO = -11
297      END IF
298*
299*     Compute workspace
300*      (Note: Comments in the code beginning "Workspace:" describe the
301*       minimal amount of workspace needed at that point in the code,
302*       as well as the preferred amount for good performance.
303*       NB refers to the optimal block size for the immediately
304*       following subroutine, as returned by ILAENV.)
305*
306      IF( INFO.EQ.0 ) THEN
307         MINWRK = 1
308         MAXWRK = 1
309         IF( M.GE.N .AND. MINMN.GT.0 ) THEN
310*
311*           Compute space needed for DBDSQR
312*
313            MNTHR = ILAENV( 6, 'DGESVD', JOBU // JOBVT, M, N, 0, 0 )
314            BDSPAC = 5*N
315*           Compute space needed for DGEQRF
316            CALL DGEQRF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR )
317            LWORK_DGEQRF=DUM(1)
318*           Compute space needed for DORGQR
319            CALL DORGQR( M, N, N, A, LDA, DUM(1), DUM(1), -1, IERR )
320            LWORK_DORGQR_N=DUM(1)
321            CALL DORGQR( M, M, N, A, LDA, DUM(1), DUM(1), -1, IERR )
322            LWORK_DORGQR_M=DUM(1)
323*           Compute space needed for DGEBRD
324            CALL DGEBRD( N, N, A, LDA, S, DUM(1), DUM(1),
325     $                   DUM(1), DUM(1), -1, IERR )
326            LWORK_DGEBRD=DUM(1)
327*           Compute space needed for DORGBR P
328            CALL DORGBR( 'P', N, N, N, A, LDA, DUM(1),
329     $                   DUM(1), -1, IERR )
330            LWORK_DORGBR_P=DUM(1)
331*           Compute space needed for DORGBR Q
332            CALL DORGBR( 'Q', N, N, N, A, LDA, DUM(1),
333     $                   DUM(1), -1, IERR )
334            LWORK_DORGBR_Q=DUM(1)
335*
336            IF( M.GE.MNTHR ) THEN
337               IF( WNTUN ) THEN
338*
339*                 Path 1 (M much larger than N, JOBU='N')
340*
341                  MAXWRK = N + LWORK_DGEQRF
342                  MAXWRK = MAX( MAXWRK, 3*N+LWORK_DGEBRD )
343                  IF( WNTVO .OR. WNTVAS )
344     $               MAXWRK = MAX( MAXWRK, 3*N+LWORK_DORGBR_P )
345                  MAXWRK = MAX( MAXWRK, BDSPAC )
346                  MINWRK = MAX( 4*N, BDSPAC )
347               ELSE IF( WNTUO .AND. WNTVN ) THEN
348*
349*                 Path 2 (M much larger than N, JOBU='O', JOBVT='N')
350*
351                  WRKBL = N + LWORK_DGEQRF
352                  WRKBL = MAX( WRKBL, N+LWORK_DORGQR_N )
353                  WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD )
354                  WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q )
355                  WRKBL = MAX( WRKBL, BDSPAC )
356                  MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N )
357                  MINWRK = MAX( 3*N+M, BDSPAC )
358               ELSE IF( WNTUO .AND. WNTVAS ) THEN
359*
360*                 Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
361*                 'A')
362*
363                  WRKBL = N + LWORK_DGEQRF
364                  WRKBL = MAX( WRKBL, N+LWORK_DORGQR_N )
365                  WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD )
366                  WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q )
367                  WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_P )
368                  WRKBL = MAX( WRKBL, BDSPAC )
369                  MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N )
370                  MINWRK = MAX( 3*N+M, BDSPAC )
371               ELSE IF( WNTUS .AND. WNTVN ) THEN
372*
373*                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')
374*
375                  WRKBL = N + LWORK_DGEQRF
376                  WRKBL = MAX( WRKBL, N+LWORK_DORGQR_N )
377                  WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD )
378                  WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q )
379                  WRKBL = MAX( WRKBL, BDSPAC )
380                  MAXWRK = N*N + WRKBL
381                  MINWRK = MAX( 3*N+M, BDSPAC )
382               ELSE IF( WNTUS .AND. WNTVO ) THEN
383*
384*                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')
385*
386                  WRKBL = N + LWORK_DGEQRF
387                  WRKBL = MAX( WRKBL, N+LWORK_DORGQR_N )
388                  WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD )
389                  WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q )
390                  WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_P )
391                  WRKBL = MAX( WRKBL, BDSPAC )
392                  MAXWRK = 2*N*N + WRKBL
393                  MINWRK = MAX( 3*N+M, BDSPAC )
394               ELSE IF( WNTUS .AND. WNTVAS ) THEN
395*
396*                 Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
397*                 'A')
398*
399                  WRKBL = N + LWORK_DGEQRF
400                  WRKBL = MAX( WRKBL, N+LWORK_DORGQR_N )
401                  WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD )
402                  WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q )
403                  WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_P )
404                  WRKBL = MAX( WRKBL, BDSPAC )
405                  MAXWRK = N*N + WRKBL
406                  MINWRK = MAX( 3*N+M, BDSPAC )
407               ELSE IF( WNTUA .AND. WNTVN ) THEN
408*
409*                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')
410*
411                  WRKBL = N + LWORK_DGEQRF
412                  WRKBL = MAX( WRKBL, N+LWORK_DORGQR_M )
413                  WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD )
414                  WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q )
415                  WRKBL = MAX( WRKBL, BDSPAC )
416                  MAXWRK = N*N + WRKBL
417                  MINWRK = MAX( 3*N+M, BDSPAC )
418               ELSE IF( WNTUA .AND. WNTVO ) THEN
419*
420*                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')
421*
422                  WRKBL = N + LWORK_DGEQRF
423                  WRKBL = MAX( WRKBL, N+LWORK_DORGQR_M )
424                  WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD )
425                  WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q )
426                  WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_P )
427                  WRKBL = MAX( WRKBL, BDSPAC )
428                  MAXWRK = 2*N*N + WRKBL
429                  MINWRK = MAX( 3*N+M, BDSPAC )
430               ELSE IF( WNTUA .AND. WNTVAS ) THEN
431*
432*                 Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
433*                 'A')
434*
435                  WRKBL = N + LWORK_DGEQRF
436                  WRKBL = MAX( WRKBL, N+LWORK_DORGQR_M )
437                  WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD )
438                  WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q )
439                  WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_P )
440                  WRKBL = MAX( WRKBL, BDSPAC )
441                  MAXWRK = N*N + WRKBL
442                  MINWRK = MAX( 3*N+M, BDSPAC )
443               END IF
444            ELSE
445*
446*              Path 10 (M at least N, but not much larger)
447*
448               CALL DGEBRD( M, N, A, LDA, S, DUM(1), DUM(1),
449     $                   DUM(1), DUM(1), -1, IERR )
450               LWORK_DGEBRD=DUM(1)
451               MAXWRK = 3*N + LWORK_DGEBRD
452               IF( WNTUS .OR. WNTUO ) THEN
453                  CALL DORGBR( 'Q', M, N, N, A, LDA, DUM(1),
454     $                   DUM(1), -1, IERR )
455                  LWORK_DORGBR_Q=DUM(1)
456                  MAXWRK = MAX( MAXWRK, 3*N+LWORK_DORGBR_Q )
457               END IF
458               IF( WNTUA ) THEN
459                  CALL DORGBR( 'Q', M, M, N, A, LDA, DUM(1),
460     $                   DUM(1), -1, IERR )
461                  LWORK_DORGBR_Q=DUM(1)
462                  MAXWRK = MAX( MAXWRK, 3*N+LWORK_DORGBR_Q )
463               END IF
464               IF( .NOT.WNTVN ) THEN
465                 MAXWRK = MAX( MAXWRK, 3*N+LWORK_DORGBR_P )
466               END IF
467               MAXWRK = MAX( MAXWRK, BDSPAC )
468               MINWRK = MAX( 3*N+M, BDSPAC )
469            END IF
470         ELSE IF( MINMN.GT.0 ) THEN
471*
472*           Compute space needed for DBDSQR
473*
474            MNTHR = ILAENV( 6, 'DGESVD', JOBU // JOBVT, M, N, 0, 0 )
475            BDSPAC = 5*M
476*           Compute space needed for DGELQF
477            CALL DGELQF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR )
478            LWORK_DGELQF=DUM(1)
479*           Compute space needed for DORGLQ
480            CALL DORGLQ( N, N, M, DUM(1), N, DUM(1), DUM(1), -1, IERR )
481            LWORK_DORGLQ_N=DUM(1)
482            CALL DORGLQ( M, N, M, A, LDA, DUM(1), DUM(1), -1, IERR )
483            LWORK_DORGLQ_M=DUM(1)
484*           Compute space needed for DGEBRD
485            CALL DGEBRD( M, M, A, LDA, S, DUM(1), DUM(1),
486     $                   DUM(1), DUM(1), -1, IERR )
487            LWORK_DGEBRD=DUM(1)
488*            Compute space needed for DORGBR P
489            CALL DORGBR( 'P', M, M, M, A, N, DUM(1),
490     $                   DUM(1), -1, IERR )
491            LWORK_DORGBR_P=DUM(1)
492*           Compute space needed for DORGBR Q
493            CALL DORGBR( 'Q', M, M, M, A, N, DUM(1),
494     $                   DUM(1), -1, IERR )
495            LWORK_DORGBR_Q=DUM(1)
496            IF( N.GE.MNTHR ) THEN
497               IF( WNTVN ) THEN
498*
499*                 Path 1t(N much larger than M, JOBVT='N')
500*
501                  MAXWRK = M + LWORK_DGELQF
502                  MAXWRK = MAX( MAXWRK, 3*M+LWORK_DGEBRD )
503                  IF( WNTUO .OR. WNTUAS )
504     $               MAXWRK = MAX( MAXWRK, 3*M+LWORK_DORGBR_Q )
505                  MAXWRK = MAX( MAXWRK, BDSPAC )
506                  MINWRK = MAX( 4*M, BDSPAC )
507               ELSE IF( WNTVO .AND. WNTUN ) THEN
508*
509*                 Path 2t(N much larger than M, JOBU='N', JOBVT='O')
510*
511                  WRKBL = M + LWORK_DGELQF
512                  WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_M )
513                  WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD )
514                  WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P )
515                  WRKBL = MAX( WRKBL, BDSPAC )
516                  MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M )
517                  MINWRK = MAX( 3*M+N, BDSPAC )
518               ELSE IF( WNTVO .AND. WNTUAS ) THEN
519*
520*                 Path 3t(N much larger than M, JOBU='S' or 'A',
521*                 JOBVT='O')
522*
523                  WRKBL = M + LWORK_DGELQF
524                  WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_M )
525                  WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD )
526                  WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P )
527                  WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_Q )
528                  WRKBL = MAX( WRKBL, BDSPAC )
529                  MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M )
530                  MINWRK = MAX( 3*M+N, BDSPAC )
531               ELSE IF( WNTVS .AND. WNTUN ) THEN
532*
533*                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')
534*
535                  WRKBL = M + LWORK_DGELQF
536                  WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_M )
537                  WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD )
538                  WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P )
539                  WRKBL = MAX( WRKBL, BDSPAC )
540                  MAXWRK = M*M + WRKBL
541                  MINWRK = MAX( 3*M+N, BDSPAC )
542               ELSE IF( WNTVS .AND. WNTUO ) THEN
543*
544*                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')
545*
546                  WRKBL = M + LWORK_DGELQF
547                  WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_M )
548                  WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD )
549                  WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P )
550                  WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_Q )
551                  WRKBL = MAX( WRKBL, BDSPAC )
552                  MAXWRK = 2*M*M + WRKBL
553                  MINWRK = MAX( 3*M+N, BDSPAC )
554               ELSE IF( WNTVS .AND. WNTUAS ) THEN
555*
556*                 Path 6t(N much larger than M, JOBU='S' or 'A',
557*                 JOBVT='S')
558*
559                  WRKBL = M + LWORK_DGELQF
560                  WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_M )
561                  WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD )
562                  WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P )
563                  WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_Q )
564                  WRKBL = MAX( WRKBL, BDSPAC )
565                  MAXWRK = M*M + WRKBL
566                  MINWRK = MAX( 3*M+N, BDSPAC )
567               ELSE IF( WNTVA .AND. WNTUN ) THEN
568*
569*                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')
570*
571                  WRKBL = M + LWORK_DGELQF
572                  WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_N )
573                  WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD )
574                  WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P )
575                  WRKBL = MAX( WRKBL, BDSPAC )
576                  MAXWRK = M*M + WRKBL
577                  MINWRK = MAX( 3*M+N, BDSPAC )
578               ELSE IF( WNTVA .AND. WNTUO ) THEN
579*
580*                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')
581*
582                  WRKBL = M + LWORK_DGELQF
583                  WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_N )
584                  WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD )
585                  WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P )
586                  WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_Q )
587                  WRKBL = MAX( WRKBL, BDSPAC )
588                  MAXWRK = 2*M*M + WRKBL
589                  MINWRK = MAX( 3*M+N, BDSPAC )
590               ELSE IF( WNTVA .AND. WNTUAS ) THEN
591*
592*                 Path 9t(N much larger than M, JOBU='S' or 'A',
593*                 JOBVT='A')
594*
595                  WRKBL = M + LWORK_DGELQF
596                  WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_N )
597                  WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD )
598                  WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P )
599                  WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_Q )
600                  WRKBL = MAX( WRKBL, BDSPAC )
601                  MAXWRK = M*M + WRKBL
602                  MINWRK = MAX( 3*M+N, BDSPAC )
603               END IF
604            ELSE
605*
606*              Path 10t(N greater than M, but not much larger)
607*
608               CALL DGEBRD( M, N, A, LDA, S, DUM(1), DUM(1),
609     $                   DUM(1), DUM(1), -1, IERR )
610               LWORK_DGEBRD=DUM(1)
611               MAXWRK = 3*M + LWORK_DGEBRD
612               IF( WNTVS .OR. WNTVO ) THEN
613*                Compute space needed for DORGBR P
614                 CALL DORGBR( 'P', M, N, M, A, N, DUM(1),
615     $                   DUM(1), -1, IERR )
616                 LWORK_DORGBR_P=DUM(1)
617                 MAXWRK = MAX( MAXWRK, 3*M+LWORK_DORGBR_P )
618               END IF
619               IF( WNTVA ) THEN
620                 CALL DORGBR( 'P', N, N, M, A, N, DUM(1),
621     $                   DUM(1), -1, IERR )
622                 LWORK_DORGBR_P=DUM(1)
623                 MAXWRK = MAX( MAXWRK, 3*M+LWORK_DORGBR_P )
624               END IF
625               IF( .NOT.WNTUN ) THEN
626                  MAXWRK = MAX( MAXWRK, 3*M+LWORK_DORGBR_Q )
627               END IF
628               MAXWRK = MAX( MAXWRK, BDSPAC )
629               MINWRK = MAX( 3*M+N, BDSPAC )
630            END IF
631         END IF
632         MAXWRK = MAX( MAXWRK, MINWRK )
633         WORK( 1 ) = MAXWRK
634*
635         IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
636            INFO = -13
637         END IF
638      END IF
639*
640      IF( INFO.NE.0 ) THEN
641         CALL XERBLA( 'DGESVD', -INFO )
642         RETURN
643      ELSE IF( LQUERY ) THEN
644         RETURN
645      END IF
646*
647*     Quick return if possible
648*
649      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
650         RETURN
651      END IF
652*
653*     Get machine constants
654*
655      EPS = DLAMCH( 'P' )
656      SMLNUM = SQRT( DLAMCH( 'S' ) ) / EPS
657      BIGNUM = ONE / SMLNUM
658*
659*     Scale A if max element outside range [SMLNUM,BIGNUM]
660*
661      ANRM = DLANGE( 'M', M, N, A, LDA, DUM )
662      ISCL = 0
663      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
664         ISCL = 1
665         CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR )
666      ELSE IF( ANRM.GT.BIGNUM ) THEN
667         ISCL = 1
668         CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR )
669      END IF
670*
671      IF( M.GE.N ) THEN
672*
673*        A has at least as many rows as columns. If A has sufficiently
674*        more rows than columns, first reduce using the QR
675*        decomposition (if sufficient workspace available)
676*
677         IF( M.GE.MNTHR ) THEN
678*
679            IF( WNTUN ) THEN
680*
681*              Path 1 (M much larger than N, JOBU='N')
682*              No left singular vectors to be computed
683*
684               ITAU = 1
685               IWORK = ITAU + N
686*
687*              Compute A=Q*R
688*              (Workspace: need 2*N, prefer N+N*NB)
689*
690               CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
691     $                      LWORK-IWORK+1, IERR )
692*
693*              Zero out below R
694*
695               CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
696               IE = 1
697               ITAUQ = IE + N
698               ITAUP = ITAUQ + N
699               IWORK = ITAUP + N
700*
701*              Bidiagonalize R in A
702*              (Workspace: need 4*N, prefer 3*N+2*N*NB)
703*
704               CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
705     $                      WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
706     $                      IERR )
707               NCVT = 0
708               IF( WNTVO .OR. WNTVAS ) THEN
709*
710*                 If right singular vectors desired, generate P'.
711*                 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
712*
713                  CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
714     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
715                  NCVT = N
716               END IF
717               IWORK = IE + N
718*
719*              Perform bidiagonal QR iteration, computing right
720*              singular vectors of A in A if desired
721*              (Workspace: need BDSPAC)
722*
723               CALL DBDSQR( 'U', N, NCVT, 0, 0, S, WORK( IE ), A, LDA,
724     $                      DUM, 1, DUM, 1, WORK( IWORK ), INFO )
725*
726*              If right singular vectors desired in VT, copy them there
727*
728               IF( WNTVAS )
729     $            CALL DLACPY( 'F', N, N, A, LDA, VT, LDVT )
730*
731            ELSE IF( WNTUO .AND. WNTVN ) THEN
732*
733*              Path 2 (M much larger than N, JOBU='O', JOBVT='N')
734*              N left singular vectors to be overwritten on A and
735*              no right singular vectors to be computed
736*
737               IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
738*
739*                 Sufficient workspace for a fast algorithm
740*
741                  IR = 1
742                  IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN
743*
744*                    WORK(IU) is LDA by N, WORK(IR) is LDA by N
745*
746                     LDWRKU = LDA
747                     LDWRKR = LDA
748                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN
749*
750*                    WORK(IU) is LDA by N, WORK(IR) is N by N
751*
752                     LDWRKU = LDA
753                     LDWRKR = N
754                  ELSE
755*
756*                    WORK(IU) is LDWRKU by N, WORK(IR) is N by N
757*
758                     LDWRKU = ( LWORK-N*N-N ) / N
759                     LDWRKR = N
760                  END IF
761                  ITAU = IR + LDWRKR*N
762                  IWORK = ITAU + N
763*
764*                 Compute A=Q*R
765*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
766*
767                  CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
768     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
769*
770*                 Copy R to WORK(IR) and zero out below it
771*
772                  CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
773                  CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IR+1 ),
774     $                         LDWRKR )
775*
776*                 Generate Q in A
777*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
778*
779                  CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
780     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
781                  IE = ITAU
782                  ITAUQ = IE + N
783                  ITAUP = ITAUQ + N
784                  IWORK = ITAUP + N
785*
786*                 Bidiagonalize R in WORK(IR)
787*                 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
788*
789                  CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ),
790     $                         WORK( ITAUQ ), WORK( ITAUP ),
791     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
792*
793*                 Generate left vectors bidiagonalizing R
794*                 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
795*
796                  CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
797     $                         WORK( ITAUQ ), WORK( IWORK ),
798     $                         LWORK-IWORK+1, IERR )
799                  IWORK = IE + N
800*
801*                 Perform bidiagonal QR iteration, computing left
802*                 singular vectors of R in WORK(IR)
803*                 (Workspace: need N*N+BDSPAC)
804*
805                  CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, 1,
806     $                         WORK( IR ), LDWRKR, DUM, 1,
807     $                         WORK( IWORK ), INFO )
808                  IU = IE + N
809*
810*                 Multiply Q in A by left singular vectors of R in
811*                 WORK(IR), storing result in WORK(IU) and copying to A
812*                 (Workspace: need N*N+2*N, prefer N*N+M*N+N)
813*
814                  DO 10 I = 1, M, LDWRKU
815                     CHUNK = MIN( M-I+1, LDWRKU )
816                     CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
817     $                           LDA, WORK( IR ), LDWRKR, ZERO,
818     $                           WORK( IU ), LDWRKU )
819                     CALL DLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
820     $                            A( I, 1 ), LDA )
821   10             CONTINUE
822*
823               ELSE
824*
825*                 Insufficient workspace for a fast algorithm
826*
827                  IE = 1
828                  ITAUQ = IE + N
829                  ITAUP = ITAUQ + N
830                  IWORK = ITAUP + N
831*
832*                 Bidiagonalize A
833*                 (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
834*
835                  CALL DGEBRD( M, N, A, LDA, S, WORK( IE ),
836     $                         WORK( ITAUQ ), WORK( ITAUP ),
837     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
838*
839*                 Generate left vectors bidiagonalizing A
840*                 (Workspace: need 4*N, prefer 3*N+N*NB)
841*
842                  CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
843     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
844                  IWORK = IE + N
845*
846*                 Perform bidiagonal QR iteration, computing left
847*                 singular vectors of A in A
848*                 (Workspace: need BDSPAC)
849*
850                  CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, 1,
851     $                         A, LDA, DUM, 1, WORK( IWORK ), INFO )
852*
853               END IF
854*
855            ELSE IF( WNTUO .AND. WNTVAS ) THEN
856*
857*              Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
858*              N left singular vectors to be overwritten on A and
859*              N right singular vectors to be computed in VT
860*
861               IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
862*
863*                 Sufficient workspace for a fast algorithm
864*
865                  IR = 1
866                  IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN
867*
868*                    WORK(IU) is LDA by N and WORK(IR) is LDA by N
869*
870                     LDWRKU = LDA
871                     LDWRKR = LDA
872                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN
873*
874*                    WORK(IU) is LDA by N and WORK(IR) is N by N
875*
876                     LDWRKU = LDA
877                     LDWRKR = N
878                  ELSE
879*
880*                    WORK(IU) is LDWRKU by N and WORK(IR) is N by N
881*
882                     LDWRKU = ( LWORK-N*N-N ) / N
883                     LDWRKR = N
884                  END IF
885                  ITAU = IR + LDWRKR*N
886                  IWORK = ITAU + N
887*
888*                 Compute A=Q*R
889*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
890*
891                  CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
892     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
893*
894*                 Copy R to VT, zeroing out below it
895*
896                  CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
897                  IF( N.GT.1 )
898     $               CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
899     $                            VT( 2, 1 ), LDVT )
900*
901*                 Generate Q in A
902*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
903*
904                  CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
905     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
906                  IE = ITAU
907                  ITAUQ = IE + N
908                  ITAUP = ITAUQ + N
909                  IWORK = ITAUP + N
910*
911*                 Bidiagonalize R in VT, copying result to WORK(IR)
912*                 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
913*
914                  CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
915     $                         WORK( ITAUQ ), WORK( ITAUP ),
916     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
917                  CALL DLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR )
918*
919*                 Generate left vectors bidiagonalizing R in WORK(IR)
920*                 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
921*
922                  CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
923     $                         WORK( ITAUQ ), WORK( IWORK ),
924     $                         LWORK-IWORK+1, IERR )
925*
926*                 Generate right vectors bidiagonalizing R in VT
927*                 (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB)
928*
929                  CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
930     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
931                  IWORK = IE + N
932*
933*                 Perform bidiagonal QR iteration, computing left
934*                 singular vectors of R in WORK(IR) and computing right
935*                 singular vectors of R in VT
936*                 (Workspace: need N*N+BDSPAC)
937*
938                  CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, LDVT,
939     $                         WORK( IR ), LDWRKR, DUM, 1,
940     $                         WORK( IWORK ), INFO )
941                  IU = IE + N
942*
943*                 Multiply Q in A by left singular vectors of R in
944*                 WORK(IR), storing result in WORK(IU) and copying to A
945*                 (Workspace: need N*N+2*N, prefer N*N+M*N+N)
946*
947                  DO 20 I = 1, M, LDWRKU
948                     CHUNK = MIN( M-I+1, LDWRKU )
949                     CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
950     $                           LDA, WORK( IR ), LDWRKR, ZERO,
951     $                           WORK( IU ), LDWRKU )
952                     CALL DLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
953     $                            A( I, 1 ), LDA )
954   20             CONTINUE
955*
956               ELSE
957*
958*                 Insufficient workspace for a fast algorithm
959*
960                  ITAU = 1
961                  IWORK = ITAU + N
962*
963*                 Compute A=Q*R
964*                 (Workspace: need 2*N, prefer N+N*NB)
965*
966                  CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
967     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
968*
969*                 Copy R to VT, zeroing out below it
970*
971                  CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
972                  IF( N.GT.1 )
973     $               CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
974     $                            VT( 2, 1 ), LDVT )
975*
976*                 Generate Q in A
977*                 (Workspace: need 2*N, prefer N+N*NB)
978*
979                  CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
980     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
981                  IE = ITAU
982                  ITAUQ = IE + N
983                  ITAUP = ITAUQ + N
984                  IWORK = ITAUP + N
985*
986*                 Bidiagonalize R in VT
987*                 (Workspace: need 4*N, prefer 3*N+2*N*NB)
988*
989                  CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
990     $                         WORK( ITAUQ ), WORK( ITAUP ),
991     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
992*
993*                 Multiply Q in A by left vectors bidiagonalizing R
994*                 (Workspace: need 3*N+M, prefer 3*N+M*NB)
995*
996                  CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
997     $                         WORK( ITAUQ ), A, LDA, WORK( IWORK ),
998     $                         LWORK-IWORK+1, IERR )
999*
1000*                 Generate right vectors bidiagonalizing R in VT
1001*                 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1002*
1003                  CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1004     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
1005                  IWORK = IE + N
1006*
1007*                 Perform bidiagonal QR iteration, computing left
1008*                 singular vectors of A in A and computing right
1009*                 singular vectors of A in VT
1010*                 (Workspace: need BDSPAC)
1011*
1012                  CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, LDVT,
1013     $                         A, LDA, DUM, 1, WORK( IWORK ), INFO )
1014*
1015               END IF
1016*
1017            ELSE IF( WNTUS ) THEN
1018*
1019               IF( WNTVN ) THEN
1020*
1021*                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')
1022*                 N left singular vectors to be computed in U and
1023*                 no right singular vectors to be computed
1024*
1025                  IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
1026*
1027*                    Sufficient workspace for a fast algorithm
1028*
1029                     IR = 1
1030                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
1031*
1032*                       WORK(IR) is LDA by N
1033*
1034                        LDWRKR = LDA
1035                     ELSE
1036*
1037*                       WORK(IR) is N by N
1038*
1039                        LDWRKR = N
1040                     END IF
1041                     ITAU = IR + LDWRKR*N
1042                     IWORK = ITAU + N
1043*
1044*                    Compute A=Q*R
1045*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1046*
1047                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1048     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1049*
1050*                    Copy R to WORK(IR), zeroing out below it
1051*
1052                     CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ),
1053     $                            LDWRKR )
1054                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1055     $                            WORK( IR+1 ), LDWRKR )
1056*
1057*                    Generate Q in A
1058*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1059*
1060                     CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
1061     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1062                     IE = ITAU
1063                     ITAUQ = IE + N
1064                     ITAUP = ITAUQ + N
1065                     IWORK = ITAUP + N
1066*
1067*                    Bidiagonalize R in WORK(IR)
1068*                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1069*
1070                     CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S,
1071     $                            WORK( IE ), WORK( ITAUQ ),
1072     $                            WORK( ITAUP ), WORK( IWORK ),
1073     $                            LWORK-IWORK+1, IERR )
1074*
1075*                    Generate left vectors bidiagonalizing R in WORK(IR)
1076*                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1077*
1078                     CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
1079     $                            WORK( ITAUQ ), WORK( IWORK ),
1080     $                            LWORK-IWORK+1, IERR )
1081                     IWORK = IE + N
1082*
1083*                    Perform bidiagonal QR iteration, computing left
1084*                    singular vectors of R in WORK(IR)
1085*                    (Workspace: need N*N+BDSPAC)
1086*
1087                     CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
1088     $                            1, WORK( IR ), LDWRKR, DUM, 1,
1089     $                            WORK( IWORK ), INFO )
1090*
1091*                    Multiply Q in A by left singular vectors of R in
1092*                    WORK(IR), storing result in U
1093*                    (Workspace: need N*N)
1094*
1095                     CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
1096     $                           WORK( IR ), LDWRKR, ZERO, U, LDU )
1097*
1098                  ELSE
1099*
1100*                    Insufficient workspace for a fast algorithm
1101*
1102                     ITAU = 1
1103                     IWORK = ITAU + N
1104*
1105*                    Compute A=Q*R, copying result to U
1106*                    (Workspace: need 2*N, prefer N+N*NB)
1107*
1108                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1109     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1110                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1111*
1112*                    Generate Q in U
1113*                    (Workspace: need 2*N, prefer N+N*NB)
1114*
1115                     CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
1116     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1117                     IE = ITAU
1118                     ITAUQ = IE + N
1119                     ITAUP = ITAUQ + N
1120                     IWORK = ITAUP + N
1121*
1122*                    Zero out below R in A
1123*
1124                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1125     $                            LDA )
1126*
1127*                    Bidiagonalize R in A
1128*                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1129*
1130                     CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1131     $                            WORK( ITAUQ ), WORK( ITAUP ),
1132     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1133*
1134*                    Multiply Q in U by left vectors bidiagonalizing R
1135*                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1136*
1137                     CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1138     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1139     $                            LWORK-IWORK+1, IERR )
1140                     IWORK = IE + N
1141*
1142*                    Perform bidiagonal QR iteration, computing left
1143*                    singular vectors of A in U
1144*                    (Workspace: need BDSPAC)
1145*
1146                     CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
1147     $                            1, U, LDU, DUM, 1, WORK( IWORK ),
1148     $                            INFO )
1149*
1150                  END IF
1151*
1152               ELSE IF( WNTVO ) THEN
1153*
1154*                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1155*                 N left singular vectors to be computed in U and
1156*                 N right singular vectors to be overwritten on A
1157*
1158                  IF( LWORK.GE.2*N*N+MAX( 4*N, BDSPAC ) ) THEN
1159*
1160*                    Sufficient workspace for a fast algorithm
1161*
1162                     IU = 1
1163                     IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
1164*
1165*                       WORK(IU) is LDA by N and WORK(IR) is LDA by N
1166*
1167                        LDWRKU = LDA
1168                        IR = IU + LDWRKU*N
1169                        LDWRKR = LDA
1170                     ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
1171*
1172*                       WORK(IU) is LDA by N and WORK(IR) is N by N
1173*
1174                        LDWRKU = LDA
1175                        IR = IU + LDWRKU*N
1176                        LDWRKR = N
1177                     ELSE
1178*
1179*                       WORK(IU) is N by N and WORK(IR) is N by N
1180*
1181                        LDWRKU = N
1182                        IR = IU + LDWRKU*N
1183                        LDWRKR = N
1184                     END IF
1185                     ITAU = IR + LDWRKR*N
1186                     IWORK = ITAU + N
1187*
1188*                    Compute A=Q*R
1189*                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1190*
1191                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1192     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1193*
1194*                    Copy R to WORK(IU), zeroing out below it
1195*
1196                     CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1197     $                            LDWRKU )
1198                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1199     $                            WORK( IU+1 ), LDWRKU )
1200*
1201*                    Generate Q in A
1202*                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1203*
1204                     CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
1205     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1206                     IE = ITAU
1207                     ITAUQ = IE + N
1208                     ITAUP = ITAUQ + N
1209                     IWORK = ITAUP + N
1210*
1211*                    Bidiagonalize R in WORK(IU), copying result to
1212*                    WORK(IR)
1213*                    (Workspace: need 2*N*N+4*N,
1214*                                prefer 2*N*N+3*N+2*N*NB)
1215*
1216                     CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1217     $                            WORK( IE ), WORK( ITAUQ ),
1218     $                            WORK( ITAUP ), WORK( IWORK ),
1219     $                            LWORK-IWORK+1, IERR )
1220                     CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU,
1221     $                            WORK( IR ), LDWRKR )
1222*
1223*                    Generate left bidiagonalizing vectors in WORK(IU)
1224*                    (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1225*
1226                     CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1227     $                            WORK( ITAUQ ), WORK( IWORK ),
1228     $                            LWORK-IWORK+1, IERR )
1229*
1230*                    Generate right bidiagonalizing vectors in WORK(IR)
1231*                    (Workspace: need 2*N*N+4*N-1,
1232*                                prefer 2*N*N+3*N+(N-1)*NB)
1233*
1234                     CALL DORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
1235     $                            WORK( ITAUP ), WORK( IWORK ),
1236     $                            LWORK-IWORK+1, IERR )
1237                     IWORK = IE + N
1238*
1239*                    Perform bidiagonal QR iteration, computing left
1240*                    singular vectors of R in WORK(IU) and computing
1241*                    right singular vectors of R in WORK(IR)
1242*                    (Workspace: need 2*N*N+BDSPAC)
1243*
1244                     CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
1245     $                            WORK( IR ), LDWRKR, WORK( IU ),
1246     $                            LDWRKU, DUM, 1, WORK( IWORK ), INFO )
1247*
1248*                    Multiply Q in A by left singular vectors of R in
1249*                    WORK(IU), storing result in U
1250*                    (Workspace: need N*N)
1251*
1252                     CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
1253     $                           WORK( IU ), LDWRKU, ZERO, U, LDU )
1254*
1255*                    Copy right singular vectors of R to A
1256*                    (Workspace: need N*N)
1257*
1258                     CALL DLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
1259     $                            LDA )
1260*
1261                  ELSE
1262*
1263*                    Insufficient workspace for a fast algorithm
1264*
1265                     ITAU = 1
1266                     IWORK = ITAU + N
1267*
1268*                    Compute A=Q*R, copying result to U
1269*                    (Workspace: need 2*N, prefer N+N*NB)
1270*
1271                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1272     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1273                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1274*
1275*                    Generate Q in U
1276*                    (Workspace: need 2*N, prefer N+N*NB)
1277*
1278                     CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
1279     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1280                     IE = ITAU
1281                     ITAUQ = IE + N
1282                     ITAUP = ITAUQ + N
1283                     IWORK = ITAUP + N
1284*
1285*                    Zero out below R in A
1286*
1287                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1288     $                            LDA )
1289*
1290*                    Bidiagonalize R in A
1291*                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1292*
1293                     CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1294     $                            WORK( ITAUQ ), WORK( ITAUP ),
1295     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1296*
1297*                    Multiply Q in U by left vectors bidiagonalizing R
1298*                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1299*
1300                     CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1301     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1302     $                            LWORK-IWORK+1, IERR )
1303*
1304*                    Generate right vectors bidiagonalizing R in A
1305*                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1306*
1307                     CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1308     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1309                     IWORK = IE + N
1310*
1311*                    Perform bidiagonal QR iteration, computing left
1312*                    singular vectors of A in U and computing right
1313*                    singular vectors of A in A
1314*                    (Workspace: need BDSPAC)
1315*
1316                     CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
1317     $                            LDA, U, LDU, DUM, 1, WORK( IWORK ),
1318     $                            INFO )
1319*
1320                  END IF
1321*
1322               ELSE IF( WNTVAS ) THEN
1323*
1324*                 Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1325*                         or 'A')
1326*                 N left singular vectors to be computed in U and
1327*                 N right singular vectors to be computed in VT
1328*
1329                  IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
1330*
1331*                    Sufficient workspace for a fast algorithm
1332*
1333                     IU = 1
1334                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
1335*
1336*                       WORK(IU) is LDA by N
1337*
1338                        LDWRKU = LDA
1339                     ELSE
1340*
1341*                       WORK(IU) is N by N
1342*
1343                        LDWRKU = N
1344                     END IF
1345                     ITAU = IU + LDWRKU*N
1346                     IWORK = ITAU + N
1347*
1348*                    Compute A=Q*R
1349*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1350*
1351                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1352     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1353*
1354*                    Copy R to WORK(IU), zeroing out below it
1355*
1356                     CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1357     $                            LDWRKU )
1358                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1359     $                            WORK( IU+1 ), LDWRKU )
1360*
1361*                    Generate Q in A
1362*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1363*
1364                     CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
1365     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1366                     IE = ITAU
1367                     ITAUQ = IE + N
1368                     ITAUP = ITAUQ + N
1369                     IWORK = ITAUP + N
1370*
1371*                    Bidiagonalize R in WORK(IU), copying result to VT
1372*                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1373*
1374                     CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1375     $                            WORK( IE ), WORK( ITAUQ ),
1376     $                            WORK( ITAUP ), WORK( IWORK ),
1377     $                            LWORK-IWORK+1, IERR )
1378                     CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
1379     $                            LDVT )
1380*
1381*                    Generate left bidiagonalizing vectors in WORK(IU)
1382*                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1383*
1384                     CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1385     $                            WORK( ITAUQ ), WORK( IWORK ),
1386     $                            LWORK-IWORK+1, IERR )
1387*
1388*                    Generate right bidiagonalizing vectors in VT
1389*                    (Workspace: need N*N+4*N-1,
1390*                                prefer N*N+3*N+(N-1)*NB)
1391*
1392                     CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1393     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1394                     IWORK = IE + N
1395*
1396*                    Perform bidiagonal QR iteration, computing left
1397*                    singular vectors of R in WORK(IU) and computing
1398*                    right singular vectors of R in VT
1399*                    (Workspace: need N*N+BDSPAC)
1400*
1401                     CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
1402     $                            LDVT, WORK( IU ), LDWRKU, DUM, 1,
1403     $                            WORK( IWORK ), INFO )
1404*
1405*                    Multiply Q in A by left singular vectors of R in
1406*                    WORK(IU), storing result in U
1407*                    (Workspace: need N*N)
1408*
1409                     CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
1410     $                           WORK( IU ), LDWRKU, ZERO, U, LDU )
1411*
1412                  ELSE
1413*
1414*                    Insufficient workspace for a fast algorithm
1415*
1416                     ITAU = 1
1417                     IWORK = ITAU + N
1418*
1419*                    Compute A=Q*R, copying result to U
1420*                    (Workspace: need 2*N, prefer N+N*NB)
1421*
1422                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1423     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1424                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1425*
1426*                    Generate Q in U
1427*                    (Workspace: need 2*N, prefer N+N*NB)
1428*
1429                     CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
1430     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1431*
1432*                    Copy R to VT, zeroing out below it
1433*
1434                     CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
1435                     IF( N.GT.1 )
1436     $                  CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1437     $                               VT( 2, 1 ), LDVT )
1438                     IE = ITAU
1439                     ITAUQ = IE + N
1440                     ITAUP = ITAUQ + N
1441                     IWORK = ITAUP + N
1442*
1443*                    Bidiagonalize R in VT
1444*                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1445*
1446                     CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
1447     $                            WORK( ITAUQ ), WORK( ITAUP ),
1448     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1449*
1450*                    Multiply Q in U by left bidiagonalizing vectors
1451*                    in VT
1452*                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1453*
1454                     CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
1455     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1456     $                            LWORK-IWORK+1, IERR )
1457*
1458*                    Generate right bidiagonalizing vectors in VT
1459*                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1460*
1461                     CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1462     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1463                     IWORK = IE + N
1464*
1465*                    Perform bidiagonal QR iteration, computing left
1466*                    singular vectors of A in U and computing right
1467*                    singular vectors of A in VT
1468*                    (Workspace: need BDSPAC)
1469*
1470                     CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
1471     $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
1472     $                            INFO )
1473*
1474                  END IF
1475*
1476               END IF
1477*
1478            ELSE IF( WNTUA ) THEN
1479*
1480               IF( WNTVN ) THEN
1481*
1482*                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1483*                 M left singular vectors to be computed in U and
1484*                 no right singular vectors to be computed
1485*
1486                  IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1487*
1488*                    Sufficient workspace for a fast algorithm
1489*
1490                     IR = 1
1491                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
1492*
1493*                       WORK(IR) is LDA by N
1494*
1495                        LDWRKR = LDA
1496                     ELSE
1497*
1498*                       WORK(IR) is N by N
1499*
1500                        LDWRKR = N
1501                     END IF
1502                     ITAU = IR + LDWRKR*N
1503                     IWORK = ITAU + N
1504*
1505*                    Compute A=Q*R, copying result to U
1506*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1507*
1508                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1509     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1510                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1511*
1512*                    Copy R to WORK(IR), zeroing out below it
1513*
1514                     CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ),
1515     $                            LDWRKR )
1516                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1517     $                            WORK( IR+1 ), LDWRKR )
1518*
1519*                    Generate Q in U
1520*                    (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1521*
1522                     CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1523     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1524                     IE = ITAU
1525                     ITAUQ = IE + N
1526                     ITAUP = ITAUQ + N
1527                     IWORK = ITAUP + N
1528*
1529*                    Bidiagonalize R in WORK(IR)
1530*                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1531*
1532                     CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S,
1533     $                            WORK( IE ), WORK( ITAUQ ),
1534     $                            WORK( ITAUP ), WORK( IWORK ),
1535     $                            LWORK-IWORK+1, IERR )
1536*
1537*                    Generate left bidiagonalizing vectors in WORK(IR)
1538*                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1539*
1540                     CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
1541     $                            WORK( ITAUQ ), WORK( IWORK ),
1542     $                            LWORK-IWORK+1, IERR )
1543                     IWORK = IE + N
1544*
1545*                    Perform bidiagonal QR iteration, computing left
1546*                    singular vectors of R in WORK(IR)
1547*                    (Workspace: need N*N+BDSPAC)
1548*
1549                     CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
1550     $                            1, WORK( IR ), LDWRKR, DUM, 1,
1551     $                            WORK( IWORK ), INFO )
1552*
1553*                    Multiply Q in U by left singular vectors of R in
1554*                    WORK(IR), storing result in A
1555*                    (Workspace: need N*N)
1556*
1557                     CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
1558     $                           WORK( IR ), LDWRKR, ZERO, A, LDA )
1559*
1560*                    Copy left singular vectors of A from A to U
1561*
1562                     CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
1563*
1564                  ELSE
1565*
1566*                    Insufficient workspace for a fast algorithm
1567*
1568                     ITAU = 1
1569                     IWORK = ITAU + N
1570*
1571*                    Compute A=Q*R, copying result to U
1572*                    (Workspace: need 2*N, prefer N+N*NB)
1573*
1574                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1575     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1576                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1577*
1578*                    Generate Q in U
1579*                    (Workspace: need N+M, prefer N+M*NB)
1580*
1581                     CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1582     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1583                     IE = ITAU
1584                     ITAUQ = IE + N
1585                     ITAUP = ITAUQ + N
1586                     IWORK = ITAUP + N
1587*
1588*                    Zero out below R in A
1589*
1590                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1591     $                            LDA )
1592*
1593*                    Bidiagonalize R in A
1594*                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1595*
1596                     CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1597     $                            WORK( ITAUQ ), WORK( ITAUP ),
1598     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1599*
1600*                    Multiply Q in U by left bidiagonalizing vectors
1601*                    in A
1602*                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1603*
1604                     CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1605     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1606     $                            LWORK-IWORK+1, IERR )
1607                     IWORK = IE + N
1608*
1609*                    Perform bidiagonal QR iteration, computing left
1610*                    singular vectors of A in U
1611*                    (Workspace: need BDSPAC)
1612*
1613                     CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
1614     $                            1, U, LDU, DUM, 1, WORK( IWORK ),
1615     $                            INFO )
1616*
1617                  END IF
1618*
1619               ELSE IF( WNTVO ) THEN
1620*
1621*                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1622*                 M left singular vectors to be computed in U and
1623*                 N right singular vectors to be overwritten on A
1624*
1625                  IF( LWORK.GE.2*N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1626*
1627*                    Sufficient workspace for a fast algorithm
1628*
1629                     IU = 1
1630                     IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
1631*
1632*                       WORK(IU) is LDA by N and WORK(IR) is LDA by N
1633*
1634                        LDWRKU = LDA
1635                        IR = IU + LDWRKU*N
1636                        LDWRKR = LDA
1637                     ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
1638*
1639*                       WORK(IU) is LDA by N and WORK(IR) is N by N
1640*
1641                        LDWRKU = LDA
1642                        IR = IU + LDWRKU*N
1643                        LDWRKR = N
1644                     ELSE
1645*
1646*                       WORK(IU) is N by N and WORK(IR) is N by N
1647*
1648                        LDWRKU = N
1649                        IR = IU + LDWRKU*N
1650                        LDWRKR = N
1651                     END IF
1652                     ITAU = IR + LDWRKR*N
1653                     IWORK = ITAU + N
1654*
1655*                    Compute A=Q*R, copying result to U
1656*                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1657*
1658                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1659     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1660                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1661*
1662*                    Generate Q in U
1663*                    (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
1664*
1665                     CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1666     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1667*
1668*                    Copy R to WORK(IU), zeroing out below it
1669*
1670                     CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1671     $                            LDWRKU )
1672                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1673     $                            WORK( IU+1 ), LDWRKU )
1674                     IE = ITAU
1675                     ITAUQ = IE + N
1676                     ITAUP = ITAUQ + N
1677                     IWORK = ITAUP + N
1678*
1679*                    Bidiagonalize R in WORK(IU), copying result to
1680*                    WORK(IR)
1681*                    (Workspace: need 2*N*N+4*N,
1682*                                prefer 2*N*N+3*N+2*N*NB)
1683*
1684                     CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1685     $                            WORK( IE ), WORK( ITAUQ ),
1686     $                            WORK( ITAUP ), WORK( IWORK ),
1687     $                            LWORK-IWORK+1, IERR )
1688                     CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU,
1689     $                            WORK( IR ), LDWRKR )
1690*
1691*                    Generate left bidiagonalizing vectors in WORK(IU)
1692*                    (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1693*
1694                     CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1695     $                            WORK( ITAUQ ), WORK( IWORK ),
1696     $                            LWORK-IWORK+1, IERR )
1697*
1698*                    Generate right bidiagonalizing vectors in WORK(IR)
1699*                    (Workspace: need 2*N*N+4*N-1,
1700*                                prefer 2*N*N+3*N+(N-1)*NB)
1701*
1702                     CALL DORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
1703     $                            WORK( ITAUP ), WORK( IWORK ),
1704     $                            LWORK-IWORK+1, IERR )
1705                     IWORK = IE + N
1706*
1707*                    Perform bidiagonal QR iteration, computing left
1708*                    singular vectors of R in WORK(IU) and computing
1709*                    right singular vectors of R in WORK(IR)
1710*                    (Workspace: need 2*N*N+BDSPAC)
1711*
1712                     CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
1713     $                            WORK( IR ), LDWRKR, WORK( IU ),
1714     $                            LDWRKU, DUM, 1, WORK( IWORK ), INFO )
1715*
1716*                    Multiply Q in U by left singular vectors of R in
1717*                    WORK(IU), storing result in A
1718*                    (Workspace: need N*N)
1719*
1720                     CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
1721     $                           WORK( IU ), LDWRKU, ZERO, A, LDA )
1722*
1723*                    Copy left singular vectors of A from A to U
1724*
1725                     CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
1726*
1727*                    Copy right singular vectors of R from WORK(IR) to A
1728*
1729                     CALL DLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
1730     $                            LDA )
1731*
1732                  ELSE
1733*
1734*                    Insufficient workspace for a fast algorithm
1735*
1736                     ITAU = 1
1737                     IWORK = ITAU + N
1738*
1739*                    Compute A=Q*R, copying result to U
1740*                    (Workspace: need 2*N, prefer N+N*NB)
1741*
1742                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1743     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1744                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1745*
1746*                    Generate Q in U
1747*                    (Workspace: need N+M, prefer N+M*NB)
1748*
1749                     CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1750     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1751                     IE = ITAU
1752                     ITAUQ = IE + N
1753                     ITAUP = ITAUQ + N
1754                     IWORK = ITAUP + N
1755*
1756*                    Zero out below R in A
1757*
1758                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1759     $                            LDA )
1760*
1761*                    Bidiagonalize R in A
1762*                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1763*
1764                     CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1765     $                            WORK( ITAUQ ), WORK( ITAUP ),
1766     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1767*
1768*                    Multiply Q in U by left bidiagonalizing vectors
1769*                    in A
1770*                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1771*
1772                     CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1773     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1774     $                            LWORK-IWORK+1, IERR )
1775*
1776*                    Generate right bidiagonalizing vectors in A
1777*                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1778*
1779                     CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1780     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1781                     IWORK = IE + N
1782*
1783*                    Perform bidiagonal QR iteration, computing left
1784*                    singular vectors of A in U and computing right
1785*                    singular vectors of A in A
1786*                    (Workspace: need BDSPAC)
1787*
1788                     CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
1789     $                            LDA, U, LDU, DUM, 1, WORK( IWORK ),
1790     $                            INFO )
1791*
1792                  END IF
1793*
1794               ELSE IF( WNTVAS ) THEN
1795*
1796*                 Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1797*                         or 'A')
1798*                 M left singular vectors to be computed in U and
1799*                 N right singular vectors to be computed in VT
1800*
1801                  IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1802*
1803*                    Sufficient workspace for a fast algorithm
1804*
1805                     IU = 1
1806                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
1807*
1808*                       WORK(IU) is LDA by N
1809*
1810                        LDWRKU = LDA
1811                     ELSE
1812*
1813*                       WORK(IU) is N by N
1814*
1815                        LDWRKU = N
1816                     END IF
1817                     ITAU = IU + LDWRKU*N
1818                     IWORK = ITAU + N
1819*
1820*                    Compute A=Q*R, copying result to U
1821*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1822*
1823                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1824     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1825                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1826*
1827*                    Generate Q in U
1828*                    (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1829*
1830                     CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1831     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1832*
1833*                    Copy R to WORK(IU), zeroing out below it
1834*
1835                     CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1836     $                            LDWRKU )
1837                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1838     $                            WORK( IU+1 ), LDWRKU )
1839                     IE = ITAU
1840                     ITAUQ = IE + N
1841                     ITAUP = ITAUQ + N
1842                     IWORK = ITAUP + N
1843*
1844*                    Bidiagonalize R in WORK(IU), copying result to VT
1845*                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1846*
1847                     CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1848     $                            WORK( IE ), WORK( ITAUQ ),
1849     $                            WORK( ITAUP ), WORK( IWORK ),
1850     $                            LWORK-IWORK+1, IERR )
1851                     CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
1852     $                            LDVT )
1853*
1854*                    Generate left bidiagonalizing vectors in WORK(IU)
1855*                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1856*
1857                     CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1858     $                            WORK( ITAUQ ), WORK( IWORK ),
1859     $                            LWORK-IWORK+1, IERR )
1860*
1861*                    Generate right bidiagonalizing vectors in VT
1862*                    (Workspace: need N*N+4*N-1,
1863*                                prefer N*N+3*N+(N-1)*NB)
1864*
1865                     CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1866     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1867                     IWORK = IE + N
1868*
1869*                    Perform bidiagonal QR iteration, computing left
1870*                    singular vectors of R in WORK(IU) and computing
1871*                    right singular vectors of R in VT
1872*                    (Workspace: need N*N+BDSPAC)
1873*
1874                     CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
1875     $                            LDVT, WORK( IU ), LDWRKU, DUM, 1,
1876     $                            WORK( IWORK ), INFO )
1877*
1878*                    Multiply Q in U by left singular vectors of R in
1879*                    WORK(IU), storing result in A
1880*                    (Workspace: need N*N)
1881*
1882                     CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
1883     $                           WORK( IU ), LDWRKU, ZERO, A, LDA )
1884*
1885*                    Copy left singular vectors of A from A to U
1886*
1887                     CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
1888*
1889                  ELSE
1890*
1891*                    Insufficient workspace for a fast algorithm
1892*
1893                     ITAU = 1
1894                     IWORK = ITAU + N
1895*
1896*                    Compute A=Q*R, copying result to U
1897*                    (Workspace: need 2*N, prefer N+N*NB)
1898*
1899                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1900     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1901                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1902*
1903*                    Generate Q in U
1904*                    (Workspace: need N+M, prefer N+M*NB)
1905*
1906                     CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1907     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1908*
1909*                    Copy R from A to VT, zeroing out below it
1910*
1911                     CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
1912                     IF( N.GT.1 )
1913     $                  CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1914     $                               VT( 2, 1 ), LDVT )
1915                     IE = ITAU
1916                     ITAUQ = IE + N
1917                     ITAUP = ITAUQ + N
1918                     IWORK = ITAUP + N
1919*
1920*                    Bidiagonalize R in VT
1921*                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1922*
1923                     CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
1924     $                            WORK( ITAUQ ), WORK( ITAUP ),
1925     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1926*
1927*                    Multiply Q in U by left bidiagonalizing vectors
1928*                    in VT
1929*                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1930*
1931                     CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
1932     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1933     $                            LWORK-IWORK+1, IERR )
1934*
1935*                    Generate right bidiagonalizing vectors in VT
1936*                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1937*
1938                     CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1939     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1940                     IWORK = IE + N
1941*
1942*                    Perform bidiagonal QR iteration, computing left
1943*                    singular vectors of A in U and computing right
1944*                    singular vectors of A in VT
1945*                    (Workspace: need BDSPAC)
1946*
1947                     CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
1948     $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
1949     $                            INFO )
1950*
1951                  END IF
1952*
1953               END IF
1954*
1955            END IF
1956*
1957         ELSE
1958*
1959*           M .LT. MNTHR
1960*
1961*           Path 10 (M at least N, but not much larger)
1962*           Reduce to bidiagonal form without QR decomposition
1963*
1964            IE = 1
1965            ITAUQ = IE + N
1966            ITAUP = ITAUQ + N
1967            IWORK = ITAUP + N
1968*
1969*           Bidiagonalize A
1970*           (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
1971*
1972            CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
1973     $                   WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
1974     $                   IERR )
1975            IF( WNTUAS ) THEN
1976*
1977*              If left singular vectors desired in U, copy result to U
1978*              and generate left bidiagonalizing vectors in U
1979*              (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB)
1980*
1981               CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1982               IF( WNTUS )
1983     $            NCU = N
1984               IF( WNTUA )
1985     $            NCU = M
1986               CALL DORGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ),
1987     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
1988            END IF
1989            IF( WNTVAS ) THEN
1990*
1991*              If right singular vectors desired in VT, copy result to
1992*              VT and generate right bidiagonalizing vectors in VT
1993*              (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1994*
1995               CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
1996               CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1997     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
1998            END IF
1999            IF( WNTUO ) THEN
2000*
2001*              If left singular vectors desired in A, generate left
2002*              bidiagonalizing vectors in A
2003*              (Workspace: need 4*N, prefer 3*N+N*NB)
2004*
2005               CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
2006     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
2007            END IF
2008            IF( WNTVO ) THEN
2009*
2010*              If right singular vectors desired in A, generate right
2011*              bidiagonalizing vectors in A
2012*              (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
2013*
2014               CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
2015     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
2016            END IF
2017            IWORK = IE + N
2018            IF( WNTUAS .OR. WNTUO )
2019     $         NRU = M
2020            IF( WNTUN )
2021     $         NRU = 0
2022            IF( WNTVAS .OR. WNTVO )
2023     $         NCVT = N
2024            IF( WNTVN )
2025     $         NCVT = 0
2026            IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
2027*
2028*              Perform bidiagonal QR iteration, if desired, computing
2029*              left singular vectors in U and computing right singular
2030*              vectors in VT
2031*              (Workspace: need BDSPAC)
2032*
2033               CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
2034     $                      LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
2035            ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
2036*
2037*              Perform bidiagonal QR iteration, if desired, computing
2038*              left singular vectors in U and computing right singular
2039*              vectors in A
2040*              (Workspace: need BDSPAC)
2041*
2042               CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
2043     $                      U, LDU, DUM, 1, WORK( IWORK ), INFO )
2044            ELSE
2045*
2046*              Perform bidiagonal QR iteration, if desired, computing
2047*              left singular vectors in A and computing right singular
2048*              vectors in VT
2049*              (Workspace: need BDSPAC)
2050*
2051               CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
2052     $                      LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
2053            END IF
2054*
2055         END IF
2056*
2057      ELSE
2058*
2059*        A has more columns than rows. If A has sufficiently more
2060*        columns than rows, first reduce using the LQ decomposition (if
2061*        sufficient workspace available)
2062*
2063         IF( N.GE.MNTHR ) THEN
2064*
2065            IF( WNTVN ) THEN
2066*
2067*              Path 1t(N much larger than M, JOBVT='N')
2068*              No right singular vectors to be computed
2069*
2070               ITAU = 1
2071               IWORK = ITAU + M
2072*
2073*              Compute A=L*Q
2074*              (Workspace: need 2*M, prefer M+M*NB)
2075*
2076               CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
2077     $                      LWORK-IWORK+1, IERR )
2078*
2079*              Zero out above L
2080*
2081               CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA )
2082               IE = 1
2083               ITAUQ = IE + M
2084               ITAUP = ITAUQ + M
2085               IWORK = ITAUP + M
2086*
2087*              Bidiagonalize L in A
2088*              (Workspace: need 4*M, prefer 3*M+2*M*NB)
2089*
2090               CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
2091     $                      WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
2092     $                      IERR )
2093               IF( WNTUO .OR. WNTUAS ) THEN
2094*
2095*                 If left singular vectors desired, generate Q
2096*                 (Workspace: need 4*M, prefer 3*M+M*NB)
2097*
2098                  CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
2099     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2100               END IF
2101               IWORK = IE + M
2102               NRU = 0
2103               IF( WNTUO .OR. WNTUAS )
2104     $            NRU = M
2105*
2106*              Perform bidiagonal QR iteration, computing left singular
2107*              vectors of A in A if desired
2108*              (Workspace: need BDSPAC)
2109*
2110               CALL DBDSQR( 'U', M, 0, NRU, 0, S, WORK( IE ), DUM, 1, A,
2111     $                      LDA, DUM, 1, WORK( IWORK ), INFO )
2112*
2113*              If left singular vectors desired in U, copy them there
2114*
2115               IF( WNTUAS )
2116     $            CALL DLACPY( 'F', M, M, A, LDA, U, LDU )
2117*
2118            ELSE IF( WNTVO .AND. WNTUN ) THEN
2119*
2120*              Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2121*              M right singular vectors to be overwritten on A and
2122*              no left singular vectors to be computed
2123*
2124               IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2125*
2126*                 Sufficient workspace for a fast algorithm
2127*
2128                  IR = 1
2129                  IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN
2130*
2131*                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
2132*
2133                     LDWRKU = LDA
2134                     CHUNK = N
2135                     LDWRKR = LDA
2136                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN
2137*
2138*                    WORK(IU) is LDA by N and WORK(IR) is M by M
2139*
2140                     LDWRKU = LDA
2141                     CHUNK = N
2142                     LDWRKR = M
2143                  ELSE
2144*
2145*                    WORK(IU) is M by CHUNK and WORK(IR) is M by M
2146*
2147                     LDWRKU = M
2148                     CHUNK = ( LWORK-M*M-M ) / M
2149                     LDWRKR = M
2150                  END IF
2151                  ITAU = IR + LDWRKR*M
2152                  IWORK = ITAU + M
2153*
2154*                 Compute A=L*Q
2155*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2156*
2157                  CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2158     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2159*
2160*                 Copy L to WORK(IR) and zero out above it
2161*
2162                  CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR )
2163                  CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2164     $                         WORK( IR+LDWRKR ), LDWRKR )
2165*
2166*                 Generate Q in A
2167*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2168*
2169                  CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2170     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2171                  IE = ITAU
2172                  ITAUQ = IE + M
2173                  ITAUP = ITAUQ + M
2174                  IWORK = ITAUP + M
2175*
2176*                 Bidiagonalize L in WORK(IR)
2177*                 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2178*
2179                  CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S, WORK( IE ),
2180     $                         WORK( ITAUQ ), WORK( ITAUP ),
2181     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2182*
2183*                 Generate right vectors bidiagonalizing L
2184*                 (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2185*
2186                  CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2187     $                         WORK( ITAUP ), WORK( IWORK ),
2188     $                         LWORK-IWORK+1, IERR )
2189                  IWORK = IE + M
2190*
2191*                 Perform bidiagonal QR iteration, computing right
2192*                 singular vectors of L in WORK(IR)
2193*                 (Workspace: need M*M+BDSPAC)
2194*
2195                  CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
2196     $                         WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2197     $                         WORK( IWORK ), INFO )
2198                  IU = IE + M
2199*
2200*                 Multiply right singular vectors of L in WORK(IR) by Q
2201*                 in A, storing result in WORK(IU) and copying to A
2202*                 (Workspace: need M*M+2*M, prefer M*M+M*N+M)
2203*
2204                  DO 30 I = 1, N, CHUNK
2205                     BLK = MIN( N-I+1, CHUNK )
2206                     CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ),
2207     $                           LDWRKR, A( 1, I ), LDA, ZERO,
2208     $                           WORK( IU ), LDWRKU )
2209                     CALL DLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
2210     $                            A( 1, I ), LDA )
2211   30             CONTINUE
2212*
2213               ELSE
2214*
2215*                 Insufficient workspace for a fast algorithm
2216*
2217                  IE = 1
2218                  ITAUQ = IE + M
2219                  ITAUP = ITAUQ + M
2220                  IWORK = ITAUP + M
2221*
2222*                 Bidiagonalize A
2223*                 (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
2224*
2225                  CALL DGEBRD( M, N, A, LDA, S, WORK( IE ),
2226     $                         WORK( ITAUQ ), WORK( ITAUP ),
2227     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2228*
2229*                 Generate right vectors bidiagonalizing A
2230*                 (Workspace: need 4*M, prefer 3*M+M*NB)
2231*
2232                  CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
2233     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2234                  IWORK = IE + M
2235*
2236*                 Perform bidiagonal QR iteration, computing right
2237*                 singular vectors of A in A
2238*                 (Workspace: need BDSPAC)
2239*
2240                  CALL DBDSQR( 'L', M, N, 0, 0, S, WORK( IE ), A, LDA,
2241     $                         DUM, 1, DUM, 1, WORK( IWORK ), INFO )
2242*
2243               END IF
2244*
2245            ELSE IF( WNTVO .AND. WNTUAS ) THEN
2246*
2247*              Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2248*              M right singular vectors to be overwritten on A and
2249*              M left singular vectors to be computed in U
2250*
2251               IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2252*
2253*                 Sufficient workspace for a fast algorithm
2254*
2255                  IR = 1
2256                  IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN
2257*
2258*                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
2259*
2260                     LDWRKU = LDA
2261                     CHUNK = N
2262                     LDWRKR = LDA
2263                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN
2264*
2265*                    WORK(IU) is LDA by N and WORK(IR) is M by M
2266*
2267                     LDWRKU = LDA
2268                     CHUNK = N
2269                     LDWRKR = M
2270                  ELSE
2271*
2272*                    WORK(IU) is M by CHUNK and WORK(IR) is M by M
2273*
2274                     LDWRKU = M
2275                     CHUNK = ( LWORK-M*M-M ) / M
2276                     LDWRKR = M
2277                  END IF
2278                  ITAU = IR + LDWRKR*M
2279                  IWORK = ITAU + M
2280*
2281*                 Compute A=L*Q
2282*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2283*
2284                  CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2285     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2286*
2287*                 Copy L to U, zeroing about above it
2288*
2289                  CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
2290                  CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
2291     $                         LDU )
2292*
2293*                 Generate Q in A
2294*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2295*
2296                  CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2297     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2298                  IE = ITAU
2299                  ITAUQ = IE + M
2300                  ITAUP = ITAUQ + M
2301                  IWORK = ITAUP + M
2302*
2303*                 Bidiagonalize L in U, copying result to WORK(IR)
2304*                 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2305*
2306                  CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
2307     $                         WORK( ITAUQ ), WORK( ITAUP ),
2308     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2309                  CALL DLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR )
2310*
2311*                 Generate right vectors bidiagonalizing L in WORK(IR)
2312*                 (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2313*
2314                  CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2315     $                         WORK( ITAUP ), WORK( IWORK ),
2316     $                         LWORK-IWORK+1, IERR )
2317*
2318*                 Generate left vectors bidiagonalizing L in U
2319*                 (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2320*
2321                  CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2322     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2323                  IWORK = IE + M
2324*
2325*                 Perform bidiagonal QR iteration, computing left
2326*                 singular vectors of L in U, and computing right
2327*                 singular vectors of L in WORK(IR)
2328*                 (Workspace: need M*M+BDSPAC)
2329*
2330                  CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2331     $                         WORK( IR ), LDWRKR, U, LDU, DUM, 1,
2332     $                         WORK( IWORK ), INFO )
2333                  IU = IE + M
2334*
2335*                 Multiply right singular vectors of L in WORK(IR) by Q
2336*                 in A, storing result in WORK(IU) and copying to A
2337*                 (Workspace: need M*M+2*M, prefer M*M+M*N+M))
2338*
2339                  DO 40 I = 1, N, CHUNK
2340                     BLK = MIN( N-I+1, CHUNK )
2341                     CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ),
2342     $                           LDWRKR, A( 1, I ), LDA, ZERO,
2343     $                           WORK( IU ), LDWRKU )
2344                     CALL DLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
2345     $                            A( 1, I ), LDA )
2346   40             CONTINUE
2347*
2348               ELSE
2349*
2350*                 Insufficient workspace for a fast algorithm
2351*
2352                  ITAU = 1
2353                  IWORK = ITAU + M
2354*
2355*                 Compute A=L*Q
2356*                 (Workspace: need 2*M, prefer M+M*NB)
2357*
2358                  CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2359     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2360*
2361*                 Copy L to U, zeroing out above it
2362*
2363                  CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
2364                  CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
2365     $                         LDU )
2366*
2367*                 Generate Q in A
2368*                 (Workspace: need 2*M, prefer M+M*NB)
2369*
2370                  CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2371     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2372                  IE = ITAU
2373                  ITAUQ = IE + M
2374                  ITAUP = ITAUQ + M
2375                  IWORK = ITAUP + M
2376*
2377*                 Bidiagonalize L in U
2378*                 (Workspace: need 4*M, prefer 3*M+2*M*NB)
2379*
2380                  CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
2381     $                         WORK( ITAUQ ), WORK( ITAUP ),
2382     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2383*
2384*                 Multiply right vectors bidiagonalizing L by Q in A
2385*                 (Workspace: need 3*M+N, prefer 3*M+N*NB)
2386*
2387                  CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
2388     $                         WORK( ITAUP ), A, LDA, WORK( IWORK ),
2389     $                         LWORK-IWORK+1, IERR )
2390*
2391*                 Generate left vectors bidiagonalizing L in U
2392*                 (Workspace: need 4*M, prefer 3*M+M*NB)
2393*
2394                  CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2395     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2396                  IWORK = IE + M
2397*
2398*                 Perform bidiagonal QR iteration, computing left
2399*                 singular vectors of A in U and computing right
2400*                 singular vectors of A in A
2401*                 (Workspace: need BDSPAC)
2402*
2403                  CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), A, LDA,
2404     $                         U, LDU, DUM, 1, WORK( IWORK ), INFO )
2405*
2406               END IF
2407*
2408            ELSE IF( WNTVS ) THEN
2409*
2410               IF( WNTUN ) THEN
2411*
2412*                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2413*                 M right singular vectors to be computed in VT and
2414*                 no left singular vectors to be computed
2415*
2416                  IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2417*
2418*                    Sufficient workspace for a fast algorithm
2419*
2420                     IR = 1
2421                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
2422*
2423*                       WORK(IR) is LDA by M
2424*
2425                        LDWRKR = LDA
2426                     ELSE
2427*
2428*                       WORK(IR) is M by M
2429*
2430                        LDWRKR = M
2431                     END IF
2432                     ITAU = IR + LDWRKR*M
2433                     IWORK = ITAU + M
2434*
2435*                    Compute A=L*Q
2436*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2437*
2438                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2439     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2440*
2441*                    Copy L to WORK(IR), zeroing out above it
2442*
2443                     CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ),
2444     $                            LDWRKR )
2445                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2446     $                            WORK( IR+LDWRKR ), LDWRKR )
2447*
2448*                    Generate Q in A
2449*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2450*
2451                     CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2452     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2453                     IE = ITAU
2454                     ITAUQ = IE + M
2455                     ITAUP = ITAUQ + M
2456                     IWORK = ITAUP + M
2457*
2458*                    Bidiagonalize L in WORK(IR)
2459*                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2460*
2461                     CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S,
2462     $                            WORK( IE ), WORK( ITAUQ ),
2463     $                            WORK( ITAUP ), WORK( IWORK ),
2464     $                            LWORK-IWORK+1, IERR )
2465*
2466*                    Generate right vectors bidiagonalizing L in
2467*                    WORK(IR)
2468*                    (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
2469*
2470                     CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2471     $                            WORK( ITAUP ), WORK( IWORK ),
2472     $                            LWORK-IWORK+1, IERR )
2473                     IWORK = IE + M
2474*
2475*                    Perform bidiagonal QR iteration, computing right
2476*                    singular vectors of L in WORK(IR)
2477*                    (Workspace: need M*M+BDSPAC)
2478*
2479                     CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
2480     $                            WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2481     $                            WORK( IWORK ), INFO )
2482*
2483*                    Multiply right singular vectors of L in WORK(IR) by
2484*                    Q in A, storing result in VT
2485*                    (Workspace: need M*M)
2486*
2487                     CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ),
2488     $                           LDWRKR, A, LDA, ZERO, VT, LDVT )
2489*
2490                  ELSE
2491*
2492*                    Insufficient workspace for a fast algorithm
2493*
2494                     ITAU = 1
2495                     IWORK = ITAU + M
2496*
2497*                    Compute A=L*Q
2498*                    (Workspace: need 2*M, prefer M+M*NB)
2499*
2500                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2501     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2502*
2503*                    Copy result to VT
2504*
2505                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2506*
2507*                    Generate Q in VT
2508*                    (Workspace: need 2*M, prefer M+M*NB)
2509*
2510                     CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2511     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2512                     IE = ITAU
2513                     ITAUQ = IE + M
2514                     ITAUP = ITAUQ + M
2515                     IWORK = ITAUP + M
2516*
2517*                    Zero out above L in A
2518*
2519                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
2520     $                            LDA )
2521*
2522*                    Bidiagonalize L in A
2523*                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
2524*
2525                     CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
2526     $                            WORK( ITAUQ ), WORK( ITAUP ),
2527     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2528*
2529*                    Multiply right vectors bidiagonalizing L by Q in VT
2530*                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
2531*
2532                     CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
2533     $                            WORK( ITAUP ), VT, LDVT,
2534     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2535                     IWORK = IE + M
2536*
2537*                    Perform bidiagonal QR iteration, computing right
2538*                    singular vectors of A in VT
2539*                    (Workspace: need BDSPAC)
2540*
2541                     CALL DBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT,
2542     $                            LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
2543     $                            INFO )
2544*
2545                  END IF
2546*
2547               ELSE IF( WNTUO ) THEN
2548*
2549*                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2550*                 M right singular vectors to be computed in VT and
2551*                 M left singular vectors to be overwritten on A
2552*
2553                  IF( LWORK.GE.2*M*M+MAX( 4*M, BDSPAC ) ) THEN
2554*
2555*                    Sufficient workspace for a fast algorithm
2556*
2557                     IU = 1
2558                     IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
2559*
2560*                       WORK(IU) is LDA by M and WORK(IR) is LDA by M
2561*
2562                        LDWRKU = LDA
2563                        IR = IU + LDWRKU*M
2564                        LDWRKR = LDA
2565                     ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
2566*
2567*                       WORK(IU) is LDA by M and WORK(IR) is M by M
2568*
2569                        LDWRKU = LDA
2570                        IR = IU + LDWRKU*M
2571                        LDWRKR = M
2572                     ELSE
2573*
2574*                       WORK(IU) is M by M and WORK(IR) is M by M
2575*
2576                        LDWRKU = M
2577                        IR = IU + LDWRKU*M
2578                        LDWRKR = M
2579                     END IF
2580                     ITAU = IR + LDWRKR*M
2581                     IWORK = ITAU + M
2582*
2583*                    Compute A=L*Q
2584*                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2585*
2586                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2587     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2588*
2589*                    Copy L to WORK(IU), zeroing out below it
2590*
2591                     CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
2592     $                            LDWRKU )
2593                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2594     $                            WORK( IU+LDWRKU ), LDWRKU )
2595*
2596*                    Generate Q in A
2597*                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2598*
2599                     CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2600     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2601                     IE = ITAU
2602                     ITAUQ = IE + M
2603                     ITAUP = ITAUQ + M
2604                     IWORK = ITAUP + M
2605*
2606*                    Bidiagonalize L in WORK(IU), copying result to
2607*                    WORK(IR)
2608*                    (Workspace: need 2*M*M+4*M,
2609*                                prefer 2*M*M+3*M+2*M*NB)
2610*
2611                     CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
2612     $                            WORK( IE ), WORK( ITAUQ ),
2613     $                            WORK( ITAUP ), WORK( IWORK ),
2614     $                            LWORK-IWORK+1, IERR )
2615                     CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU,
2616     $                            WORK( IR ), LDWRKR )
2617*
2618*                    Generate right bidiagonalizing vectors in WORK(IU)
2619*                    (Workspace: need 2*M*M+4*M-1,
2620*                                prefer 2*M*M+3*M+(M-1)*NB)
2621*
2622                     CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
2623     $                            WORK( ITAUP ), WORK( IWORK ),
2624     $                            LWORK-IWORK+1, IERR )
2625*
2626*                    Generate left bidiagonalizing vectors in WORK(IR)
2627*                    (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
2628*
2629                     CALL DORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
2630     $                            WORK( ITAUQ ), WORK( IWORK ),
2631     $                            LWORK-IWORK+1, IERR )
2632                     IWORK = IE + M
2633*
2634*                    Perform bidiagonal QR iteration, computing left
2635*                    singular vectors of L in WORK(IR) and computing
2636*                    right singular vectors of L in WORK(IU)
2637*                    (Workspace: need 2*M*M+BDSPAC)
2638*
2639                     CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2640     $                            WORK( IU ), LDWRKU, WORK( IR ),
2641     $                            LDWRKR, DUM, 1, WORK( IWORK ), INFO )
2642*
2643*                    Multiply right singular vectors of L in WORK(IU) by
2644*                    Q in A, storing result in VT
2645*                    (Workspace: need M*M)
2646*
2647                     CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
2648     $                           LDWRKU, A, LDA, ZERO, VT, LDVT )
2649*
2650*                    Copy left singular vectors of L to A
2651*                    (Workspace: need M*M)
2652*
2653                     CALL DLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
2654     $                            LDA )
2655*
2656                  ELSE
2657*
2658*                    Insufficient workspace for a fast algorithm
2659*
2660                     ITAU = 1
2661                     IWORK = ITAU + M
2662*
2663*                    Compute A=L*Q, copying result to VT
2664*                    (Workspace: need 2*M, prefer M+M*NB)
2665*
2666                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2667     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2668                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2669*
2670*                    Generate Q in VT
2671*                    (Workspace: need 2*M, prefer M+M*NB)
2672*
2673                     CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2674     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2675                     IE = ITAU
2676                     ITAUQ = IE + M
2677                     ITAUP = ITAUQ + M
2678                     IWORK = ITAUP + M
2679*
2680*                    Zero out above L in A
2681*
2682                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
2683     $                            LDA )
2684*
2685*                    Bidiagonalize L in A
2686*                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
2687*
2688                     CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
2689     $                            WORK( ITAUQ ), WORK( ITAUP ),
2690     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2691*
2692*                    Multiply right vectors bidiagonalizing L by Q in VT
2693*                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
2694*
2695                     CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
2696     $                            WORK( ITAUP ), VT, LDVT,
2697     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2698*
2699*                    Generate left bidiagonalizing vectors of L in A
2700*                    (Workspace: need 4*M, prefer 3*M+M*NB)
2701*
2702                     CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
2703     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2704                     IWORK = IE + M
2705*
2706*                    Perform bidiagonal QR iteration, compute left
2707*                    singular vectors of A in A and compute right
2708*                    singular vectors of A in VT
2709*                    (Workspace: need BDSPAC)
2710*
2711                     CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
2712     $                            LDVT, A, LDA, DUM, 1, WORK( IWORK ),
2713     $                            INFO )
2714*
2715                  END IF
2716*
2717               ELSE IF( WNTUAS ) THEN
2718*
2719*                 Path 6t(N much larger than M, JOBU='S' or 'A',
2720*                         JOBVT='S')
2721*                 M right singular vectors to be computed in VT and
2722*                 M left singular vectors to be computed in U
2723*
2724                  IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2725*
2726*                    Sufficient workspace for a fast algorithm
2727*
2728                     IU = 1
2729                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
2730*
2731*                       WORK(IU) is LDA by N
2732*
2733                        LDWRKU = LDA
2734                     ELSE
2735*
2736*                       WORK(IU) is LDA by M
2737*
2738                        LDWRKU = M
2739                     END IF
2740                     ITAU = IU + LDWRKU*M
2741                     IWORK = ITAU + M
2742*
2743*                    Compute A=L*Q
2744*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2745*
2746                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2747     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2748*
2749*                    Copy L to WORK(IU), zeroing out above it
2750*
2751                     CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
2752     $                            LDWRKU )
2753                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2754     $                            WORK( IU+LDWRKU ), LDWRKU )
2755*
2756*                    Generate Q in A
2757*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2758*
2759                     CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2760     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2761                     IE = ITAU
2762                     ITAUQ = IE + M
2763                     ITAUP = ITAUQ + M
2764                     IWORK = ITAUP + M
2765*
2766*                    Bidiagonalize L in WORK(IU), copying result to U
2767*                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2768*
2769                     CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
2770     $                            WORK( IE ), WORK( ITAUQ ),
2771     $                            WORK( ITAUP ), WORK( IWORK ),
2772     $                            LWORK-IWORK+1, IERR )
2773                     CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
2774     $                            LDU )
2775*
2776*                    Generate right bidiagonalizing vectors in WORK(IU)
2777*                    (Workspace: need M*M+4*M-1,
2778*                                prefer M*M+3*M+(M-1)*NB)
2779*
2780                     CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
2781     $                            WORK( ITAUP ), WORK( IWORK ),
2782     $                            LWORK-IWORK+1, IERR )
2783*
2784*                    Generate left bidiagonalizing vectors in U
2785*                    (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2786*
2787                     CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2788     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2789                     IWORK = IE + M
2790*
2791*                    Perform bidiagonal QR iteration, computing left
2792*                    singular vectors of L in U and computing right
2793*                    singular vectors of L in WORK(IU)
2794*                    (Workspace: need M*M+BDSPAC)
2795*
2796                     CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2797     $                            WORK( IU ), LDWRKU, U, LDU, DUM, 1,
2798     $                            WORK( IWORK ), INFO )
2799*
2800*                    Multiply right singular vectors of L in WORK(IU) by
2801*                    Q in A, storing result in VT
2802*                    (Workspace: need M*M)
2803*
2804                     CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
2805     $                           LDWRKU, A, LDA, ZERO, VT, LDVT )
2806*
2807                  ELSE
2808*
2809*                    Insufficient workspace for a fast algorithm
2810*
2811                     ITAU = 1
2812                     IWORK = ITAU + M
2813*
2814*                    Compute A=L*Q, copying result to VT
2815*                    (Workspace: need 2*M, prefer M+M*NB)
2816*
2817                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2818     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2819                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2820*
2821*                    Generate Q in VT
2822*                    (Workspace: need 2*M, prefer M+M*NB)
2823*
2824                     CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2825     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2826*
2827*                    Copy L to U, zeroing out above it
2828*
2829                     CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
2830                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
2831     $                            LDU )
2832                     IE = ITAU
2833                     ITAUQ = IE + M
2834                     ITAUP = ITAUQ + M
2835                     IWORK = ITAUP + M
2836*
2837*                    Bidiagonalize L in U
2838*                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
2839*
2840                     CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
2841     $                            WORK( ITAUQ ), WORK( ITAUP ),
2842     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2843*
2844*                    Multiply right bidiagonalizing vectors in U by Q
2845*                    in VT
2846*                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
2847*
2848                     CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
2849     $                            WORK( ITAUP ), VT, LDVT,
2850     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2851*
2852*                    Generate left bidiagonalizing vectors in U
2853*                    (Workspace: need 4*M, prefer 3*M+M*NB)
2854*
2855                     CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2856     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2857                     IWORK = IE + M
2858*
2859*                    Perform bidiagonal QR iteration, computing left
2860*                    singular vectors of A in U and computing right
2861*                    singular vectors of A in VT
2862*                    (Workspace: need BDSPAC)
2863*
2864                     CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
2865     $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
2866     $                            INFO )
2867*
2868                  END IF
2869*
2870               END IF
2871*
2872            ELSE IF( WNTVA ) THEN
2873*
2874               IF( WNTUN ) THEN
2875*
2876*                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')
2877*                 N right singular vectors to be computed in VT and
2878*                 no left singular vectors to be computed
2879*
2880                  IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
2881*
2882*                    Sufficient workspace for a fast algorithm
2883*
2884                     IR = 1
2885                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
2886*
2887*                       WORK(IR) is LDA by M
2888*
2889                        LDWRKR = LDA
2890                     ELSE
2891*
2892*                       WORK(IR) is M by M
2893*
2894                        LDWRKR = M
2895                     END IF
2896                     ITAU = IR + LDWRKR*M
2897                     IWORK = ITAU + M
2898*
2899*                    Compute A=L*Q, copying result to VT
2900*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2901*
2902                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2903     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2904                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2905*
2906*                    Copy L to WORK(IR), zeroing out above it
2907*
2908                     CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ),
2909     $                            LDWRKR )
2910                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2911     $                            WORK( IR+LDWRKR ), LDWRKR )
2912*
2913*                    Generate Q in VT
2914*                    (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
2915*
2916                     CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
2917     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2918                     IE = ITAU
2919                     ITAUQ = IE + M
2920                     ITAUP = ITAUQ + M
2921                     IWORK = ITAUP + M
2922*
2923*                    Bidiagonalize L in WORK(IR)
2924*                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2925*
2926                     CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S,
2927     $                            WORK( IE ), WORK( ITAUQ ),
2928     $                            WORK( ITAUP ), WORK( IWORK ),
2929     $                            LWORK-IWORK+1, IERR )
2930*
2931*                    Generate right bidiagonalizing vectors in WORK(IR)
2932*                    (Workspace: need M*M+4*M-1,
2933*                                prefer M*M+3*M+(M-1)*NB)
2934*
2935                     CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2936     $                            WORK( ITAUP ), WORK( IWORK ),
2937     $                            LWORK-IWORK+1, IERR )
2938                     IWORK = IE + M
2939*
2940*                    Perform bidiagonal QR iteration, computing right
2941*                    singular vectors of L in WORK(IR)
2942*                    (Workspace: need M*M+BDSPAC)
2943*
2944                     CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
2945     $                            WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2946     $                            WORK( IWORK ), INFO )
2947*
2948*                    Multiply right singular vectors of L in WORK(IR) by
2949*                    Q in VT, storing result in A
2950*                    (Workspace: need M*M)
2951*
2952                     CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ),
2953     $                           LDWRKR, VT, LDVT, ZERO, A, LDA )
2954*
2955*                    Copy right singular vectors of A from A to VT
2956*
2957                     CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
2958*
2959                  ELSE
2960*
2961*                    Insufficient workspace for a fast algorithm
2962*
2963                     ITAU = 1
2964                     IWORK = ITAU + M
2965*
2966*                    Compute A=L*Q, copying result to VT
2967*                    (Workspace: need 2*M, prefer M+M*NB)
2968*
2969                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2970     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2971                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2972*
2973*                    Generate Q in VT
2974*                    (Workspace: need M+N, prefer M+N*NB)
2975*
2976                     CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
2977     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2978                     IE = ITAU
2979                     ITAUQ = IE + M
2980                     ITAUP = ITAUQ + M
2981                     IWORK = ITAUP + M
2982*
2983*                    Zero out above L in A
2984*
2985                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
2986     $                            LDA )
2987*
2988*                    Bidiagonalize L in A
2989*                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
2990*
2991                     CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
2992     $                            WORK( ITAUQ ), WORK( ITAUP ),
2993     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2994*
2995*                    Multiply right bidiagonalizing vectors in A by Q
2996*                    in VT
2997*                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
2998*
2999                     CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
3000     $                            WORK( ITAUP ), VT, LDVT,
3001     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3002                     IWORK = IE + M
3003*
3004*                    Perform bidiagonal QR iteration, computing right
3005*                    singular vectors of A in VT
3006*                    (Workspace: need BDSPAC)
3007*
3008                     CALL DBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT,
3009     $                            LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
3010     $                            INFO )
3011*
3012                  END IF
3013*
3014               ELSE IF( WNTUO ) THEN
3015*
3016*                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')
3017*                 N right singular vectors to be computed in VT and
3018*                 M left singular vectors to be overwritten on A
3019*
3020                  IF( LWORK.GE.2*M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
3021*
3022*                    Sufficient workspace for a fast algorithm
3023*
3024                     IU = 1
3025                     IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
3026*
3027*                       WORK(IU) is LDA by M and WORK(IR) is LDA by M
3028*
3029                        LDWRKU = LDA
3030                        IR = IU + LDWRKU*M
3031                        LDWRKR = LDA
3032                     ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
3033*
3034*                       WORK(IU) is LDA by M and WORK(IR) is M by M
3035*
3036                        LDWRKU = LDA
3037                        IR = IU + LDWRKU*M
3038                        LDWRKR = M
3039                     ELSE
3040*
3041*                       WORK(IU) is M by M and WORK(IR) is M by M
3042*
3043                        LDWRKU = M
3044                        IR = IU + LDWRKU*M
3045                        LDWRKR = M
3046                     END IF
3047                     ITAU = IR + LDWRKR*M
3048                     IWORK = ITAU + M
3049*
3050*                    Compute A=L*Q, copying result to VT
3051*                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
3052*
3053                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
3054     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3055                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3056*
3057*                    Generate Q in VT
3058*                    (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
3059*
3060                     CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3061     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3062*
3063*                    Copy L to WORK(IU), zeroing out above it
3064*
3065                     CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
3066     $                            LDWRKU )
3067                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
3068     $                            WORK( IU+LDWRKU ), LDWRKU )
3069                     IE = ITAU
3070                     ITAUQ = IE + M
3071                     ITAUP = ITAUQ + M
3072                     IWORK = ITAUP + M
3073*
3074*                    Bidiagonalize L in WORK(IU), copying result to
3075*                    WORK(IR)
3076*                    (Workspace: need 2*M*M+4*M,
3077*                                prefer 2*M*M+3*M+2*M*NB)
3078*
3079                     CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
3080     $                            WORK( IE ), WORK( ITAUQ ),
3081     $                            WORK( ITAUP ), WORK( IWORK ),
3082     $                            LWORK-IWORK+1, IERR )
3083                     CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU,
3084     $                            WORK( IR ), LDWRKR )
3085*
3086*                    Generate right bidiagonalizing vectors in WORK(IU)
3087*                    (Workspace: need 2*M*M+4*M-1,
3088*                                prefer 2*M*M+3*M+(M-1)*NB)
3089*
3090                     CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
3091     $                            WORK( ITAUP ), WORK( IWORK ),
3092     $                            LWORK-IWORK+1, IERR )
3093*
3094*                    Generate left bidiagonalizing vectors in WORK(IR)
3095*                    (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
3096*
3097                     CALL DORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
3098     $                            WORK( ITAUQ ), WORK( IWORK ),
3099     $                            LWORK-IWORK+1, IERR )
3100                     IWORK = IE + M
3101*
3102*                    Perform bidiagonal QR iteration, computing left
3103*                    singular vectors of L in WORK(IR) and computing
3104*                    right singular vectors of L in WORK(IU)
3105*                    (Workspace: need 2*M*M+BDSPAC)
3106*
3107                     CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
3108     $                            WORK( IU ), LDWRKU, WORK( IR ),
3109     $                            LDWRKR, DUM, 1, WORK( IWORK ), INFO )
3110*
3111*                    Multiply right singular vectors of L in WORK(IU) by
3112*                    Q in VT, storing result in A
3113*                    (Workspace: need M*M)
3114*
3115                     CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
3116     $                           LDWRKU, VT, LDVT, ZERO, A, LDA )
3117*
3118*                    Copy right singular vectors of A from A to VT
3119*
3120                     CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
3121*
3122*                    Copy left singular vectors of A from WORK(IR) to A
3123*
3124                     CALL DLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
3125     $                            LDA )
3126*
3127                  ELSE
3128*
3129*                    Insufficient workspace for a fast algorithm
3130*
3131                     ITAU = 1
3132                     IWORK = ITAU + M
3133*
3134*                    Compute A=L*Q, copying result to VT
3135*                    (Workspace: need 2*M, prefer M+M*NB)
3136*
3137                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
3138     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3139                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3140*
3141*                    Generate Q in VT
3142*                    (Workspace: need M+N, prefer M+N*NB)
3143*
3144                     CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3145     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3146                     IE = ITAU
3147                     ITAUQ = IE + M
3148                     ITAUP = ITAUQ + M
3149                     IWORK = ITAUP + M
3150*
3151*                    Zero out above L in A
3152*
3153                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
3154     $                            LDA )
3155*
3156*                    Bidiagonalize L in A
3157*                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
3158*
3159                     CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
3160     $                            WORK( ITAUQ ), WORK( ITAUP ),
3161     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3162*
3163*                    Multiply right bidiagonalizing vectors in A by Q
3164*                    in VT
3165*                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
3166*
3167                     CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
3168     $                            WORK( ITAUP ), VT, LDVT,
3169     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3170*
3171*                    Generate left bidiagonalizing vectors in A
3172*                    (Workspace: need 4*M, prefer 3*M+M*NB)
3173*
3174                     CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
3175     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3176                     IWORK = IE + M
3177*
3178*                    Perform bidiagonal QR iteration, computing left
3179*                    singular vectors of A in A and computing right
3180*                    singular vectors of A in VT
3181*                    (Workspace: need BDSPAC)
3182*
3183                     CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
3184     $                            LDVT, A, LDA, DUM, 1, WORK( IWORK ),
3185     $                            INFO )
3186*
3187                  END IF
3188*
3189               ELSE IF( WNTUAS ) THEN
3190*
3191*                 Path 9t(N much larger than M, JOBU='S' or 'A',
3192*                         JOBVT='A')
3193*                 N right singular vectors to be computed in VT and
3194*                 M left singular vectors to be computed in U
3195*
3196                  IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
3197*
3198*                    Sufficient workspace for a fast algorithm
3199*
3200                     IU = 1
3201                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
3202*
3203*                       WORK(IU) is LDA by M
3204*
3205                        LDWRKU = LDA
3206                     ELSE
3207*
3208*                       WORK(IU) is M by M
3209*
3210                        LDWRKU = M
3211                     END IF
3212                     ITAU = IU + LDWRKU*M
3213                     IWORK = ITAU + M
3214*
3215*                    Compute A=L*Q, copying result to VT
3216*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
3217*
3218                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
3219     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3220                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3221*
3222*                    Generate Q in VT
3223*                    (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
3224*
3225                     CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3226     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3227*
3228*                    Copy L to WORK(IU), zeroing out above it
3229*
3230                     CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
3231     $                            LDWRKU )
3232                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
3233     $                            WORK( IU+LDWRKU ), LDWRKU )
3234                     IE = ITAU
3235                     ITAUQ = IE + M
3236                     ITAUP = ITAUQ + M
3237                     IWORK = ITAUP + M
3238*
3239*                    Bidiagonalize L in WORK(IU), copying result to U
3240*                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
3241*
3242                     CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
3243     $                            WORK( IE ), WORK( ITAUQ ),
3244     $                            WORK( ITAUP ), WORK( IWORK ),
3245     $                            LWORK-IWORK+1, IERR )
3246                     CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
3247     $                            LDU )
3248*
3249*                    Generate right bidiagonalizing vectors in WORK(IU)
3250*                    (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
3251*
3252                     CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
3253     $                            WORK( ITAUP ), WORK( IWORK ),
3254     $                            LWORK-IWORK+1, IERR )
3255*
3256*                    Generate left bidiagonalizing vectors in U
3257*                    (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
3258*
3259                     CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3260     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3261                     IWORK = IE + M
3262*
3263*                    Perform bidiagonal QR iteration, computing left
3264*                    singular vectors of L in U and computing right
3265*                    singular vectors of L in WORK(IU)
3266*                    (Workspace: need M*M+BDSPAC)
3267*
3268                     CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
3269     $                            WORK( IU ), LDWRKU, U, LDU, DUM, 1,
3270     $                            WORK( IWORK ), INFO )
3271*
3272*                    Multiply right singular vectors of L in WORK(IU) by
3273*                    Q in VT, storing result in A
3274*                    (Workspace: need M*M)
3275*
3276                     CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
3277     $                           LDWRKU, VT, LDVT, ZERO, A, LDA )
3278*
3279*                    Copy right singular vectors of A from A to VT
3280*
3281                     CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
3282*
3283                  ELSE
3284*
3285*                    Insufficient workspace for a fast algorithm
3286*
3287                     ITAU = 1
3288                     IWORK = ITAU + M
3289*
3290*                    Compute A=L*Q, copying result to VT
3291*                    (Workspace: need 2*M, prefer M+M*NB)
3292*
3293                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
3294     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3295                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3296*
3297*                    Generate Q in VT
3298*                    (Workspace: need M+N, prefer M+N*NB)
3299*
3300                     CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3301     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3302*
3303*                    Copy L to U, zeroing out above it
3304*
3305                     CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
3306                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
3307     $                            LDU )
3308                     IE = ITAU
3309                     ITAUQ = IE + M
3310                     ITAUP = ITAUQ + M
3311                     IWORK = ITAUP + M
3312*
3313*                    Bidiagonalize L in U
3314*                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
3315*
3316                     CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
3317     $                            WORK( ITAUQ ), WORK( ITAUP ),
3318     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3319*
3320*                    Multiply right bidiagonalizing vectors in U by Q
3321*                    in VT
3322*                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
3323*
3324                     CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
3325     $                            WORK( ITAUP ), VT, LDVT,
3326     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3327*
3328*                    Generate left bidiagonalizing vectors in U
3329*                    (Workspace: need 4*M, prefer 3*M+M*NB)
3330*
3331                     CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3332     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3333                     IWORK = IE + M
3334*
3335*                    Perform bidiagonal QR iteration, computing left
3336*                    singular vectors of A in U and computing right
3337*                    singular vectors of A in VT
3338*                    (Workspace: need BDSPAC)
3339*
3340                     CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
3341     $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
3342     $                            INFO )
3343*
3344                  END IF
3345*
3346               END IF
3347*
3348            END IF
3349*
3350         ELSE
3351*
3352*           N .LT. MNTHR
3353*
3354*           Path 10t(N greater than M, but not much larger)
3355*           Reduce to bidiagonal form without LQ decomposition
3356*
3357            IE = 1
3358            ITAUQ = IE + M
3359            ITAUP = ITAUQ + M
3360            IWORK = ITAUP + M
3361*
3362*           Bidiagonalize A
3363*           (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
3364*
3365            CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
3366     $                   WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
3367     $                   IERR )
3368            IF( WNTUAS ) THEN
3369*
3370*              If left singular vectors desired in U, copy result to U
3371*              and generate left bidiagonalizing vectors in U
3372*              (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3373*
3374               CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
3375               CALL DORGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ),
3376     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3377            END IF
3378            IF( WNTVAS ) THEN
3379*
3380*              If right singular vectors desired in VT, copy result to
3381*              VT and generate right bidiagonalizing vectors in VT
3382*              (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB)
3383*
3384               CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3385               IF( WNTVA )
3386     $            NRVT = N
3387               IF( WNTVS )
3388     $            NRVT = M
3389               CALL DORGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ),
3390     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3391            END IF
3392            IF( WNTUO ) THEN
3393*
3394*              If left singular vectors desired in A, generate left
3395*              bidiagonalizing vectors in A
3396*              (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3397*
3398               CALL DORGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ),
3399     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3400            END IF
3401            IF( WNTVO ) THEN
3402*
3403*              If right singular vectors desired in A, generate right
3404*              bidiagonalizing vectors in A
3405*              (Workspace: need 4*M, prefer 3*M+M*NB)
3406*
3407               CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
3408     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3409            END IF
3410            IWORK = IE + M
3411            IF( WNTUAS .OR. WNTUO )
3412     $         NRU = M
3413            IF( WNTUN )
3414     $         NRU = 0
3415            IF( WNTVAS .OR. WNTVO )
3416     $         NCVT = N
3417            IF( WNTVN )
3418     $         NCVT = 0
3419            IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
3420*
3421*              Perform bidiagonal QR iteration, if desired, computing
3422*              left singular vectors in U and computing right singular
3423*              vectors in VT
3424*              (Workspace: need BDSPAC)
3425*
3426               CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
3427     $                      LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
3428            ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
3429*
3430*              Perform bidiagonal QR iteration, if desired, computing
3431*              left singular vectors in U and computing right singular
3432*              vectors in A
3433*              (Workspace: need BDSPAC)
3434*
3435               CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
3436     $                      U, LDU, DUM, 1, WORK( IWORK ), INFO )
3437            ELSE
3438*
3439*              Perform bidiagonal QR iteration, if desired, computing
3440*              left singular vectors in A and computing right singular
3441*              vectors in VT
3442*              (Workspace: need BDSPAC)
3443*
3444               CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
3445     $                      LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
3446            END IF
3447*
3448         END IF
3449*
3450      END IF
3451*
3452*     If DBDSQR failed to converge, copy unconverged superdiagonals
3453*     to WORK( 2:MINMN )
3454*
3455      IF( INFO.NE.0 ) THEN
3456         IF( IE.GT.2 ) THEN
3457            DO 50 I = 1, MINMN - 1
3458               WORK( I+1 ) = WORK( I+IE-1 )
3459   50       CONTINUE
3460         END IF
3461         IF( IE.LT.2 ) THEN
3462            DO 60 I = MINMN - 1, 1, -1
3463               WORK( I+1 ) = WORK( I+IE-1 )
3464   60       CONTINUE
3465         END IF
3466      END IF
3467*
3468*     Undo scaling if necessary
3469*
3470      IF( ISCL.EQ.1 ) THEN
3471         IF( ANRM.GT.BIGNUM )
3472     $      CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
3473     $                   IERR )
3474         IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM )
3475     $      CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1, WORK( 2 ),
3476     $                   MINMN, IERR )
3477         IF( ANRM.LT.SMLNUM )
3478     $      CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
3479     $                   IERR )
3480         IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM )
3481     $      CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1, WORK( 2 ),
3482     $                   MINMN, IERR )
3483      END IF
3484*
3485*     Return optimal workspace in WORK(1)
3486*
3487      WORK( 1 ) = MAXWRK
3488*
3489      RETURN
3490*
3491*     End of DGESVD
3492*
3493      END
3494c $Id$
3495