1*> \brief <b> DGESVD computes the singular value decomposition (SVD) for GE matrices</b>
2*
3*  =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6*            http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
9*> Download DGESVD + dependencies
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgesvd.f">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgesvd.f">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgesvd.f">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18*  Definition:
19*  ===========
20*
21*       SUBROUTINE DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
22*                          WORK, LWORK, INFO )
23*
24*       .. Scalar Arguments ..
25*       CHARACTER          JOBU, JOBVT
26*       INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N
27*       ..
28*       .. Array Arguments ..
29*       DOUBLE PRECISION   A( LDA, * ), S( * ), U( LDU, * ),
30*      $                   VT( LDVT, * ), WORK( * )
31*       ..
32*
33*
34*> \par Purpose:
35*  =============
36*>
37*> \verbatim
38*>
39*> DGESVD computes the singular value decomposition (SVD) of a real
40*> M-by-N matrix A, optionally computing the left and/or right singular
41*> vectors. The SVD is written
42*>
43*>      A = U * SIGMA * transpose(V)
44*>
45*> where SIGMA is an M-by-N matrix which is zero except for its
46*> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
47*> V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA
48*> are the singular values of A; they are real and non-negative, and
49*> are returned in descending order.  The first min(m,n) columns of
50*> U and V are the left and right singular vectors of A.
51*>
52*> Note that the routine returns V**T, not V.
53*> \endverbatim
54*
55*  Arguments:
56*  ==========
57*
58*> \param[in] JOBU
59*> \verbatim
60*>          JOBU is CHARACTER*1
61*>          Specifies options for computing all or part of the matrix U:
62*>          = 'A':  all M columns of U are returned in array U:
63*>          = 'S':  the first min(m,n) columns of U (the left singular
64*>                  vectors) are returned in the array U;
65*>          = 'O':  the first min(m,n) columns of U (the left singular
66*>                  vectors) are overwritten on the array A;
67*>          = 'N':  no columns of U (no left singular vectors) are
68*>                  computed.
69*> \endverbatim
70*>
71*> \param[in] JOBVT
72*> \verbatim
73*>          JOBVT is CHARACTER*1
74*>          Specifies options for computing all or part of the matrix
75*>          V**T:
76*>          = 'A':  all N rows of V**T are returned in the array VT;
77*>          = 'S':  the first min(m,n) rows of V**T (the right singular
78*>                  vectors) are returned in the array VT;
79*>          = 'O':  the first min(m,n) rows of V**T (the right singular
80*>                  vectors) are overwritten on the array A;
81*>          = 'N':  no rows of V**T (no right singular vectors) are
82*>                  computed.
83*>
84*>          JOBVT and JOBU cannot both be 'O'.
85*> \endverbatim
86*>
87*> \param[in] M
88*> \verbatim
89*>          M is INTEGER
90*>          The number of rows of the input matrix A.  M >= 0.
91*> \endverbatim
92*>
93*> \param[in] N
94*> \verbatim
95*>          N is INTEGER
96*>          The number of columns of the input matrix A.  N >= 0.
97*> \endverbatim
98*>
99*> \param[in,out] A
100*> \verbatim
101*>          A is DOUBLE PRECISION array, dimension (LDA,N)
102*>          On entry, the M-by-N matrix A.
103*>          On exit,
104*>          if JOBU = 'O',  A is overwritten with the first min(m,n)
105*>                          columns of U (the left singular vectors,
106*>                          stored columnwise);
107*>          if JOBVT = 'O', A is overwritten with the first min(m,n)
108*>                          rows of V**T (the right singular vectors,
109*>                          stored rowwise);
110*>          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
111*>                          are destroyed.
112*> \endverbatim
113*>
114*> \param[in] LDA
115*> \verbatim
116*>          LDA is INTEGER
117*>          The leading dimension of the array A.  LDA >= max(1,M).
118*> \endverbatim
119*>
120*> \param[out] S
121*> \verbatim
122*>          S is DOUBLE PRECISION array, dimension (min(M,N))
123*>          The singular values of A, sorted so that S(i) >= S(i+1).
124*> \endverbatim
125*>
126*> \param[out] U
127*> \verbatim
128*>          U is DOUBLE PRECISION array, dimension (LDU,UCOL)
129*>          (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
130*>          If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
131*>          if JOBU = 'S', U contains the first min(m,n) columns of U
132*>          (the left singular vectors, stored columnwise);
133*>          if JOBU = 'N' or 'O', U is not referenced.
134*> \endverbatim
135*>
136*> \param[in] LDU
137*> \verbatim
138*>          LDU is INTEGER
139*>          The leading dimension of the array U.  LDU >= 1; if
140*>          JOBU = 'S' or 'A', LDU >= M.
141*> \endverbatim
142*>
143*> \param[out] VT
144*> \verbatim
145*>          VT is DOUBLE PRECISION array, dimension (LDVT,N)
146*>          If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
147*>          V**T;
148*>          if JOBVT = 'S', VT contains the first min(m,n) rows of
149*>          V**T (the right singular vectors, stored rowwise);
150*>          if JOBVT = 'N' or 'O', VT is not referenced.
151*> \endverbatim
152*>
153*> \param[in] LDVT
154*> \verbatim
155*>          LDVT is INTEGER
156*>          The leading dimension of the array VT.  LDVT >= 1; if
157*>          JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
158*> \endverbatim
159*>
160*> \param[out] WORK
161*> \verbatim
162*>          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
163*>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
164*>          if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
165*>          superdiagonal elements of an upper bidiagonal matrix B
166*>          whose diagonal is in S (not necessarily sorted). B
167*>          satisfies A = U * B * VT, so it has the same singular values
168*>          as A, and singular vectors related by U and VT.
169*> \endverbatim
170*>
171*> \param[in] LWORK
172*> \verbatim
173*>          LWORK is INTEGER
174*>          The dimension of the array WORK.
175*>          LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code):
176*>             - PATH 1  (M much larger than N, JOBU='N')
177*>             - PATH 1t (N much larger than M, JOBVT='N')
178*>          LWORK >= MAX(1,3*MIN(M,N) + MAX(M,N),5*MIN(M,N)) for the other paths
179*>          For good performance, LWORK should generally be larger.
180*>
181*>          If LWORK = -1, then a workspace query is assumed; the routine
182*>          only calculates the optimal size of the WORK array, returns
183*>          this value as the first entry of the WORK array, and no error
184*>          message related to LWORK is issued by XERBLA.
185*> \endverbatim
186*>
187*> \param[out] INFO
188*> \verbatim
189*>          INFO is INTEGER
190*>          = 0:  successful exit.
191*>          < 0:  if INFO = -i, the i-th argument had an illegal value.
192*>          > 0:  if DBDSQR did not converge, INFO specifies how many
193*>                superdiagonals of an intermediate bidiagonal form B
194*>                did not converge to zero. See the description of WORK
195*>                above for details.
196*> \endverbatim
197*
198*  Authors:
199*  ========
200*
201*> \author Univ. of Tennessee
202*> \author Univ. of California Berkeley
203*> \author Univ. of Colorado Denver
204*> \author NAG Ltd.
205*
206*> \ingroup doubleGEsing
207*
208*  =====================================================================
209      SUBROUTINE DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU,
210     $                   VT, LDVT, WORK, LWORK, INFO )
211*
212*  -- LAPACK driver routine --
213*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
214*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
215*
216*     .. Scalar Arguments ..
217      CHARACTER          JOBU, JOBVT
218      INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N
219*     ..
220*     .. Array Arguments ..
221      DOUBLE PRECISION   A( LDA, * ), S( * ), U( LDU, * ),
222     $                   VT( LDVT, * ), WORK( * )
223*     ..
224*
225*  =====================================================================
226*
227*     .. Parameters ..
228      DOUBLE PRECISION   ZERO, ONE
229      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
230*     ..
231*     .. Local Scalars ..
232      LOGICAL            LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
233     $                   WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS
234      INTEGER            BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL,
235     $                   ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU,
236     $                   MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU,
237     $                   NRVT, WRKBL
238      INTEGER            LWORK_DGEQRF, LWORK_DORGQR_N, LWORK_DORGQR_M,
239     $                   LWORK_DGEBRD, LWORK_DORGBR_P, LWORK_DORGBR_Q,
240     $                   LWORK_DGELQF, LWORK_DORGLQ_N, LWORK_DORGLQ_M
241      DOUBLE PRECISION   ANRM, BIGNUM, EPS, SMLNUM
242*     ..
243*     .. Local Arrays ..
244      DOUBLE PRECISION   DUM( 1 )
245*     ..
246*     .. External Subroutines ..
247      EXTERNAL           DBDSQR, DGEBRD, DGELQF, DGEMM, DGEQRF, DLACPY,
248     $                   DLASCL, DLASET, DORGBR, DORGLQ, DORGQR, DORMBR,
249     $                   XERBLA
250*     ..
251*     .. External Functions ..
252      LOGICAL            LSAME
253      INTEGER            ILAENV
254      DOUBLE PRECISION   DLAMCH, DLANGE
255      EXTERNAL           LSAME, ILAENV, DLAMCH, DLANGE
256*     ..
257*     .. Intrinsic Functions ..
258      INTRINSIC          MAX, MIN, SQRT
259*     ..
260*     .. Executable Statements ..
261*
262*     Test the input arguments
263*
264      INFO = 0
265      MINMN = MIN( M, N )
266      WNTUA = LSAME( JOBU, 'A' )
267      WNTUS = LSAME( JOBU, 'S' )
268      WNTUAS = WNTUA .OR. WNTUS
269      WNTUO = LSAME( JOBU, 'O' )
270      WNTUN = LSAME( JOBU, 'N' )
271      WNTVA = LSAME( JOBVT, 'A' )
272      WNTVS = LSAME( JOBVT, 'S' )
273      WNTVAS = WNTVA .OR. WNTVS
274      WNTVO = LSAME( JOBVT, 'O' )
275      WNTVN = LSAME( JOBVT, 'N' )
276      LQUERY = ( LWORK.EQ.-1 )
277*
278      IF( .NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN
279         INFO = -1
280      ELSE IF( .NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR.
281     $         ( WNTVO .AND. WNTUO ) ) THEN
282         INFO = -2
283      ELSE IF( M.LT.0 ) THEN
284         INFO = -3
285      ELSE IF( N.LT.0 ) THEN
286         INFO = -4
287      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
288         INFO = -6
289      ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN
290         INFO = -9
291      ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR.
292     $         ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN
293         INFO = -11
294      END IF
295*
296*     Compute workspace
297*      (Note: Comments in the code beginning "Workspace:" describe the
298*       minimal amount of workspace needed at that point in the code,
299*       as well as the preferred amount for good performance.
300*       NB refers to the optimal block size for the immediately
301*       following subroutine, as returned by ILAENV.)
302*
303      IF( INFO.EQ.0 ) THEN
304         MINWRK = 1
305         MAXWRK = 1
306         IF( M.GE.N .AND. MINMN.GT.0 ) THEN
307*
308*           Compute space needed for DBDSQR
309*
310            MNTHR = ILAENV( 6, 'DGESVD', JOBU // JOBVT, M, N, 0, 0 )
311            BDSPAC = 5*N
312*           Compute space needed for DGEQRF
313            CALL DGEQRF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR )
314            LWORK_DGEQRF = INT( DUM(1) )
315*           Compute space needed for DORGQR
316            CALL DORGQR( M, N, N, A, LDA, DUM(1), DUM(1), -1, IERR )
317            LWORK_DORGQR_N = INT( DUM(1) )
318            CALL DORGQR( M, M, N, A, LDA, DUM(1), DUM(1), -1, IERR )
319            LWORK_DORGQR_M = INT( DUM(1) )
320*           Compute space needed for DGEBRD
321            CALL DGEBRD( N, N, A, LDA, S, DUM(1), DUM(1),
322     $                   DUM(1), DUM(1), -1, IERR )
323            LWORK_DGEBRD = INT( DUM(1) )
324*           Compute space needed for DORGBR P
325            CALL DORGBR( 'P', N, N, N, A, LDA, DUM(1),
326     $                   DUM(1), -1, IERR )
327            LWORK_DORGBR_P = INT( DUM(1) )
328*           Compute space needed for DORGBR Q
329            CALL DORGBR( 'Q', N, N, N, A, LDA, DUM(1),
330     $                   DUM(1), -1, IERR )
331            LWORK_DORGBR_Q = INT( DUM(1) )
332*
333            IF( M.GE.MNTHR ) THEN
334               IF( WNTUN ) THEN
335*
336*                 Path 1 (M much larger than N, JOBU='N')
337*
338                  MAXWRK = N + LWORK_DGEQRF
339                  MAXWRK = MAX( MAXWRK, 3*N + LWORK_DGEBRD )
340                  IF( WNTVO .OR. WNTVAS )
341     $               MAXWRK = MAX( MAXWRK, 3*N + LWORK_DORGBR_P )
342                  MAXWRK = MAX( MAXWRK, BDSPAC )
343                  MINWRK = MAX( 4*N, BDSPAC )
344               ELSE IF( WNTUO .AND. WNTVN ) THEN
345*
346*                 Path 2 (M much larger than N, JOBU='O', JOBVT='N')
347*
348                  WRKBL = N + LWORK_DGEQRF
349                  WRKBL = MAX( WRKBL, N + LWORK_DORGQR_N )
350                  WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD )
351                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q )
352                  WRKBL = MAX( WRKBL, BDSPAC )
353                  MAXWRK = MAX( N*N + WRKBL, N*N + M*N + N )
354                  MINWRK = MAX( 3*N + M, BDSPAC )
355               ELSE IF( WNTUO .AND. WNTVAS ) THEN
356*
357*                 Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
358*                 'A')
359*
360                  WRKBL = N + LWORK_DGEQRF
361                  WRKBL = MAX( WRKBL, N + LWORK_DORGQR_N )
362                  WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD )
363                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q )
364                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_P )
365                  WRKBL = MAX( WRKBL, BDSPAC )
366                  MAXWRK = MAX( N*N + WRKBL, N*N + M*N + N )
367                  MINWRK = MAX( 3*N + M, BDSPAC )
368               ELSE IF( WNTUS .AND. WNTVN ) THEN
369*
370*                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')
371*
372                  WRKBL = N + LWORK_DGEQRF
373                  WRKBL = MAX( WRKBL, N + LWORK_DORGQR_N )
374                  WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD )
375                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q )
376                  WRKBL = MAX( WRKBL, BDSPAC )
377                  MAXWRK = N*N + WRKBL
378                  MINWRK = MAX( 3*N + M, BDSPAC )
379               ELSE IF( WNTUS .AND. WNTVO ) THEN
380*
381*                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')
382*
383                  WRKBL = N + LWORK_DGEQRF
384                  WRKBL = MAX( WRKBL, N + LWORK_DORGQR_N )
385                  WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD )
386                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q )
387                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_P )
388                  WRKBL = MAX( WRKBL, BDSPAC )
389                  MAXWRK = 2*N*N + WRKBL
390                  MINWRK = MAX( 3*N + M, BDSPAC )
391               ELSE IF( WNTUS .AND. WNTVAS ) THEN
392*
393*                 Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
394*                 'A')
395*
396                  WRKBL = N + LWORK_DGEQRF
397                  WRKBL = MAX( WRKBL, N + LWORK_DORGQR_N )
398                  WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD )
399                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q )
400                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_P )
401                  WRKBL = MAX( WRKBL, BDSPAC )
402                  MAXWRK = N*N + WRKBL
403                  MINWRK = MAX( 3*N + M, BDSPAC )
404               ELSE IF( WNTUA .AND. WNTVN ) THEN
405*
406*                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')
407*
408                  WRKBL = N + LWORK_DGEQRF
409                  WRKBL = MAX( WRKBL, N + LWORK_DORGQR_M )
410                  WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD )
411                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q )
412                  WRKBL = MAX( WRKBL, BDSPAC )
413                  MAXWRK = N*N + WRKBL
414                  MINWRK = MAX( 3*N + M, BDSPAC )
415               ELSE IF( WNTUA .AND. WNTVO ) THEN
416*
417*                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')
418*
419                  WRKBL = N + LWORK_DGEQRF
420                  WRKBL = MAX( WRKBL, N + LWORK_DORGQR_M )
421                  WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD )
422                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q )
423                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_P )
424                  WRKBL = MAX( WRKBL, BDSPAC )
425                  MAXWRK = 2*N*N + WRKBL
426                  MINWRK = MAX( 3*N + M, BDSPAC )
427               ELSE IF( WNTUA .AND. WNTVAS ) THEN
428*
429*                 Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
430*                 'A')
431*
432                  WRKBL = N + LWORK_DGEQRF
433                  WRKBL = MAX( WRKBL, N + LWORK_DORGQR_M )
434                  WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD )
435                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q )
436                  WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_P )
437                  WRKBL = MAX( WRKBL, BDSPAC )
438                  MAXWRK = N*N + WRKBL
439                  MINWRK = MAX( 3*N + M, BDSPAC )
440               END IF
441            ELSE
442*
443*              Path 10 (M at least N, but not much larger)
444*
445               CALL DGEBRD( M, N, A, LDA, S, DUM(1), DUM(1),
446     $                   DUM(1), DUM(1), -1, IERR )
447               LWORK_DGEBRD = INT( DUM(1) )
448               MAXWRK = 3*N + LWORK_DGEBRD
449               IF( WNTUS .OR. WNTUO ) THEN
450                  CALL DORGBR( 'Q', M, N, N, A, LDA, DUM(1),
451     $                   DUM(1), -1, IERR )
452                  LWORK_DORGBR_Q = INT( DUM(1) )
453                  MAXWRK = MAX( MAXWRK, 3*N + LWORK_DORGBR_Q )
454               END IF
455               IF( WNTUA ) THEN
456                  CALL DORGBR( 'Q', M, M, N, A, LDA, DUM(1),
457     $                   DUM(1), -1, IERR )
458                  LWORK_DORGBR_Q = INT( DUM(1) )
459                  MAXWRK = MAX( MAXWRK, 3*N + LWORK_DORGBR_Q )
460               END IF
461               IF( .NOT.WNTVN ) THEN
462                 MAXWRK = MAX( MAXWRK, 3*N + LWORK_DORGBR_P )
463               END IF
464               MAXWRK = MAX( MAXWRK, BDSPAC )
465               MINWRK = MAX( 3*N + M, BDSPAC )
466            END IF
467         ELSE IF( MINMN.GT.0 ) THEN
468*
469*           Compute space needed for DBDSQR
470*
471            MNTHR = ILAENV( 6, 'DGESVD', JOBU // JOBVT, M, N, 0, 0 )
472            BDSPAC = 5*M
473*           Compute space needed for DGELQF
474            CALL DGELQF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR )
475            LWORK_DGELQF = INT( DUM(1) )
476*           Compute space needed for DORGLQ
477            CALL DORGLQ( N, N, M, DUM(1), N, DUM(1), DUM(1), -1, IERR )
478            LWORK_DORGLQ_N = INT( DUM(1) )
479            CALL DORGLQ( M, N, M, A, LDA, DUM(1), DUM(1), -1, IERR )
480            LWORK_DORGLQ_M = INT( DUM(1) )
481*           Compute space needed for DGEBRD
482            CALL DGEBRD( M, M, A, LDA, S, DUM(1), DUM(1),
483     $                   DUM(1), DUM(1), -1, IERR )
484            LWORK_DGEBRD = INT( DUM(1) )
485*            Compute space needed for DORGBR P
486            CALL DORGBR( 'P', M, M, M, A, N, DUM(1),
487     $                   DUM(1), -1, IERR )
488            LWORK_DORGBR_P = INT( DUM(1) )
489*           Compute space needed for DORGBR Q
490            CALL DORGBR( 'Q', M, M, M, A, N, DUM(1),
491     $                   DUM(1), -1, IERR )
492            LWORK_DORGBR_Q = INT( DUM(1) )
493            IF( N.GE.MNTHR ) THEN
494               IF( WNTVN ) THEN
495*
496*                 Path 1t(N much larger than M, JOBVT='N')
497*
498                  MAXWRK = M + LWORK_DGELQF
499                  MAXWRK = MAX( MAXWRK, 3*M + LWORK_DGEBRD )
500                  IF( WNTUO .OR. WNTUAS )
501     $               MAXWRK = MAX( MAXWRK, 3*M + LWORK_DORGBR_Q )
502                  MAXWRK = MAX( MAXWRK, BDSPAC )
503                  MINWRK = MAX( 4*M, BDSPAC )
504               ELSE IF( WNTVO .AND. WNTUN ) THEN
505*
506*                 Path 2t(N much larger than M, JOBU='N', JOBVT='O')
507*
508                  WRKBL = M + LWORK_DGELQF
509                  WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_M )
510                  WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD )
511                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P )
512                  WRKBL = MAX( WRKBL, BDSPAC )
513                  MAXWRK = MAX( M*M + WRKBL, M*M + M*N + M )
514                  MINWRK = MAX( 3*M + N, BDSPAC )
515               ELSE IF( WNTVO .AND. WNTUAS ) THEN
516*
517*                 Path 3t(N much larger than M, JOBU='S' or 'A',
518*                 JOBVT='O')
519*
520                  WRKBL = M + LWORK_DGELQF
521                  WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_M )
522                  WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD )
523                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P )
524                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_Q )
525                  WRKBL = MAX( WRKBL, BDSPAC )
526                  MAXWRK = MAX( M*M + WRKBL, M*M + M*N + M )
527                  MINWRK = MAX( 3*M + N, BDSPAC )
528               ELSE IF( WNTVS .AND. WNTUN ) THEN
529*
530*                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')
531*
532                  WRKBL = M + LWORK_DGELQF
533                  WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_M )
534                  WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD )
535                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P )
536                  WRKBL = MAX( WRKBL, BDSPAC )
537                  MAXWRK = M*M + WRKBL
538                  MINWRK = MAX( 3*M + N, BDSPAC )
539               ELSE IF( WNTVS .AND. WNTUO ) THEN
540*
541*                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')
542*
543                  WRKBL = M + LWORK_DGELQF
544                  WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_M )
545                  WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD )
546                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P )
547                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_Q )
548                  WRKBL = MAX( WRKBL, BDSPAC )
549                  MAXWRK = 2*M*M + WRKBL
550                  MINWRK = MAX( 3*M + N, BDSPAC )
551               ELSE IF( WNTVS .AND. WNTUAS ) THEN
552*
553*                 Path 6t(N much larger than M, JOBU='S' or 'A',
554*                 JOBVT='S')
555*
556                  WRKBL = M + LWORK_DGELQF
557                  WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_M )
558                  WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD )
559                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P )
560                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_Q )
561                  WRKBL = MAX( WRKBL, BDSPAC )
562                  MAXWRK = M*M + WRKBL
563                  MINWRK = MAX( 3*M + N, BDSPAC )
564               ELSE IF( WNTVA .AND. WNTUN ) THEN
565*
566*                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')
567*
568                  WRKBL = M + LWORK_DGELQF
569                  WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_N )
570                  WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD )
571                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P )
572                  WRKBL = MAX( WRKBL, BDSPAC )
573                  MAXWRK = M*M + WRKBL
574                  MINWRK = MAX( 3*M + N, BDSPAC )
575               ELSE IF( WNTVA .AND. WNTUO ) THEN
576*
577*                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')
578*
579                  WRKBL = M + LWORK_DGELQF
580                  WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_N )
581                  WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD )
582                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P )
583                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_Q )
584                  WRKBL = MAX( WRKBL, BDSPAC )
585                  MAXWRK = 2*M*M + WRKBL
586                  MINWRK = MAX( 3*M + N, BDSPAC )
587               ELSE IF( WNTVA .AND. WNTUAS ) THEN
588*
589*                 Path 9t(N much larger than M, JOBU='S' or 'A',
590*                 JOBVT='A')
591*
592                  WRKBL = M + LWORK_DGELQF
593                  WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_N )
594                  WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD )
595                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P )
596                  WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_Q )
597                  WRKBL = MAX( WRKBL, BDSPAC )
598                  MAXWRK = M*M + WRKBL
599                  MINWRK = MAX( 3*M + N, BDSPAC )
600               END IF
601            ELSE
602*
603*              Path 10t(N greater than M, but not much larger)
604*
605               CALL DGEBRD( M, N, A, LDA, S, DUM(1), DUM(1),
606     $                   DUM(1), DUM(1), -1, IERR )
607               LWORK_DGEBRD = INT( DUM(1) )
608               MAXWRK = 3*M + LWORK_DGEBRD
609               IF( WNTVS .OR. WNTVO ) THEN
610*                Compute space needed for DORGBR P
611                 CALL DORGBR( 'P', M, N, M, A, N, DUM(1),
612     $                   DUM(1), -1, IERR )
613                 LWORK_DORGBR_P = INT( DUM(1) )
614                 MAXWRK = MAX( MAXWRK, 3*M + LWORK_DORGBR_P )
615               END IF
616               IF( WNTVA ) THEN
617                 CALL DORGBR( 'P', N, N, M, A, N, DUM(1),
618     $                   DUM(1), -1, IERR )
619                 LWORK_DORGBR_P = INT( DUM(1) )
620                 MAXWRK = MAX( MAXWRK, 3*M + LWORK_DORGBR_P )
621               END IF
622               IF( .NOT.WNTUN ) THEN
623                  MAXWRK = MAX( MAXWRK, 3*M + LWORK_DORGBR_Q )
624               END IF
625               MAXWRK = MAX( MAXWRK, BDSPAC )
626               MINWRK = MAX( 3*M + N, BDSPAC )
627            END IF
628         END IF
629         MAXWRK = MAX( MAXWRK, MINWRK )
630         WORK( 1 ) = MAXWRK
631*
632         IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
633            INFO = -13
634         END IF
635      END IF
636*
637      IF( INFO.NE.0 ) THEN
638         CALL XERBLA( 'DGESVD', -INFO )
639         RETURN
640      ELSE IF( LQUERY ) THEN
641         RETURN
642      END IF
643*
644*     Quick return if possible
645*
646      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
647         RETURN
648      END IF
649*
650*     Get machine constants
651*
652      EPS = DLAMCH( 'P' )
653      SMLNUM = SQRT( DLAMCH( 'S' ) ) / EPS
654      BIGNUM = ONE / SMLNUM
655*
656*     Scale A if max element outside range [SMLNUM,BIGNUM]
657*
658      ANRM = DLANGE( 'M', M, N, A, LDA, DUM )
659      ISCL = 0
660      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
661         ISCL = 1
662         CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR )
663      ELSE IF( ANRM.GT.BIGNUM ) THEN
664         ISCL = 1
665         CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR )
666      END IF
667*
668      IF( M.GE.N ) THEN
669*
670*        A has at least as many rows as columns. If A has sufficiently
671*        more rows than columns, first reduce using the QR
672*        decomposition (if sufficient workspace available)
673*
674         IF( M.GE.MNTHR ) THEN
675*
676            IF( WNTUN ) THEN
677*
678*              Path 1 (M much larger than N, JOBU='N')
679*              No left singular vectors to be computed
680*
681               ITAU = 1
682               IWORK = ITAU + N
683*
684*              Compute A=Q*R
685*              (Workspace: need 2*N, prefer N + N*NB)
686*
687               CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
688     $                      LWORK-IWORK+1, IERR )
689*
690*              Zero out below R
691*
692               IF( N .GT. 1 ) THEN
693                  CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
694     $                         LDA )
695               END IF
696               IE = 1
697               ITAUQ = IE + N
698               ITAUP = ITAUQ + N
699               IWORK = ITAUP + N
700*
701*              Bidiagonalize R in A
702*              (Workspace: need 4*N, prefer 3*N + 2*N*NB)
703*
704               CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
705     $                      WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
706     $                      IERR )
707               NCVT = 0
708               IF( WNTVO .OR. WNTVAS ) THEN
709*
710*                 If right singular vectors desired, generate P'.
711*                 (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
712*
713                  CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
714     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
715                  NCVT = N
716               END IF
717               IWORK = IE + N
718*
719*              Perform bidiagonal QR iteration, computing right
720*              singular vectors of A in A if desired
721*              (Workspace: need BDSPAC)
722*
723               CALL DBDSQR( 'U', N, NCVT, 0, 0, S, WORK( IE ), A, LDA,
724     $                      DUM, 1, DUM, 1, WORK( IWORK ), INFO )
725*
726*              If right singular vectors desired in VT, copy them there
727*
728               IF( WNTVAS )
729     $            CALL DLACPY( 'F', N, N, A, LDA, VT, LDVT )
730*
731            ELSE IF( WNTUO .AND. WNTVN ) THEN
732*
733*              Path 2 (M much larger than N, JOBU='O', JOBVT='N')
734*              N left singular vectors to be overwritten on A and
735*              no right singular vectors to be computed
736*
737               IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
738*
739*                 Sufficient workspace for a fast algorithm
740*
741                  IR = 1
742                  IF( LWORK.GE.MAX( WRKBL, LDA*N + N ) + LDA*N ) THEN
743*
744*                    WORK(IU) is LDA by N, WORK(IR) is LDA by N
745*
746                     LDWRKU = LDA
747                     LDWRKR = LDA
748                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N + N ) + N*N ) THEN
749*
750*                    WORK(IU) is LDA by N, WORK(IR) is N by N
751*
752                     LDWRKU = LDA
753                     LDWRKR = N
754                  ELSE
755*
756*                    WORK(IU) is LDWRKU by N, WORK(IR) is N by N
757*
758                     LDWRKU = ( LWORK-N*N-N ) / N
759                     LDWRKR = N
760                  END IF
761                  ITAU = IR + LDWRKR*N
762                  IWORK = ITAU + N
763*
764*                 Compute A=Q*R
765*                 (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
766*
767                  CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
768     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
769*
770*                 Copy R to WORK(IR) and zero out below it
771*
772                  CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
773                  CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IR+1 ),
774     $                         LDWRKR )
775*
776*                 Generate Q in A
777*                 (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
778*
779                  CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
780     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
781                  IE = ITAU
782                  ITAUQ = IE + N
783                  ITAUP = ITAUQ + N
784                  IWORK = ITAUP + N
785*
786*                 Bidiagonalize R in WORK(IR)
787*                 (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
788*
789                  CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ),
790     $                         WORK( ITAUQ ), WORK( ITAUP ),
791     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
792*
793*                 Generate left vectors bidiagonalizing R
794*                 (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
795*
796                  CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
797     $                         WORK( ITAUQ ), WORK( IWORK ),
798     $                         LWORK-IWORK+1, IERR )
799                  IWORK = IE + N
800*
801*                 Perform bidiagonal QR iteration, computing left
802*                 singular vectors of R in WORK(IR)
803*                 (Workspace: need N*N + BDSPAC)
804*
805                  CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, 1,
806     $                         WORK( IR ), LDWRKR, DUM, 1,
807     $                         WORK( IWORK ), INFO )
808                  IU = IE + N
809*
810*                 Multiply Q in A by left singular vectors of R in
811*                 WORK(IR), storing result in WORK(IU) and copying to A
812*                 (Workspace: need N*N + 2*N, prefer N*N + M*N + N)
813*
814                  DO 10 I = 1, M, LDWRKU
815                     CHUNK = MIN( M-I+1, LDWRKU )
816                     CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
817     $                           LDA, WORK( IR ), LDWRKR, ZERO,
818     $                           WORK( IU ), LDWRKU )
819                     CALL DLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
820     $                            A( I, 1 ), LDA )
821   10             CONTINUE
822*
823               ELSE
824*
825*                 Insufficient workspace for a fast algorithm
826*
827                  IE = 1
828                  ITAUQ = IE + N
829                  ITAUP = ITAUQ + N
830                  IWORK = ITAUP + N
831*
832*                 Bidiagonalize A
833*                 (Workspace: need 3*N + M, prefer 3*N + (M + N)*NB)
834*
835                  CALL DGEBRD( M, N, A, LDA, S, WORK( IE ),
836     $                         WORK( ITAUQ ), WORK( ITAUP ),
837     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
838*
839*                 Generate left vectors bidiagonalizing A
840*                 (Workspace: need 4*N, prefer 3*N + N*NB)
841*
842                  CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
843     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
844                  IWORK = IE + N
845*
846*                 Perform bidiagonal QR iteration, computing left
847*                 singular vectors of A in A
848*                 (Workspace: need BDSPAC)
849*
850                  CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, 1,
851     $                         A, LDA, DUM, 1, WORK( IWORK ), INFO )
852*
853               END IF
854*
855            ELSE IF( WNTUO .AND. WNTVAS ) THEN
856*
857*              Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
858*              N left singular vectors to be overwritten on A and
859*              N right singular vectors to be computed in VT
860*
861               IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
862*
863*                 Sufficient workspace for a fast algorithm
864*
865                  IR = 1
866                  IF( LWORK.GE.MAX( WRKBL, LDA*N + N ) + LDA*N ) THEN
867*
868*                    WORK(IU) is LDA by N and WORK(IR) is LDA by N
869*
870                     LDWRKU = LDA
871                     LDWRKR = LDA
872                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N + N ) + N*N ) THEN
873*
874*                    WORK(IU) is LDA by N and WORK(IR) is N by N
875*
876                     LDWRKU = LDA
877                     LDWRKR = N
878                  ELSE
879*
880*                    WORK(IU) is LDWRKU by N and WORK(IR) is N by N
881*
882                     LDWRKU = ( LWORK-N*N-N ) / N
883                     LDWRKR = N
884                  END IF
885                  ITAU = IR + LDWRKR*N
886                  IWORK = ITAU + N
887*
888*                 Compute A=Q*R
889*                 (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
890*
891                  CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
892     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
893*
894*                 Copy R to VT, zeroing out below it
895*
896                  CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
897                  IF( N.GT.1 )
898     $               CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
899     $                            VT( 2, 1 ), LDVT )
900*
901*                 Generate Q in A
902*                 (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
903*
904                  CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
905     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
906                  IE = ITAU
907                  ITAUQ = IE + N
908                  ITAUP = ITAUQ + N
909                  IWORK = ITAUP + N
910*
911*                 Bidiagonalize R in VT, copying result to WORK(IR)
912*                 (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
913*
914                  CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
915     $                         WORK( ITAUQ ), WORK( ITAUP ),
916     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
917                  CALL DLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR )
918*
919*                 Generate left vectors bidiagonalizing R in WORK(IR)
920*                 (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
921*
922                  CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
923     $                         WORK( ITAUQ ), WORK( IWORK ),
924     $                         LWORK-IWORK+1, IERR )
925*
926*                 Generate right vectors bidiagonalizing R in VT
927*                 (Workspace: need N*N + 4*N-1, prefer N*N + 3*N + (N-1)*NB)
928*
929                  CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
930     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
931                  IWORK = IE + N
932*
933*                 Perform bidiagonal QR iteration, computing left
934*                 singular vectors of R in WORK(IR) and computing right
935*                 singular vectors of R in VT
936*                 (Workspace: need N*N + BDSPAC)
937*
938                  CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, LDVT,
939     $                         WORK( IR ), LDWRKR, DUM, 1,
940     $                         WORK( IWORK ), INFO )
941                  IU = IE + N
942*
943*                 Multiply Q in A by left singular vectors of R in
944*                 WORK(IR), storing result in WORK(IU) and copying to A
945*                 (Workspace: need N*N + 2*N, prefer N*N + M*N + N)
946*
947                  DO 20 I = 1, M, LDWRKU
948                     CHUNK = MIN( M-I+1, LDWRKU )
949                     CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
950     $                           LDA, WORK( IR ), LDWRKR, ZERO,
951     $                           WORK( IU ), LDWRKU )
952                     CALL DLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
953     $                            A( I, 1 ), LDA )
954   20             CONTINUE
955*
956               ELSE
957*
958*                 Insufficient workspace for a fast algorithm
959*
960                  ITAU = 1
961                  IWORK = ITAU + N
962*
963*                 Compute A=Q*R
964*                 (Workspace: need 2*N, prefer N + N*NB)
965*
966                  CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
967     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
968*
969*                 Copy R to VT, zeroing out below it
970*
971                  CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
972                  IF( N.GT.1 )
973     $               CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
974     $                            VT( 2, 1 ), LDVT )
975*
976*                 Generate Q in A
977*                 (Workspace: need 2*N, prefer N + N*NB)
978*
979                  CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
980     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
981                  IE = ITAU
982                  ITAUQ = IE + N
983                  ITAUP = ITAUQ + N
984                  IWORK = ITAUP + N
985*
986*                 Bidiagonalize R in VT
987*                 (Workspace: need 4*N, prefer 3*N + 2*N*NB)
988*
989                  CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
990     $                         WORK( ITAUQ ), WORK( ITAUP ),
991     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
992*
993*                 Multiply Q in A by left vectors bidiagonalizing R
994*                 (Workspace: need 3*N + M, prefer 3*N + M*NB)
995*
996                  CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
997     $                         WORK( ITAUQ ), A, LDA, WORK( IWORK ),
998     $                         LWORK-IWORK+1, IERR )
999*
1000*                 Generate right vectors bidiagonalizing R in VT
1001*                 (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1002*
1003                  CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1004     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
1005                  IWORK = IE + N
1006*
1007*                 Perform bidiagonal QR iteration, computing left
1008*                 singular vectors of A in A and computing right
1009*                 singular vectors of A in VT
1010*                 (Workspace: need BDSPAC)
1011*
1012                  CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, LDVT,
1013     $                         A, LDA, DUM, 1, WORK( IWORK ), INFO )
1014*
1015               END IF
1016*
1017            ELSE IF( WNTUS ) THEN
1018*
1019               IF( WNTVN ) THEN
1020*
1021*                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')
1022*                 N left singular vectors to be computed in U and
1023*                 no right singular vectors to be computed
1024*
1025                  IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
1026*
1027*                    Sufficient workspace for a fast algorithm
1028*
1029                     IR = 1
1030                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
1031*
1032*                       WORK(IR) is LDA by N
1033*
1034                        LDWRKR = LDA
1035                     ELSE
1036*
1037*                       WORK(IR) is N by N
1038*
1039                        LDWRKR = N
1040                     END IF
1041                     ITAU = IR + LDWRKR*N
1042                     IWORK = ITAU + N
1043*
1044*                    Compute A=Q*R
1045*                    (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1046*
1047                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1048     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1049*
1050*                    Copy R to WORK(IR), zeroing out below it
1051*
1052                     CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ),
1053     $                            LDWRKR )
1054                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1055     $                            WORK( IR+1 ), LDWRKR )
1056*
1057*                    Generate Q in A
1058*                    (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1059*
1060                     CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
1061     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1062                     IE = ITAU
1063                     ITAUQ = IE + N
1064                     ITAUP = ITAUQ + N
1065                     IWORK = ITAUP + N
1066*
1067*                    Bidiagonalize R in WORK(IR)
1068*                    (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
1069*
1070                     CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S,
1071     $                            WORK( IE ), WORK( ITAUQ ),
1072     $                            WORK( ITAUP ), WORK( IWORK ),
1073     $                            LWORK-IWORK+1, IERR )
1074*
1075*                    Generate left vectors bidiagonalizing R in WORK(IR)
1076*                    (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
1077*
1078                     CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
1079     $                            WORK( ITAUQ ), WORK( IWORK ),
1080     $                            LWORK-IWORK+1, IERR )
1081                     IWORK = IE + N
1082*
1083*                    Perform bidiagonal QR iteration, computing left
1084*                    singular vectors of R in WORK(IR)
1085*                    (Workspace: need N*N + BDSPAC)
1086*
1087                     CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
1088     $                            1, WORK( IR ), LDWRKR, DUM, 1,
1089     $                            WORK( IWORK ), INFO )
1090*
1091*                    Multiply Q in A by left singular vectors of R in
1092*                    WORK(IR), storing result in U
1093*                    (Workspace: need N*N)
1094*
1095                     CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
1096     $                           WORK( IR ), LDWRKR, ZERO, U, LDU )
1097*
1098                  ELSE
1099*
1100*                    Insufficient workspace for a fast algorithm
1101*
1102                     ITAU = 1
1103                     IWORK = ITAU + N
1104*
1105*                    Compute A=Q*R, copying result to U
1106*                    (Workspace: need 2*N, prefer N + N*NB)
1107*
1108                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1109     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1110                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1111*
1112*                    Generate Q in U
1113*                    (Workspace: need 2*N, prefer N + N*NB)
1114*
1115                     CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
1116     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1117                     IE = ITAU
1118                     ITAUQ = IE + N
1119                     ITAUP = ITAUQ + N
1120                     IWORK = ITAUP + N
1121*
1122*                    Zero out below R in A
1123*
1124                     IF( N .GT. 1 ) THEN
1125                        CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1126     $                               A( 2, 1 ), LDA )
1127                     END IF
1128*
1129*                    Bidiagonalize R in A
1130*                    (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1131*
1132                     CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1133     $                            WORK( ITAUQ ), WORK( ITAUP ),
1134     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1135*
1136*                    Multiply Q in U by left vectors bidiagonalizing R
1137*                    (Workspace: need 3*N + M, prefer 3*N + M*NB)
1138*
1139                     CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1140     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1141     $                            LWORK-IWORK+1, IERR )
1142                     IWORK = IE + N
1143*
1144*                    Perform bidiagonal QR iteration, computing left
1145*                    singular vectors of A in U
1146*                    (Workspace: need BDSPAC)
1147*
1148                     CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
1149     $                            1, U, LDU, DUM, 1, WORK( IWORK ),
1150     $                            INFO )
1151*
1152                  END IF
1153*
1154               ELSE IF( WNTVO ) THEN
1155*
1156*                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1157*                 N left singular vectors to be computed in U and
1158*                 N right singular vectors to be overwritten on A
1159*
1160                  IF( LWORK.GE.2*N*N+MAX( 4*N, BDSPAC ) ) THEN
1161*
1162*                    Sufficient workspace for a fast algorithm
1163*
1164                     IU = 1
1165                     IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
1166*
1167*                       WORK(IU) is LDA by N and WORK(IR) is LDA by N
1168*
1169                        LDWRKU = LDA
1170                        IR = IU + LDWRKU*N
1171                        LDWRKR = LDA
1172                     ELSE IF( LWORK.GE.WRKBL+( LDA + N )*N ) THEN
1173*
1174*                       WORK(IU) is LDA by N and WORK(IR) is N by N
1175*
1176                        LDWRKU = LDA
1177                        IR = IU + LDWRKU*N
1178                        LDWRKR = N
1179                     ELSE
1180*
1181*                       WORK(IU) is N by N and WORK(IR) is N by N
1182*
1183                        LDWRKU = N
1184                        IR = IU + LDWRKU*N
1185                        LDWRKR = N
1186                     END IF
1187                     ITAU = IR + LDWRKR*N
1188                     IWORK = ITAU + N
1189*
1190*                    Compute A=Q*R
1191*                    (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB)
1192*
1193                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1194     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1195*
1196*                    Copy R to WORK(IU), zeroing out below it
1197*
1198                     CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1199     $                            LDWRKU )
1200                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1201     $                            WORK( IU+1 ), LDWRKU )
1202*
1203*                    Generate Q in A
1204*                    (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB)
1205*
1206                     CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
1207     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1208                     IE = ITAU
1209                     ITAUQ = IE + N
1210                     ITAUP = ITAUQ + N
1211                     IWORK = ITAUP + N
1212*
1213*                    Bidiagonalize R in WORK(IU), copying result to
1214*                    WORK(IR)
1215*                    (Workspace: need 2*N*N + 4*N,
1216*                                prefer 2*N*N+3*N+2*N*NB)
1217*
1218                     CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1219     $                            WORK( IE ), WORK( ITAUQ ),
1220     $                            WORK( ITAUP ), WORK( IWORK ),
1221     $                            LWORK-IWORK+1, IERR )
1222                     CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU,
1223     $                            WORK( IR ), LDWRKR )
1224*
1225*                    Generate left bidiagonalizing vectors in WORK(IU)
1226*                    (Workspace: need 2*N*N + 4*N, prefer 2*N*N + 3*N + N*NB)
1227*
1228                     CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1229     $                            WORK( ITAUQ ), WORK( IWORK ),
1230     $                            LWORK-IWORK+1, IERR )
1231*
1232*                    Generate right bidiagonalizing vectors in WORK(IR)
1233*                    (Workspace: need 2*N*N + 4*N-1,
1234*                                prefer 2*N*N+3*N+(N-1)*NB)
1235*
1236                     CALL DORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
1237     $                            WORK( ITAUP ), WORK( IWORK ),
1238     $                            LWORK-IWORK+1, IERR )
1239                     IWORK = IE + N
1240*
1241*                    Perform bidiagonal QR iteration, computing left
1242*                    singular vectors of R in WORK(IU) and computing
1243*                    right singular vectors of R in WORK(IR)
1244*                    (Workspace: need 2*N*N + BDSPAC)
1245*
1246                     CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
1247     $                            WORK( IR ), LDWRKR, WORK( IU ),
1248     $                            LDWRKU, DUM, 1, WORK( IWORK ), INFO )
1249*
1250*                    Multiply Q in A by left singular vectors of R in
1251*                    WORK(IU), storing result in U
1252*                    (Workspace: need N*N)
1253*
1254                     CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
1255     $                           WORK( IU ), LDWRKU, ZERO, U, LDU )
1256*
1257*                    Copy right singular vectors of R to A
1258*                    (Workspace: need N*N)
1259*
1260                     CALL DLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
1261     $                            LDA )
1262*
1263                  ELSE
1264*
1265*                    Insufficient workspace for a fast algorithm
1266*
1267                     ITAU = 1
1268                     IWORK = ITAU + N
1269*
1270*                    Compute A=Q*R, copying result to U
1271*                    (Workspace: need 2*N, prefer N + N*NB)
1272*
1273                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1274     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1275                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1276*
1277*                    Generate Q in U
1278*                    (Workspace: need 2*N, prefer N + N*NB)
1279*
1280                     CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
1281     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1282                     IE = ITAU
1283                     ITAUQ = IE + N
1284                     ITAUP = ITAUQ + N
1285                     IWORK = ITAUP + N
1286*
1287*                    Zero out below R in A
1288*
1289                     IF( N .GT. 1 ) THEN
1290                        CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1291     $                               A( 2, 1 ), LDA )
1292                     END IF
1293*
1294*                    Bidiagonalize R in A
1295*                    (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1296*
1297                     CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1298     $                            WORK( ITAUQ ), WORK( ITAUP ),
1299     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1300*
1301*                    Multiply Q in U by left vectors bidiagonalizing R
1302*                    (Workspace: need 3*N + M, prefer 3*N + M*NB)
1303*
1304                     CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1305     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1306     $                            LWORK-IWORK+1, IERR )
1307*
1308*                    Generate right vectors bidiagonalizing R in A
1309*                    (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1310*
1311                     CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1312     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1313                     IWORK = IE + N
1314*
1315*                    Perform bidiagonal QR iteration, computing left
1316*                    singular vectors of A in U and computing right
1317*                    singular vectors of A in A
1318*                    (Workspace: need BDSPAC)
1319*
1320                     CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
1321     $                            LDA, U, LDU, DUM, 1, WORK( IWORK ),
1322     $                            INFO )
1323*
1324                  END IF
1325*
1326               ELSE IF( WNTVAS ) THEN
1327*
1328*                 Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1329*                         or 'A')
1330*                 N left singular vectors to be computed in U and
1331*                 N right singular vectors to be computed in VT
1332*
1333                  IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
1334*
1335*                    Sufficient workspace for a fast algorithm
1336*
1337                     IU = 1
1338                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
1339*
1340*                       WORK(IU) is LDA by N
1341*
1342                        LDWRKU = LDA
1343                     ELSE
1344*
1345*                       WORK(IU) is N by N
1346*
1347                        LDWRKU = N
1348                     END IF
1349                     ITAU = IU + LDWRKU*N
1350                     IWORK = ITAU + N
1351*
1352*                    Compute A=Q*R
1353*                    (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1354*
1355                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1356     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1357*
1358*                    Copy R to WORK(IU), zeroing out below it
1359*
1360                     CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1361     $                            LDWRKU )
1362                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1363     $                            WORK( IU+1 ), LDWRKU )
1364*
1365*                    Generate Q in A
1366*                    (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1367*
1368                     CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
1369     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1370                     IE = ITAU
1371                     ITAUQ = IE + N
1372                     ITAUP = ITAUQ + N
1373                     IWORK = ITAUP + N
1374*
1375*                    Bidiagonalize R in WORK(IU), copying result to VT
1376*                    (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
1377*
1378                     CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1379     $                            WORK( IE ), WORK( ITAUQ ),
1380     $                            WORK( ITAUP ), WORK( IWORK ),
1381     $                            LWORK-IWORK+1, IERR )
1382                     CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
1383     $                            LDVT )
1384*
1385*                    Generate left bidiagonalizing vectors in WORK(IU)
1386*                    (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
1387*
1388                     CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1389     $                            WORK( ITAUQ ), WORK( IWORK ),
1390     $                            LWORK-IWORK+1, IERR )
1391*
1392*                    Generate right bidiagonalizing vectors in VT
1393*                    (Workspace: need N*N + 4*N-1,
1394*                                prefer N*N+3*N+(N-1)*NB)
1395*
1396                     CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1397     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1398                     IWORK = IE + N
1399*
1400*                    Perform bidiagonal QR iteration, computing left
1401*                    singular vectors of R in WORK(IU) and computing
1402*                    right singular vectors of R in VT
1403*                    (Workspace: need N*N + BDSPAC)
1404*
1405                     CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
1406     $                            LDVT, WORK( IU ), LDWRKU, DUM, 1,
1407     $                            WORK( IWORK ), INFO )
1408*
1409*                    Multiply Q in A by left singular vectors of R in
1410*                    WORK(IU), storing result in U
1411*                    (Workspace: need N*N)
1412*
1413                     CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
1414     $                           WORK( IU ), LDWRKU, ZERO, U, LDU )
1415*
1416                  ELSE
1417*
1418*                    Insufficient workspace for a fast algorithm
1419*
1420                     ITAU = 1
1421                     IWORK = ITAU + N
1422*
1423*                    Compute A=Q*R, copying result to U
1424*                    (Workspace: need 2*N, prefer N + N*NB)
1425*
1426                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1427     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1428                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1429*
1430*                    Generate Q in U
1431*                    (Workspace: need 2*N, prefer N + N*NB)
1432*
1433                     CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
1434     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1435*
1436*                    Copy R to VT, zeroing out below it
1437*
1438                     CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
1439                     IF( N.GT.1 )
1440     $                  CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1441     $                               VT( 2, 1 ), LDVT )
1442                     IE = ITAU
1443                     ITAUQ = IE + N
1444                     ITAUP = ITAUQ + N
1445                     IWORK = ITAUP + N
1446*
1447*                    Bidiagonalize R in VT
1448*                    (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1449*
1450                     CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
1451     $                            WORK( ITAUQ ), WORK( ITAUP ),
1452     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1453*
1454*                    Multiply Q in U by left bidiagonalizing vectors
1455*                    in VT
1456*                    (Workspace: need 3*N + M, prefer 3*N + M*NB)
1457*
1458                     CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
1459     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1460     $                            LWORK-IWORK+1, IERR )
1461*
1462*                    Generate right bidiagonalizing vectors in VT
1463*                    (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1464*
1465                     CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1466     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1467                     IWORK = IE + N
1468*
1469*                    Perform bidiagonal QR iteration, computing left
1470*                    singular vectors of A in U and computing right
1471*                    singular vectors of A in VT
1472*                    (Workspace: need BDSPAC)
1473*
1474                     CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
1475     $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
1476     $                            INFO )
1477*
1478                  END IF
1479*
1480               END IF
1481*
1482            ELSE IF( WNTUA ) THEN
1483*
1484               IF( WNTVN ) THEN
1485*
1486*                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1487*                 M left singular vectors to be computed in U and
1488*                 no right singular vectors to be computed
1489*
1490                  IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1491*
1492*                    Sufficient workspace for a fast algorithm
1493*
1494                     IR = 1
1495                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
1496*
1497*                       WORK(IR) is LDA by N
1498*
1499                        LDWRKR = LDA
1500                     ELSE
1501*
1502*                       WORK(IR) is N by N
1503*
1504                        LDWRKR = N
1505                     END IF
1506                     ITAU = IR + LDWRKR*N
1507                     IWORK = ITAU + N
1508*
1509*                    Compute A=Q*R, copying result to U
1510*                    (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1511*
1512                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1513     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1514                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1515*
1516*                    Copy R to WORK(IR), zeroing out below it
1517*
1518                     CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ),
1519     $                            LDWRKR )
1520                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1521     $                            WORK( IR+1 ), LDWRKR )
1522*
1523*                    Generate Q in U
1524*                    (Workspace: need N*N + N + M, prefer N*N + N + M*NB)
1525*
1526                     CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1527     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1528                     IE = ITAU
1529                     ITAUQ = IE + N
1530                     ITAUP = ITAUQ + N
1531                     IWORK = ITAUP + N
1532*
1533*                    Bidiagonalize R in WORK(IR)
1534*                    (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
1535*
1536                     CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S,
1537     $                            WORK( IE ), WORK( ITAUQ ),
1538     $                            WORK( ITAUP ), WORK( IWORK ),
1539     $                            LWORK-IWORK+1, IERR )
1540*
1541*                    Generate left bidiagonalizing vectors in WORK(IR)
1542*                    (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
1543*
1544                     CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
1545     $                            WORK( ITAUQ ), WORK( IWORK ),
1546     $                            LWORK-IWORK+1, IERR )
1547                     IWORK = IE + N
1548*
1549*                    Perform bidiagonal QR iteration, computing left
1550*                    singular vectors of R in WORK(IR)
1551*                    (Workspace: need N*N + BDSPAC)
1552*
1553                     CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
1554     $                            1, WORK( IR ), LDWRKR, DUM, 1,
1555     $                            WORK( IWORK ), INFO )
1556*
1557*                    Multiply Q in U by left singular vectors of R in
1558*                    WORK(IR), storing result in A
1559*                    (Workspace: need N*N)
1560*
1561                     CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
1562     $                           WORK( IR ), LDWRKR, ZERO, A, LDA )
1563*
1564*                    Copy left singular vectors of A from A to U
1565*
1566                     CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
1567*
1568                  ELSE
1569*
1570*                    Insufficient workspace for a fast algorithm
1571*
1572                     ITAU = 1
1573                     IWORK = ITAU + N
1574*
1575*                    Compute A=Q*R, copying result to U
1576*                    (Workspace: need 2*N, prefer N + N*NB)
1577*
1578                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1579     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1580                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1581*
1582*                    Generate Q in U
1583*                    (Workspace: need N + M, prefer N + M*NB)
1584*
1585                     CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1586     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1587                     IE = ITAU
1588                     ITAUQ = IE + N
1589                     ITAUP = ITAUQ + N
1590                     IWORK = ITAUP + N
1591*
1592*                    Zero out below R in A
1593*
1594                     IF( N .GT. 1 ) THEN
1595                        CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1596     $                                A( 2, 1 ), LDA )
1597                     END IF
1598*
1599*                    Bidiagonalize R in A
1600*                    (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1601*
1602                     CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1603     $                            WORK( ITAUQ ), WORK( ITAUP ),
1604     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1605*
1606*                    Multiply Q in U by left bidiagonalizing vectors
1607*                    in A
1608*                    (Workspace: need 3*N + M, prefer 3*N + M*NB)
1609*
1610                     CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1611     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1612     $                            LWORK-IWORK+1, IERR )
1613                     IWORK = IE + N
1614*
1615*                    Perform bidiagonal QR iteration, computing left
1616*                    singular vectors of A in U
1617*                    (Workspace: need BDSPAC)
1618*
1619                     CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
1620     $                            1, U, LDU, DUM, 1, WORK( IWORK ),
1621     $                            INFO )
1622*
1623                  END IF
1624*
1625               ELSE IF( WNTVO ) THEN
1626*
1627*                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1628*                 M left singular vectors to be computed in U and
1629*                 N right singular vectors to be overwritten on A
1630*
1631                  IF( LWORK.GE.2*N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1632*
1633*                    Sufficient workspace for a fast algorithm
1634*
1635                     IU = 1
1636                     IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
1637*
1638*                       WORK(IU) is LDA by N and WORK(IR) is LDA by N
1639*
1640                        LDWRKU = LDA
1641                        IR = IU + LDWRKU*N
1642                        LDWRKR = LDA
1643                     ELSE IF( LWORK.GE.WRKBL+( LDA + N )*N ) THEN
1644*
1645*                       WORK(IU) is LDA by N and WORK(IR) is N by N
1646*
1647                        LDWRKU = LDA
1648                        IR = IU + LDWRKU*N
1649                        LDWRKR = N
1650                     ELSE
1651*
1652*                       WORK(IU) is N by N and WORK(IR) is N by N
1653*
1654                        LDWRKU = N
1655                        IR = IU + LDWRKU*N
1656                        LDWRKR = N
1657                     END IF
1658                     ITAU = IR + LDWRKR*N
1659                     IWORK = ITAU + N
1660*
1661*                    Compute A=Q*R, copying result to U
1662*                    (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB)
1663*
1664                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1665     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1666                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1667*
1668*                    Generate Q in U
1669*                    (Workspace: need 2*N*N + N + M, prefer 2*N*N + N + M*NB)
1670*
1671                     CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1672     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1673*
1674*                    Copy R to WORK(IU), zeroing out below it
1675*
1676                     CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1677     $                            LDWRKU )
1678                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1679     $                            WORK( IU+1 ), LDWRKU )
1680                     IE = ITAU
1681                     ITAUQ = IE + N
1682                     ITAUP = ITAUQ + N
1683                     IWORK = ITAUP + N
1684*
1685*                    Bidiagonalize R in WORK(IU), copying result to
1686*                    WORK(IR)
1687*                    (Workspace: need 2*N*N + 4*N,
1688*                                prefer 2*N*N+3*N+2*N*NB)
1689*
1690                     CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1691     $                            WORK( IE ), WORK( ITAUQ ),
1692     $                            WORK( ITAUP ), WORK( IWORK ),
1693     $                            LWORK-IWORK+1, IERR )
1694                     CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU,
1695     $                            WORK( IR ), LDWRKR )
1696*
1697*                    Generate left bidiagonalizing vectors in WORK(IU)
1698*                    (Workspace: need 2*N*N + 4*N, prefer 2*N*N + 3*N + N*NB)
1699*
1700                     CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1701     $                            WORK( ITAUQ ), WORK( IWORK ),
1702     $                            LWORK-IWORK+1, IERR )
1703*
1704*                    Generate right bidiagonalizing vectors in WORK(IR)
1705*                    (Workspace: need 2*N*N + 4*N-1,
1706*                                prefer 2*N*N+3*N+(N-1)*NB)
1707*
1708                     CALL DORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
1709     $                            WORK( ITAUP ), WORK( IWORK ),
1710     $                            LWORK-IWORK+1, IERR )
1711                     IWORK = IE + N
1712*
1713*                    Perform bidiagonal QR iteration, computing left
1714*                    singular vectors of R in WORK(IU) and computing
1715*                    right singular vectors of R in WORK(IR)
1716*                    (Workspace: need 2*N*N + BDSPAC)
1717*
1718                     CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
1719     $                            WORK( IR ), LDWRKR, WORK( IU ),
1720     $                            LDWRKU, DUM, 1, WORK( IWORK ), INFO )
1721*
1722*                    Multiply Q in U by left singular vectors of R in
1723*                    WORK(IU), storing result in A
1724*                    (Workspace: need N*N)
1725*
1726                     CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
1727     $                           WORK( IU ), LDWRKU, ZERO, A, LDA )
1728*
1729*                    Copy left singular vectors of A from A to U
1730*
1731                     CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
1732*
1733*                    Copy right singular vectors of R from WORK(IR) to A
1734*
1735                     CALL DLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
1736     $                            LDA )
1737*
1738                  ELSE
1739*
1740*                    Insufficient workspace for a fast algorithm
1741*
1742                     ITAU = 1
1743                     IWORK = ITAU + N
1744*
1745*                    Compute A=Q*R, copying result to U
1746*                    (Workspace: need 2*N, prefer N + N*NB)
1747*
1748                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1749     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1750                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1751*
1752*                    Generate Q in U
1753*                    (Workspace: need N + M, prefer N + M*NB)
1754*
1755                     CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1756     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1757                     IE = ITAU
1758                     ITAUQ = IE + N
1759                     ITAUP = ITAUQ + N
1760                     IWORK = ITAUP + N
1761*
1762*                    Zero out below R in A
1763*
1764                     IF( N .GT. 1 ) THEN
1765                        CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1766     $                                A( 2, 1 ), LDA )
1767                     END IF
1768*
1769*                    Bidiagonalize R in A
1770*                    (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1771*
1772                     CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1773     $                            WORK( ITAUQ ), WORK( ITAUP ),
1774     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1775*
1776*                    Multiply Q in U by left bidiagonalizing vectors
1777*                    in A
1778*                    (Workspace: need 3*N + M, prefer 3*N + M*NB)
1779*
1780                     CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1781     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1782     $                            LWORK-IWORK+1, IERR )
1783*
1784*                    Generate right bidiagonalizing vectors in A
1785*                    (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1786*
1787                     CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1788     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1789                     IWORK = IE + N
1790*
1791*                    Perform bidiagonal QR iteration, computing left
1792*                    singular vectors of A in U and computing right
1793*                    singular vectors of A in A
1794*                    (Workspace: need BDSPAC)
1795*
1796                     CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
1797     $                            LDA, U, LDU, DUM, 1, WORK( IWORK ),
1798     $                            INFO )
1799*
1800                  END IF
1801*
1802               ELSE IF( WNTVAS ) THEN
1803*
1804*                 Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1805*                         or 'A')
1806*                 M left singular vectors to be computed in U and
1807*                 N right singular vectors to be computed in VT
1808*
1809                  IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1810*
1811*                    Sufficient workspace for a fast algorithm
1812*
1813                     IU = 1
1814                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
1815*
1816*                       WORK(IU) is LDA by N
1817*
1818                        LDWRKU = LDA
1819                     ELSE
1820*
1821*                       WORK(IU) is N by N
1822*
1823                        LDWRKU = N
1824                     END IF
1825                     ITAU = IU + LDWRKU*N
1826                     IWORK = ITAU + N
1827*
1828*                    Compute A=Q*R, copying result to U
1829*                    (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1830*
1831                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1832     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1833                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1834*
1835*                    Generate Q in U
1836*                    (Workspace: need N*N + N + M, prefer N*N + N + M*NB)
1837*
1838                     CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1839     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1840*
1841*                    Copy R to WORK(IU), zeroing out below it
1842*
1843                     CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1844     $                            LDWRKU )
1845                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1846     $                            WORK( IU+1 ), LDWRKU )
1847                     IE = ITAU
1848                     ITAUQ = IE + N
1849                     ITAUP = ITAUQ + N
1850                     IWORK = ITAUP + N
1851*
1852*                    Bidiagonalize R in WORK(IU), copying result to VT
1853*                    (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
1854*
1855                     CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1856     $                            WORK( IE ), WORK( ITAUQ ),
1857     $                            WORK( ITAUP ), WORK( IWORK ),
1858     $                            LWORK-IWORK+1, IERR )
1859                     CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
1860     $                            LDVT )
1861*
1862*                    Generate left bidiagonalizing vectors in WORK(IU)
1863*                    (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
1864*
1865                     CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1866     $                            WORK( ITAUQ ), WORK( IWORK ),
1867     $                            LWORK-IWORK+1, IERR )
1868*
1869*                    Generate right bidiagonalizing vectors in VT
1870*                    (Workspace: need N*N + 4*N-1,
1871*                                prefer N*N+3*N+(N-1)*NB)
1872*
1873                     CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1874     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1875                     IWORK = IE + N
1876*
1877*                    Perform bidiagonal QR iteration, computing left
1878*                    singular vectors of R in WORK(IU) and computing
1879*                    right singular vectors of R in VT
1880*                    (Workspace: need N*N + BDSPAC)
1881*
1882                     CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
1883     $                            LDVT, WORK( IU ), LDWRKU, DUM, 1,
1884     $                            WORK( IWORK ), INFO )
1885*
1886*                    Multiply Q in U by left singular vectors of R in
1887*                    WORK(IU), storing result in A
1888*                    (Workspace: need N*N)
1889*
1890                     CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
1891     $                           WORK( IU ), LDWRKU, ZERO, A, LDA )
1892*
1893*                    Copy left singular vectors of A from A to U
1894*
1895                     CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
1896*
1897                  ELSE
1898*
1899*                    Insufficient workspace for a fast algorithm
1900*
1901                     ITAU = 1
1902                     IWORK = ITAU + N
1903*
1904*                    Compute A=Q*R, copying result to U
1905*                    (Workspace: need 2*N, prefer N + N*NB)
1906*
1907                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1908     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1909                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1910*
1911*                    Generate Q in U
1912*                    (Workspace: need N + M, prefer N + M*NB)
1913*
1914                     CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1915     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1916*
1917*                    Copy R from A to VT, zeroing out below it
1918*
1919                     CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
1920                     IF( N.GT.1 )
1921     $                  CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1922     $                               VT( 2, 1 ), LDVT )
1923                     IE = ITAU
1924                     ITAUQ = IE + N
1925                     ITAUP = ITAUQ + N
1926                     IWORK = ITAUP + N
1927*
1928*                    Bidiagonalize R in VT
1929*                    (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1930*
1931                     CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
1932     $                            WORK( ITAUQ ), WORK( ITAUP ),
1933     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1934*
1935*                    Multiply Q in U by left bidiagonalizing vectors
1936*                    in VT
1937*                    (Workspace: need 3*N + M, prefer 3*N + M*NB)
1938*
1939                     CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
1940     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1941     $                            LWORK-IWORK+1, IERR )
1942*
1943*                    Generate right bidiagonalizing vectors in VT
1944*                    (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1945*
1946                     CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1947     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1948                     IWORK = IE + N
1949*
1950*                    Perform bidiagonal QR iteration, computing left
1951*                    singular vectors of A in U and computing right
1952*                    singular vectors of A in VT
1953*                    (Workspace: need BDSPAC)
1954*
1955                     CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
1956     $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
1957     $                            INFO )
1958*
1959                  END IF
1960*
1961               END IF
1962*
1963            END IF
1964*
1965         ELSE
1966*
1967*           M .LT. MNTHR
1968*
1969*           Path 10 (M at least N, but not much larger)
1970*           Reduce to bidiagonal form without QR decomposition
1971*
1972            IE = 1
1973            ITAUQ = IE + N
1974            ITAUP = ITAUQ + N
1975            IWORK = ITAUP + N
1976*
1977*           Bidiagonalize A
1978*           (Workspace: need 3*N + M, prefer 3*N + (M + N)*NB)
1979*
1980            CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
1981     $                   WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
1982     $                   IERR )
1983            IF( WNTUAS ) THEN
1984*
1985*              If left singular vectors desired in U, copy result to U
1986*              and generate left bidiagonalizing vectors in U
1987*              (Workspace: need 3*N + NCU, prefer 3*N + NCU*NB)
1988*
1989               CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1990               IF( WNTUS )
1991     $            NCU = N
1992               IF( WNTUA )
1993     $            NCU = M
1994               CALL DORGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ),
1995     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
1996            END IF
1997            IF( WNTVAS ) THEN
1998*
1999*              If right singular vectors desired in VT, copy result to
2000*              VT and generate right bidiagonalizing vectors in VT
2001*              (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
2002*
2003               CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
2004               CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
2005     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
2006            END IF
2007            IF( WNTUO ) THEN
2008*
2009*              If left singular vectors desired in A, generate left
2010*              bidiagonalizing vectors in A
2011*              (Workspace: need 4*N, prefer 3*N + N*NB)
2012*
2013               CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
2014     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
2015            END IF
2016            IF( WNTVO ) THEN
2017*
2018*              If right singular vectors desired in A, generate right
2019*              bidiagonalizing vectors in A
2020*              (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
2021*
2022               CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
2023     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
2024            END IF
2025            IWORK = IE + N
2026            IF( WNTUAS .OR. WNTUO )
2027     $         NRU = M
2028            IF( WNTUN )
2029     $         NRU = 0
2030            IF( WNTVAS .OR. WNTVO )
2031     $         NCVT = N
2032            IF( WNTVN )
2033     $         NCVT = 0
2034            IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
2035*
2036*              Perform bidiagonal QR iteration, if desired, computing
2037*              left singular vectors in U and computing right singular
2038*              vectors in VT
2039*              (Workspace: need BDSPAC)
2040*
2041               CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
2042     $                      LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
2043            ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
2044*
2045*              Perform bidiagonal QR iteration, if desired, computing
2046*              left singular vectors in U and computing right singular
2047*              vectors in A
2048*              (Workspace: need BDSPAC)
2049*
2050               CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
2051     $                      U, LDU, DUM, 1, WORK( IWORK ), INFO )
2052            ELSE
2053*
2054*              Perform bidiagonal QR iteration, if desired, computing
2055*              left singular vectors in A and computing right singular
2056*              vectors in VT
2057*              (Workspace: need BDSPAC)
2058*
2059               CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
2060     $                      LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
2061            END IF
2062*
2063         END IF
2064*
2065      ELSE
2066*
2067*        A has more columns than rows. If A has sufficiently more
2068*        columns than rows, first reduce using the LQ decomposition (if
2069*        sufficient workspace available)
2070*
2071         IF( N.GE.MNTHR ) THEN
2072*
2073            IF( WNTVN ) THEN
2074*
2075*              Path 1t(N much larger than M, JOBVT='N')
2076*              No right singular vectors to be computed
2077*
2078               ITAU = 1
2079               IWORK = ITAU + M
2080*
2081*              Compute A=L*Q
2082*              (Workspace: need 2*M, prefer M + M*NB)
2083*
2084               CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
2085     $                      LWORK-IWORK+1, IERR )
2086*
2087*              Zero out above L
2088*
2089               CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA )
2090               IE = 1
2091               ITAUQ = IE + M
2092               ITAUP = ITAUQ + M
2093               IWORK = ITAUP + M
2094*
2095*              Bidiagonalize L in A
2096*              (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2097*
2098               CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
2099     $                      WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
2100     $                      IERR )
2101               IF( WNTUO .OR. WNTUAS ) THEN
2102*
2103*                 If left singular vectors desired, generate Q
2104*                 (Workspace: need 4*M, prefer 3*M + M*NB)
2105*
2106                  CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
2107     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2108               END IF
2109               IWORK = IE + M
2110               NRU = 0
2111               IF( WNTUO .OR. WNTUAS )
2112     $            NRU = M
2113*
2114*              Perform bidiagonal QR iteration, computing left singular
2115*              vectors of A in A if desired
2116*              (Workspace: need BDSPAC)
2117*
2118               CALL DBDSQR( 'U', M, 0, NRU, 0, S, WORK( IE ), DUM, 1, A,
2119     $                      LDA, DUM, 1, WORK( IWORK ), INFO )
2120*
2121*              If left singular vectors desired in U, copy them there
2122*
2123               IF( WNTUAS )
2124     $            CALL DLACPY( 'F', M, M, A, LDA, U, LDU )
2125*
2126            ELSE IF( WNTVO .AND. WNTUN ) THEN
2127*
2128*              Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2129*              M right singular vectors to be overwritten on A and
2130*              no left singular vectors to be computed
2131*
2132               IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2133*
2134*                 Sufficient workspace for a fast algorithm
2135*
2136                  IR = 1
2137                  IF( LWORK.GE.MAX( WRKBL, LDA*N + M ) + LDA*M ) THEN
2138*
2139*                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
2140*
2141                     LDWRKU = LDA
2142                     CHUNK = N
2143                     LDWRKR = LDA
2144                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N + M ) + M*M ) THEN
2145*
2146*                    WORK(IU) is LDA by N and WORK(IR) is M by M
2147*
2148                     LDWRKU = LDA
2149                     CHUNK = N
2150                     LDWRKR = M
2151                  ELSE
2152*
2153*                    WORK(IU) is M by CHUNK and WORK(IR) is M by M
2154*
2155                     LDWRKU = M
2156                     CHUNK = ( LWORK-M*M-M ) / M
2157                     LDWRKR = M
2158                  END IF
2159                  ITAU = IR + LDWRKR*M
2160                  IWORK = ITAU + M
2161*
2162*                 Compute A=L*Q
2163*                 (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2164*
2165                  CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2166     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2167*
2168*                 Copy L to WORK(IR) and zero out above it
2169*
2170                  CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR )
2171                  CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2172     $                         WORK( IR+LDWRKR ), LDWRKR )
2173*
2174*                 Generate Q in A
2175*                 (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2176*
2177                  CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2178     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2179                  IE = ITAU
2180                  ITAUQ = IE + M
2181                  ITAUP = ITAUQ + M
2182                  IWORK = ITAUP + M
2183*
2184*                 Bidiagonalize L in WORK(IR)
2185*                 (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2186*
2187                  CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S, WORK( IE ),
2188     $                         WORK( ITAUQ ), WORK( ITAUP ),
2189     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2190*
2191*                 Generate right vectors bidiagonalizing L
2192*                 (Workspace: need M*M + 4*M-1, prefer M*M + 3*M + (M-1)*NB)
2193*
2194                  CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2195     $                         WORK( ITAUP ), WORK( IWORK ),
2196     $                         LWORK-IWORK+1, IERR )
2197                  IWORK = IE + M
2198*
2199*                 Perform bidiagonal QR iteration, computing right
2200*                 singular vectors of L in WORK(IR)
2201*                 (Workspace: need M*M + BDSPAC)
2202*
2203                  CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
2204     $                         WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2205     $                         WORK( IWORK ), INFO )
2206                  IU = IE + M
2207*
2208*                 Multiply right singular vectors of L in WORK(IR) by Q
2209*                 in A, storing result in WORK(IU) and copying to A
2210*                 (Workspace: need M*M + 2*M, prefer M*M + M*N + M)
2211*
2212                  DO 30 I = 1, N, CHUNK
2213                     BLK = MIN( N-I+1, CHUNK )
2214                     CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ),
2215     $                           LDWRKR, A( 1, I ), LDA, ZERO,
2216     $                           WORK( IU ), LDWRKU )
2217                     CALL DLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
2218     $                            A( 1, I ), LDA )
2219   30             CONTINUE
2220*
2221               ELSE
2222*
2223*                 Insufficient workspace for a fast algorithm
2224*
2225                  IE = 1
2226                  ITAUQ = IE + M
2227                  ITAUP = ITAUQ + M
2228                  IWORK = ITAUP + M
2229*
2230*                 Bidiagonalize A
2231*                 (Workspace: need 3*M + N, prefer 3*M + (M + N)*NB)
2232*
2233                  CALL DGEBRD( M, N, A, LDA, S, WORK( IE ),
2234     $                         WORK( ITAUQ ), WORK( ITAUP ),
2235     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2236*
2237*                 Generate right vectors bidiagonalizing A
2238*                 (Workspace: need 4*M, prefer 3*M + M*NB)
2239*
2240                  CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
2241     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2242                  IWORK = IE + M
2243*
2244*                 Perform bidiagonal QR iteration, computing right
2245*                 singular vectors of A in A
2246*                 (Workspace: need BDSPAC)
2247*
2248                  CALL DBDSQR( 'L', M, N, 0, 0, S, WORK( IE ), A, LDA,
2249     $                         DUM, 1, DUM, 1, WORK( IWORK ), INFO )
2250*
2251               END IF
2252*
2253            ELSE IF( WNTVO .AND. WNTUAS ) THEN
2254*
2255*              Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2256*              M right singular vectors to be overwritten on A and
2257*              M left singular vectors to be computed in U
2258*
2259               IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2260*
2261*                 Sufficient workspace for a fast algorithm
2262*
2263                  IR = 1
2264                  IF( LWORK.GE.MAX( WRKBL, LDA*N + M ) + LDA*M ) THEN
2265*
2266*                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
2267*
2268                     LDWRKU = LDA
2269                     CHUNK = N
2270                     LDWRKR = LDA
2271                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N + M ) + M*M ) THEN
2272*
2273*                    WORK(IU) is LDA by N and WORK(IR) is M by M
2274*
2275                     LDWRKU = LDA
2276                     CHUNK = N
2277                     LDWRKR = M
2278                  ELSE
2279*
2280*                    WORK(IU) is M by CHUNK and WORK(IR) is M by M
2281*
2282                     LDWRKU = M
2283                     CHUNK = ( LWORK-M*M-M ) / M
2284                     LDWRKR = M
2285                  END IF
2286                  ITAU = IR + LDWRKR*M
2287                  IWORK = ITAU + M
2288*
2289*                 Compute A=L*Q
2290*                 (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2291*
2292                  CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2293     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2294*
2295*                 Copy L to U, zeroing about above it
2296*
2297                  CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
2298                  CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
2299     $                         LDU )
2300*
2301*                 Generate Q in A
2302*                 (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2303*
2304                  CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2305     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2306                  IE = ITAU
2307                  ITAUQ = IE + M
2308                  ITAUP = ITAUQ + M
2309                  IWORK = ITAUP + M
2310*
2311*                 Bidiagonalize L in U, copying result to WORK(IR)
2312*                 (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2313*
2314                  CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
2315     $                         WORK( ITAUQ ), WORK( ITAUP ),
2316     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2317                  CALL DLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR )
2318*
2319*                 Generate right vectors bidiagonalizing L in WORK(IR)
2320*                 (Workspace: need M*M + 4*M-1, prefer M*M + 3*M + (M-1)*NB)
2321*
2322                  CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2323     $                         WORK( ITAUP ), WORK( IWORK ),
2324     $                         LWORK-IWORK+1, IERR )
2325*
2326*                 Generate left vectors bidiagonalizing L in U
2327*                 (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB)
2328*
2329                  CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2330     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2331                  IWORK = IE + M
2332*
2333*                 Perform bidiagonal QR iteration, computing left
2334*                 singular vectors of L in U, and computing right
2335*                 singular vectors of L in WORK(IR)
2336*                 (Workspace: need M*M + BDSPAC)
2337*
2338                  CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2339     $                         WORK( IR ), LDWRKR, U, LDU, DUM, 1,
2340     $                         WORK( IWORK ), INFO )
2341                  IU = IE + M
2342*
2343*                 Multiply right singular vectors of L in WORK(IR) by Q
2344*                 in A, storing result in WORK(IU) and copying to A
2345*                 (Workspace: need M*M + 2*M, prefer M*M + M*N + M))
2346*
2347                  DO 40 I = 1, N, CHUNK
2348                     BLK = MIN( N-I+1, CHUNK )
2349                     CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ),
2350     $                           LDWRKR, A( 1, I ), LDA, ZERO,
2351     $                           WORK( IU ), LDWRKU )
2352                     CALL DLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
2353     $                            A( 1, I ), LDA )
2354   40             CONTINUE
2355*
2356               ELSE
2357*
2358*                 Insufficient workspace for a fast algorithm
2359*
2360                  ITAU = 1
2361                  IWORK = ITAU + M
2362*
2363*                 Compute A=L*Q
2364*                 (Workspace: need 2*M, prefer M + M*NB)
2365*
2366                  CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2367     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2368*
2369*                 Copy L to U, zeroing out above it
2370*
2371                  CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
2372                  CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
2373     $                         LDU )
2374*
2375*                 Generate Q in A
2376*                 (Workspace: need 2*M, prefer M + M*NB)
2377*
2378                  CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2379     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2380                  IE = ITAU
2381                  ITAUQ = IE + M
2382                  ITAUP = ITAUQ + M
2383                  IWORK = ITAUP + M
2384*
2385*                 Bidiagonalize L in U
2386*                 (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2387*
2388                  CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
2389     $                         WORK( ITAUQ ), WORK( ITAUP ),
2390     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2391*
2392*                 Multiply right vectors bidiagonalizing L by Q in A
2393*                 (Workspace: need 3*M + N, prefer 3*M + N*NB)
2394*
2395                  CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
2396     $                         WORK( ITAUP ), A, LDA, WORK( IWORK ),
2397     $                         LWORK-IWORK+1, IERR )
2398*
2399*                 Generate left vectors bidiagonalizing L in U
2400*                 (Workspace: need 4*M, prefer 3*M + M*NB)
2401*
2402                  CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2403     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2404                  IWORK = IE + M
2405*
2406*                 Perform bidiagonal QR iteration, computing left
2407*                 singular vectors of A in U and computing right
2408*                 singular vectors of A in A
2409*                 (Workspace: need BDSPAC)
2410*
2411                  CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), A, LDA,
2412     $                         U, LDU, DUM, 1, WORK( IWORK ), INFO )
2413*
2414               END IF
2415*
2416            ELSE IF( WNTVS ) THEN
2417*
2418               IF( WNTUN ) THEN
2419*
2420*                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2421*                 M right singular vectors to be computed in VT and
2422*                 no left singular vectors to be computed
2423*
2424                  IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2425*
2426*                    Sufficient workspace for a fast algorithm
2427*
2428                     IR = 1
2429                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
2430*
2431*                       WORK(IR) is LDA by M
2432*
2433                        LDWRKR = LDA
2434                     ELSE
2435*
2436*                       WORK(IR) is M by M
2437*
2438                        LDWRKR = M
2439                     END IF
2440                     ITAU = IR + LDWRKR*M
2441                     IWORK = ITAU + M
2442*
2443*                    Compute A=L*Q
2444*                    (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2445*
2446                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2447     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2448*
2449*                    Copy L to WORK(IR), zeroing out above it
2450*
2451                     CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ),
2452     $                            LDWRKR )
2453                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2454     $                            WORK( IR+LDWRKR ), LDWRKR )
2455*
2456*                    Generate Q in A
2457*                    (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2458*
2459                     CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2460     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2461                     IE = ITAU
2462                     ITAUQ = IE + M
2463                     ITAUP = ITAUQ + M
2464                     IWORK = ITAUP + M
2465*
2466*                    Bidiagonalize L in WORK(IR)
2467*                    (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2468*
2469                     CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S,
2470     $                            WORK( IE ), WORK( ITAUQ ),
2471     $                            WORK( ITAUP ), WORK( IWORK ),
2472     $                            LWORK-IWORK+1, IERR )
2473*
2474*                    Generate right vectors bidiagonalizing L in
2475*                    WORK(IR)
2476*                    (Workspace: need M*M + 4*M, prefer M*M + 3*M + (M-1)*NB)
2477*
2478                     CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2479     $                            WORK( ITAUP ), WORK( IWORK ),
2480     $                            LWORK-IWORK+1, IERR )
2481                     IWORK = IE + M
2482*
2483*                    Perform bidiagonal QR iteration, computing right
2484*                    singular vectors of L in WORK(IR)
2485*                    (Workspace: need M*M + BDSPAC)
2486*
2487                     CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
2488     $                            WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2489     $                            WORK( IWORK ), INFO )
2490*
2491*                    Multiply right singular vectors of L in WORK(IR) by
2492*                    Q in A, storing result in VT
2493*                    (Workspace: need M*M)
2494*
2495                     CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ),
2496     $                           LDWRKR, A, LDA, ZERO, VT, LDVT )
2497*
2498                  ELSE
2499*
2500*                    Insufficient workspace for a fast algorithm
2501*
2502                     ITAU = 1
2503                     IWORK = ITAU + M
2504*
2505*                    Compute A=L*Q
2506*                    (Workspace: need 2*M, prefer M + M*NB)
2507*
2508                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2509     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2510*
2511*                    Copy result to VT
2512*
2513                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2514*
2515*                    Generate Q in VT
2516*                    (Workspace: need 2*M, prefer M + M*NB)
2517*
2518                     CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2519     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2520                     IE = ITAU
2521                     ITAUQ = IE + M
2522                     ITAUP = ITAUQ + M
2523                     IWORK = ITAUP + M
2524*
2525*                    Zero out above L in A
2526*
2527                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
2528     $                            LDA )
2529*
2530*                    Bidiagonalize L in A
2531*                    (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2532*
2533                     CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
2534     $                            WORK( ITAUQ ), WORK( ITAUP ),
2535     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2536*
2537*                    Multiply right vectors bidiagonalizing L by Q in VT
2538*                    (Workspace: need 3*M + N, prefer 3*M + N*NB)
2539*
2540                     CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
2541     $                            WORK( ITAUP ), VT, LDVT,
2542     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2543                     IWORK = IE + M
2544*
2545*                    Perform bidiagonal QR iteration, computing right
2546*                    singular vectors of A in VT
2547*                    (Workspace: need BDSPAC)
2548*
2549                     CALL DBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT,
2550     $                            LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
2551     $                            INFO )
2552*
2553                  END IF
2554*
2555               ELSE IF( WNTUO ) THEN
2556*
2557*                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2558*                 M right singular vectors to be computed in VT and
2559*                 M left singular vectors to be overwritten on A
2560*
2561                  IF( LWORK.GE.2*M*M+MAX( 4*M, BDSPAC ) ) THEN
2562*
2563*                    Sufficient workspace for a fast algorithm
2564*
2565                     IU = 1
2566                     IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
2567*
2568*                       WORK(IU) is LDA by M and WORK(IR) is LDA by M
2569*
2570                        LDWRKU = LDA
2571                        IR = IU + LDWRKU*M
2572                        LDWRKR = LDA
2573                     ELSE IF( LWORK.GE.WRKBL+( LDA + M )*M ) THEN
2574*
2575*                       WORK(IU) is LDA by M and WORK(IR) is M by M
2576*
2577                        LDWRKU = LDA
2578                        IR = IU + LDWRKU*M
2579                        LDWRKR = M
2580                     ELSE
2581*
2582*                       WORK(IU) is M by M and WORK(IR) is M by M
2583*
2584                        LDWRKU = M
2585                        IR = IU + LDWRKU*M
2586                        LDWRKR = M
2587                     END IF
2588                     ITAU = IR + LDWRKR*M
2589                     IWORK = ITAU + M
2590*
2591*                    Compute A=L*Q
2592*                    (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB)
2593*
2594                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2595     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2596*
2597*                    Copy L to WORK(IU), zeroing out below it
2598*
2599                     CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
2600     $                            LDWRKU )
2601                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2602     $                            WORK( IU+LDWRKU ), LDWRKU )
2603*
2604*                    Generate Q in A
2605*                    (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB)
2606*
2607                     CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2608     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2609                     IE = ITAU
2610                     ITAUQ = IE + M
2611                     ITAUP = ITAUQ + M
2612                     IWORK = ITAUP + M
2613*
2614*                    Bidiagonalize L in WORK(IU), copying result to
2615*                    WORK(IR)
2616*                    (Workspace: need 2*M*M + 4*M,
2617*                                prefer 2*M*M+3*M+2*M*NB)
2618*
2619                     CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
2620     $                            WORK( IE ), WORK( ITAUQ ),
2621     $                            WORK( ITAUP ), WORK( IWORK ),
2622     $                            LWORK-IWORK+1, IERR )
2623                     CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU,
2624     $                            WORK( IR ), LDWRKR )
2625*
2626*                    Generate right bidiagonalizing vectors in WORK(IU)
2627*                    (Workspace: need 2*M*M + 4*M-1,
2628*                                prefer 2*M*M+3*M+(M-1)*NB)
2629*
2630                     CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
2631     $                            WORK( ITAUP ), WORK( IWORK ),
2632     $                            LWORK-IWORK+1, IERR )
2633*
2634*                    Generate left bidiagonalizing vectors in WORK(IR)
2635*                    (Workspace: need 2*M*M + 4*M, prefer 2*M*M + 3*M + M*NB)
2636*
2637                     CALL DORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
2638     $                            WORK( ITAUQ ), WORK( IWORK ),
2639     $                            LWORK-IWORK+1, IERR )
2640                     IWORK = IE + M
2641*
2642*                    Perform bidiagonal QR iteration, computing left
2643*                    singular vectors of L in WORK(IR) and computing
2644*                    right singular vectors of L in WORK(IU)
2645*                    (Workspace: need 2*M*M + BDSPAC)
2646*
2647                     CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2648     $                            WORK( IU ), LDWRKU, WORK( IR ),
2649     $                            LDWRKR, DUM, 1, WORK( IWORK ), INFO )
2650*
2651*                    Multiply right singular vectors of L in WORK(IU) by
2652*                    Q in A, storing result in VT
2653*                    (Workspace: need M*M)
2654*
2655                     CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
2656     $                           LDWRKU, A, LDA, ZERO, VT, LDVT )
2657*
2658*                    Copy left singular vectors of L to A
2659*                    (Workspace: need M*M)
2660*
2661                     CALL DLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
2662     $                            LDA )
2663*
2664                  ELSE
2665*
2666*                    Insufficient workspace for a fast algorithm
2667*
2668                     ITAU = 1
2669                     IWORK = ITAU + M
2670*
2671*                    Compute A=L*Q, copying result to VT
2672*                    (Workspace: need 2*M, prefer M + M*NB)
2673*
2674                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2675     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2676                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2677*
2678*                    Generate Q in VT
2679*                    (Workspace: need 2*M, prefer M + M*NB)
2680*
2681                     CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2682     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2683                     IE = ITAU
2684                     ITAUQ = IE + M
2685                     ITAUP = ITAUQ + M
2686                     IWORK = ITAUP + M
2687*
2688*                    Zero out above L in A
2689*
2690                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
2691     $                            LDA )
2692*
2693*                    Bidiagonalize L in A
2694*                    (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2695*
2696                     CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
2697     $                            WORK( ITAUQ ), WORK( ITAUP ),
2698     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2699*
2700*                    Multiply right vectors bidiagonalizing L by Q in VT
2701*                    (Workspace: need 3*M + N, prefer 3*M + N*NB)
2702*
2703                     CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
2704     $                            WORK( ITAUP ), VT, LDVT,
2705     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2706*
2707*                    Generate left bidiagonalizing vectors of L in A
2708*                    (Workspace: need 4*M, prefer 3*M + M*NB)
2709*
2710                     CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
2711     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2712                     IWORK = IE + M
2713*
2714*                    Perform bidiagonal QR iteration, compute left
2715*                    singular vectors of A in A and compute right
2716*                    singular vectors of A in VT
2717*                    (Workspace: need BDSPAC)
2718*
2719                     CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
2720     $                            LDVT, A, LDA, DUM, 1, WORK( IWORK ),
2721     $                            INFO )
2722*
2723                  END IF
2724*
2725               ELSE IF( WNTUAS ) THEN
2726*
2727*                 Path 6t(N much larger than M, JOBU='S' or 'A',
2728*                         JOBVT='S')
2729*                 M right singular vectors to be computed in VT and
2730*                 M left singular vectors to be computed in U
2731*
2732                  IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2733*
2734*                    Sufficient workspace for a fast algorithm
2735*
2736                     IU = 1
2737                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
2738*
2739*                       WORK(IU) is LDA by N
2740*
2741                        LDWRKU = LDA
2742                     ELSE
2743*
2744*                       WORK(IU) is LDA by M
2745*
2746                        LDWRKU = M
2747                     END IF
2748                     ITAU = IU + LDWRKU*M
2749                     IWORK = ITAU + M
2750*
2751*                    Compute A=L*Q
2752*                    (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2753*
2754                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2755     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2756*
2757*                    Copy L to WORK(IU), zeroing out above it
2758*
2759                     CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
2760     $                            LDWRKU )
2761                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2762     $                            WORK( IU+LDWRKU ), LDWRKU )
2763*
2764*                    Generate Q in A
2765*                    (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2766*
2767                     CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2768     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2769                     IE = ITAU
2770                     ITAUQ = IE + M
2771                     ITAUP = ITAUQ + M
2772                     IWORK = ITAUP + M
2773*
2774*                    Bidiagonalize L in WORK(IU), copying result to U
2775*                    (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2776*
2777                     CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
2778     $                            WORK( IE ), WORK( ITAUQ ),
2779     $                            WORK( ITAUP ), WORK( IWORK ),
2780     $                            LWORK-IWORK+1, IERR )
2781                     CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
2782     $                            LDU )
2783*
2784*                    Generate right bidiagonalizing vectors in WORK(IU)
2785*                    (Workspace: need M*M + 4*M-1,
2786*                                prefer M*M+3*M+(M-1)*NB)
2787*
2788                     CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
2789     $                            WORK( ITAUP ), WORK( IWORK ),
2790     $                            LWORK-IWORK+1, IERR )
2791*
2792*                    Generate left bidiagonalizing vectors in U
2793*                    (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB)
2794*
2795                     CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2796     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2797                     IWORK = IE + M
2798*
2799*                    Perform bidiagonal QR iteration, computing left
2800*                    singular vectors of L in U and computing right
2801*                    singular vectors of L in WORK(IU)
2802*                    (Workspace: need M*M + BDSPAC)
2803*
2804                     CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2805     $                            WORK( IU ), LDWRKU, U, LDU, DUM, 1,
2806     $                            WORK( IWORK ), INFO )
2807*
2808*                    Multiply right singular vectors of L in WORK(IU) by
2809*                    Q in A, storing result in VT
2810*                    (Workspace: need M*M)
2811*
2812                     CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
2813     $                           LDWRKU, A, LDA, ZERO, VT, LDVT )
2814*
2815                  ELSE
2816*
2817*                    Insufficient workspace for a fast algorithm
2818*
2819                     ITAU = 1
2820                     IWORK = ITAU + M
2821*
2822*                    Compute A=L*Q, copying result to VT
2823*                    (Workspace: need 2*M, prefer M + M*NB)
2824*
2825                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2826     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2827                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2828*
2829*                    Generate Q in VT
2830*                    (Workspace: need 2*M, prefer M + M*NB)
2831*
2832                     CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2833     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2834*
2835*                    Copy L to U, zeroing out above it
2836*
2837                     CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
2838                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
2839     $                            LDU )
2840                     IE = ITAU
2841                     ITAUQ = IE + M
2842                     ITAUP = ITAUQ + M
2843                     IWORK = ITAUP + M
2844*
2845*                    Bidiagonalize L in U
2846*                    (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2847*
2848                     CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
2849     $                            WORK( ITAUQ ), WORK( ITAUP ),
2850     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2851*
2852*                    Multiply right bidiagonalizing vectors in U by Q
2853*                    in VT
2854*                    (Workspace: need 3*M + N, prefer 3*M + N*NB)
2855*
2856                     CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
2857     $                            WORK( ITAUP ), VT, LDVT,
2858     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2859*
2860*                    Generate left bidiagonalizing vectors in U
2861*                    (Workspace: need 4*M, prefer 3*M + M*NB)
2862*
2863                     CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2864     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2865                     IWORK = IE + M
2866*
2867*                    Perform bidiagonal QR iteration, computing left
2868*                    singular vectors of A in U and computing right
2869*                    singular vectors of A in VT
2870*                    (Workspace: need BDSPAC)
2871*
2872                     CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
2873     $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
2874     $                            INFO )
2875*
2876                  END IF
2877*
2878               END IF
2879*
2880            ELSE IF( WNTVA ) THEN
2881*
2882               IF( WNTUN ) THEN
2883*
2884*                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')
2885*                 N right singular vectors to be computed in VT and
2886*                 no left singular vectors to be computed
2887*
2888                  IF( LWORK.GE.M*M+MAX( N + M, 4*M, BDSPAC ) ) THEN
2889*
2890*                    Sufficient workspace for a fast algorithm
2891*
2892                     IR = 1
2893                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
2894*
2895*                       WORK(IR) is LDA by M
2896*
2897                        LDWRKR = LDA
2898                     ELSE
2899*
2900*                       WORK(IR) is M by M
2901*
2902                        LDWRKR = M
2903                     END IF
2904                     ITAU = IR + LDWRKR*M
2905                     IWORK = ITAU + M
2906*
2907*                    Compute A=L*Q, copying result to VT
2908*                    (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2909*
2910                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2911     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2912                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2913*
2914*                    Copy L to WORK(IR), zeroing out above it
2915*
2916                     CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ),
2917     $                            LDWRKR )
2918                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2919     $                            WORK( IR+LDWRKR ), LDWRKR )
2920*
2921*                    Generate Q in VT
2922*                    (Workspace: need M*M + M + N, prefer M*M + M + N*NB)
2923*
2924                     CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
2925     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2926                     IE = ITAU
2927                     ITAUQ = IE + M
2928                     ITAUP = ITAUQ + M
2929                     IWORK = ITAUP + M
2930*
2931*                    Bidiagonalize L in WORK(IR)
2932*                    (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2933*
2934                     CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S,
2935     $                            WORK( IE ), WORK( ITAUQ ),
2936     $                            WORK( ITAUP ), WORK( IWORK ),
2937     $                            LWORK-IWORK+1, IERR )
2938*
2939*                    Generate right bidiagonalizing vectors in WORK(IR)
2940*                    (Workspace: need M*M + 4*M-1,
2941*                                prefer M*M+3*M+(M-1)*NB)
2942*
2943                     CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2944     $                            WORK( ITAUP ), WORK( IWORK ),
2945     $                            LWORK-IWORK+1, IERR )
2946                     IWORK = IE + M
2947*
2948*                    Perform bidiagonal QR iteration, computing right
2949*                    singular vectors of L in WORK(IR)
2950*                    (Workspace: need M*M + BDSPAC)
2951*
2952                     CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
2953     $                            WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2954     $                            WORK( IWORK ), INFO )
2955*
2956*                    Multiply right singular vectors of L in WORK(IR) by
2957*                    Q in VT, storing result in A
2958*                    (Workspace: need M*M)
2959*
2960                     CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ),
2961     $                           LDWRKR, VT, LDVT, ZERO, A, LDA )
2962*
2963*                    Copy right singular vectors of A from A to VT
2964*
2965                     CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
2966*
2967                  ELSE
2968*
2969*                    Insufficient workspace for a fast algorithm
2970*
2971                     ITAU = 1
2972                     IWORK = ITAU + M
2973*
2974*                    Compute A=L*Q, copying result to VT
2975*                    (Workspace: need 2*M, prefer M + M*NB)
2976*
2977                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2978     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2979                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2980*
2981*                    Generate Q in VT
2982*                    (Workspace: need M + N, prefer M + N*NB)
2983*
2984                     CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
2985     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2986                     IE = ITAU
2987                     ITAUQ = IE + M
2988                     ITAUP = ITAUQ + M
2989                     IWORK = ITAUP + M
2990*
2991*                    Zero out above L in A
2992*
2993                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
2994     $                            LDA )
2995*
2996*                    Bidiagonalize L in A
2997*                    (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2998*
2999                     CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
3000     $                            WORK( ITAUQ ), WORK( ITAUP ),
3001     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3002*
3003*                    Multiply right bidiagonalizing vectors in A by Q
3004*                    in VT
3005*                    (Workspace: need 3*M + N, prefer 3*M + N*NB)
3006*
3007                     CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
3008     $                            WORK( ITAUP ), VT, LDVT,
3009     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3010                     IWORK = IE + M
3011*
3012*                    Perform bidiagonal QR iteration, computing right
3013*                    singular vectors of A in VT
3014*                    (Workspace: need BDSPAC)
3015*
3016                     CALL DBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT,
3017     $                            LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
3018     $                            INFO )
3019*
3020                  END IF
3021*
3022               ELSE IF( WNTUO ) THEN
3023*
3024*                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')
3025*                 N right singular vectors to be computed in VT and
3026*                 M left singular vectors to be overwritten on A
3027*
3028                  IF( LWORK.GE.2*M*M+MAX( N + M, 4*M, BDSPAC ) ) THEN
3029*
3030*                    Sufficient workspace for a fast algorithm
3031*
3032                     IU = 1
3033                     IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
3034*
3035*                       WORK(IU) is LDA by M and WORK(IR) is LDA by M
3036*
3037                        LDWRKU = LDA
3038                        IR = IU + LDWRKU*M
3039                        LDWRKR = LDA
3040                     ELSE IF( LWORK.GE.WRKBL+( LDA + M )*M ) THEN
3041*
3042*                       WORK(IU) is LDA by M and WORK(IR) is M by M
3043*
3044                        LDWRKU = LDA
3045                        IR = IU + LDWRKU*M
3046                        LDWRKR = M
3047                     ELSE
3048*
3049*                       WORK(IU) is M by M and WORK(IR) is M by M
3050*
3051                        LDWRKU = M
3052                        IR = IU + LDWRKU*M
3053                        LDWRKR = M
3054                     END IF
3055                     ITAU = IR + LDWRKR*M
3056                     IWORK = ITAU + M
3057*
3058*                    Compute A=L*Q, copying result to VT
3059*                    (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB)
3060*
3061                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
3062     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3063                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3064*
3065*                    Generate Q in VT
3066*                    (Workspace: need 2*M*M + M + N, prefer 2*M*M + M + N*NB)
3067*
3068                     CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3069     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3070*
3071*                    Copy L to WORK(IU), zeroing out above it
3072*
3073                     CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
3074     $                            LDWRKU )
3075                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
3076     $                            WORK( IU+LDWRKU ), LDWRKU )
3077                     IE = ITAU
3078                     ITAUQ = IE + M
3079                     ITAUP = ITAUQ + M
3080                     IWORK = ITAUP + M
3081*
3082*                    Bidiagonalize L in WORK(IU), copying result to
3083*                    WORK(IR)
3084*                    (Workspace: need 2*M*M + 4*M,
3085*                                prefer 2*M*M+3*M+2*M*NB)
3086*
3087                     CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
3088     $                            WORK( IE ), WORK( ITAUQ ),
3089     $                            WORK( ITAUP ), WORK( IWORK ),
3090     $                            LWORK-IWORK+1, IERR )
3091                     CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU,
3092     $                            WORK( IR ), LDWRKR )
3093*
3094*                    Generate right bidiagonalizing vectors in WORK(IU)
3095*                    (Workspace: need 2*M*M + 4*M-1,
3096*                                prefer 2*M*M+3*M+(M-1)*NB)
3097*
3098                     CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
3099     $                            WORK( ITAUP ), WORK( IWORK ),
3100     $                            LWORK-IWORK+1, IERR )
3101*
3102*                    Generate left bidiagonalizing vectors in WORK(IR)
3103*                    (Workspace: need 2*M*M + 4*M, prefer 2*M*M + 3*M + M*NB)
3104*
3105                     CALL DORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
3106     $                            WORK( ITAUQ ), WORK( IWORK ),
3107     $                            LWORK-IWORK+1, IERR )
3108                     IWORK = IE + M
3109*
3110*                    Perform bidiagonal QR iteration, computing left
3111*                    singular vectors of L in WORK(IR) and computing
3112*                    right singular vectors of L in WORK(IU)
3113*                    (Workspace: need 2*M*M + BDSPAC)
3114*
3115                     CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
3116     $                            WORK( IU ), LDWRKU, WORK( IR ),
3117     $                            LDWRKR, DUM, 1, WORK( IWORK ), INFO )
3118*
3119*                    Multiply right singular vectors of L in WORK(IU) by
3120*                    Q in VT, storing result in A
3121*                    (Workspace: need M*M)
3122*
3123                     CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
3124     $                           LDWRKU, VT, LDVT, ZERO, A, LDA )
3125*
3126*                    Copy right singular vectors of A from A to VT
3127*
3128                     CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
3129*
3130*                    Copy left singular vectors of A from WORK(IR) to A
3131*
3132                     CALL DLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
3133     $                            LDA )
3134*
3135                  ELSE
3136*
3137*                    Insufficient workspace for a fast algorithm
3138*
3139                     ITAU = 1
3140                     IWORK = ITAU + M
3141*
3142*                    Compute A=L*Q, copying result to VT
3143*                    (Workspace: need 2*M, prefer M + M*NB)
3144*
3145                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
3146     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3147                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3148*
3149*                    Generate Q in VT
3150*                    (Workspace: need M + N, prefer M + N*NB)
3151*
3152                     CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3153     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3154                     IE = ITAU
3155                     ITAUQ = IE + M
3156                     ITAUP = ITAUQ + M
3157                     IWORK = ITAUP + M
3158*
3159*                    Zero out above L in A
3160*
3161                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
3162     $                            LDA )
3163*
3164*                    Bidiagonalize L in A
3165*                    (Workspace: need 4*M, prefer 3*M + 2*M*NB)
3166*
3167                     CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
3168     $                            WORK( ITAUQ ), WORK( ITAUP ),
3169     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3170*
3171*                    Multiply right bidiagonalizing vectors in A by Q
3172*                    in VT
3173*                    (Workspace: need 3*M + N, prefer 3*M + N*NB)
3174*
3175                     CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
3176     $                            WORK( ITAUP ), VT, LDVT,
3177     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3178*
3179*                    Generate left bidiagonalizing vectors in A
3180*                    (Workspace: need 4*M, prefer 3*M + M*NB)
3181*
3182                     CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
3183     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3184                     IWORK = IE + M
3185*
3186*                    Perform bidiagonal QR iteration, computing left
3187*                    singular vectors of A in A and computing right
3188*                    singular vectors of A in VT
3189*                    (Workspace: need BDSPAC)
3190*
3191                     CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
3192     $                            LDVT, A, LDA, DUM, 1, WORK( IWORK ),
3193     $                            INFO )
3194*
3195                  END IF
3196*
3197               ELSE IF( WNTUAS ) THEN
3198*
3199*                 Path 9t(N much larger than M, JOBU='S' or 'A',
3200*                         JOBVT='A')
3201*                 N right singular vectors to be computed in VT and
3202*                 M left singular vectors to be computed in U
3203*
3204                  IF( LWORK.GE.M*M+MAX( N + M, 4*M, BDSPAC ) ) THEN
3205*
3206*                    Sufficient workspace for a fast algorithm
3207*
3208                     IU = 1
3209                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
3210*
3211*                       WORK(IU) is LDA by M
3212*
3213                        LDWRKU = LDA
3214                     ELSE
3215*
3216*                       WORK(IU) is M by M
3217*
3218                        LDWRKU = M
3219                     END IF
3220                     ITAU = IU + LDWRKU*M
3221                     IWORK = ITAU + M
3222*
3223*                    Compute A=L*Q, copying result to VT
3224*                    (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
3225*
3226                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
3227     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3228                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3229*
3230*                    Generate Q in VT
3231*                    (Workspace: need M*M + M + N, prefer M*M + M + N*NB)
3232*
3233                     CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3234     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3235*
3236*                    Copy L to WORK(IU), zeroing out above it
3237*
3238                     CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
3239     $                            LDWRKU )
3240                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
3241     $                            WORK( IU+LDWRKU ), LDWRKU )
3242                     IE = ITAU
3243                     ITAUQ = IE + M
3244                     ITAUP = ITAUQ + M
3245                     IWORK = ITAUP + M
3246*
3247*                    Bidiagonalize L in WORK(IU), copying result to U
3248*                    (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
3249*
3250                     CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
3251     $                            WORK( IE ), WORK( ITAUQ ),
3252     $                            WORK( ITAUP ), WORK( IWORK ),
3253     $                            LWORK-IWORK+1, IERR )
3254                     CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
3255     $                            LDU )
3256*
3257*                    Generate right bidiagonalizing vectors in WORK(IU)
3258*                    (Workspace: need M*M + 4*M, prefer M*M + 3*M + (M-1)*NB)
3259*
3260                     CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
3261     $                            WORK( ITAUP ), WORK( IWORK ),
3262     $                            LWORK-IWORK+1, IERR )
3263*
3264*                    Generate left bidiagonalizing vectors in U
3265*                    (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB)
3266*
3267                     CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3268     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3269                     IWORK = IE + M
3270*
3271*                    Perform bidiagonal QR iteration, computing left
3272*                    singular vectors of L in U and computing right
3273*                    singular vectors of L in WORK(IU)
3274*                    (Workspace: need M*M + BDSPAC)
3275*
3276                     CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
3277     $                            WORK( IU ), LDWRKU, U, LDU, DUM, 1,
3278     $                            WORK( IWORK ), INFO )
3279*
3280*                    Multiply right singular vectors of L in WORK(IU) by
3281*                    Q in VT, storing result in A
3282*                    (Workspace: need M*M)
3283*
3284                     CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
3285     $                           LDWRKU, VT, LDVT, ZERO, A, LDA )
3286*
3287*                    Copy right singular vectors of A from A to VT
3288*
3289                     CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
3290*
3291                  ELSE
3292*
3293*                    Insufficient workspace for a fast algorithm
3294*
3295                     ITAU = 1
3296                     IWORK = ITAU + M
3297*
3298*                    Compute A=L*Q, copying result to VT
3299*                    (Workspace: need 2*M, prefer M + M*NB)
3300*
3301                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
3302     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3303                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3304*
3305*                    Generate Q in VT
3306*                    (Workspace: need M + N, prefer M + N*NB)
3307*
3308                     CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3309     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3310*
3311*                    Copy L to U, zeroing out above it
3312*
3313                     CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
3314                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
3315     $                            LDU )
3316                     IE = ITAU
3317                     ITAUQ = IE + M
3318                     ITAUP = ITAUQ + M
3319                     IWORK = ITAUP + M
3320*
3321*                    Bidiagonalize L in U
3322*                    (Workspace: need 4*M, prefer 3*M + 2*M*NB)
3323*
3324                     CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
3325     $                            WORK( ITAUQ ), WORK( ITAUP ),
3326     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3327*
3328*                    Multiply right bidiagonalizing vectors in U by Q
3329*                    in VT
3330*                    (Workspace: need 3*M + N, prefer 3*M + N*NB)
3331*
3332                     CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
3333     $                            WORK( ITAUP ), VT, LDVT,
3334     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3335*
3336*                    Generate left bidiagonalizing vectors in U
3337*                    (Workspace: need 4*M, prefer 3*M + M*NB)
3338*
3339                     CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3340     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3341                     IWORK = IE + M
3342*
3343*                    Perform bidiagonal QR iteration, computing left
3344*                    singular vectors of A in U and computing right
3345*                    singular vectors of A in VT
3346*                    (Workspace: need BDSPAC)
3347*
3348                     CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
3349     $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
3350     $                            INFO )
3351*
3352                  END IF
3353*
3354               END IF
3355*
3356            END IF
3357*
3358         ELSE
3359*
3360*           N .LT. MNTHR
3361*
3362*           Path 10t(N greater than M, but not much larger)
3363*           Reduce to bidiagonal form without LQ decomposition
3364*
3365            IE = 1
3366            ITAUQ = IE + M
3367            ITAUP = ITAUQ + M
3368            IWORK = ITAUP + M
3369*
3370*           Bidiagonalize A
3371*           (Workspace: need 3*M + N, prefer 3*M + (M + N)*NB)
3372*
3373            CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
3374     $                   WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
3375     $                   IERR )
3376            IF( WNTUAS ) THEN
3377*
3378*              If left singular vectors desired in U, copy result to U
3379*              and generate left bidiagonalizing vectors in U
3380*              (Workspace: need 4*M-1, prefer 3*M + (M-1)*NB)
3381*
3382               CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
3383               CALL DORGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ),
3384     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3385            END IF
3386            IF( WNTVAS ) THEN
3387*
3388*              If right singular vectors desired in VT, copy result to
3389*              VT and generate right bidiagonalizing vectors in VT
3390*              (Workspace: need 3*M + NRVT, prefer 3*M + NRVT*NB)
3391*
3392               CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3393               IF( WNTVA )
3394     $            NRVT = N
3395               IF( WNTVS )
3396     $            NRVT = M
3397               CALL DORGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ),
3398     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3399            END IF
3400            IF( WNTUO ) THEN
3401*
3402*              If left singular vectors desired in A, generate left
3403*              bidiagonalizing vectors in A
3404*              (Workspace: need 4*M-1, prefer 3*M + (M-1)*NB)
3405*
3406               CALL DORGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ),
3407     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3408            END IF
3409            IF( WNTVO ) THEN
3410*
3411*              If right singular vectors desired in A, generate right
3412*              bidiagonalizing vectors in A
3413*              (Workspace: need 4*M, prefer 3*M + M*NB)
3414*
3415               CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
3416     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3417            END IF
3418            IWORK = IE + M
3419            IF( WNTUAS .OR. WNTUO )
3420     $         NRU = M
3421            IF( WNTUN )
3422     $         NRU = 0
3423            IF( WNTVAS .OR. WNTVO )
3424     $         NCVT = N
3425            IF( WNTVN )
3426     $         NCVT = 0
3427            IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
3428*
3429*              Perform bidiagonal QR iteration, if desired, computing
3430*              left singular vectors in U and computing right singular
3431*              vectors in VT
3432*              (Workspace: need BDSPAC)
3433*
3434               CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
3435     $                      LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
3436            ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
3437*
3438*              Perform bidiagonal QR iteration, if desired, computing
3439*              left singular vectors in U and computing right singular
3440*              vectors in A
3441*              (Workspace: need BDSPAC)
3442*
3443               CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
3444     $                      U, LDU, DUM, 1, WORK( IWORK ), INFO )
3445            ELSE
3446*
3447*              Perform bidiagonal QR iteration, if desired, computing
3448*              left singular vectors in A and computing right singular
3449*              vectors in VT
3450*              (Workspace: need BDSPAC)
3451*
3452               CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
3453     $                      LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
3454            END IF
3455*
3456         END IF
3457*
3458      END IF
3459*
3460*     If DBDSQR failed to converge, copy unconverged superdiagonals
3461*     to WORK( 2:MINMN )
3462*
3463      IF( INFO.NE.0 ) THEN
3464         IF( IE.GT.2 ) THEN
3465            DO 50 I = 1, MINMN - 1
3466               WORK( I+1 ) = WORK( I+IE-1 )
3467   50       CONTINUE
3468         END IF
3469         IF( IE.LT.2 ) THEN
3470            DO 60 I = MINMN - 1, 1, -1
3471               WORK( I+1 ) = WORK( I+IE-1 )
3472   60       CONTINUE
3473         END IF
3474      END IF
3475*
3476*     Undo scaling if necessary
3477*
3478      IF( ISCL.EQ.1 ) THEN
3479         IF( ANRM.GT.BIGNUM )
3480     $      CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
3481     $                   IERR )
3482         IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM )
3483     $      CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1, WORK( 2 ),
3484     $                   MINMN, IERR )
3485         IF( ANRM.LT.SMLNUM )
3486     $      CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
3487     $                   IERR )
3488         IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM )
3489     $      CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1, WORK( 2 ),
3490     $                   MINMN, IERR )
3491      END IF
3492*
3493*     Return optimal workspace in WORK(1)
3494*
3495      WORK( 1 ) = MAXWRK
3496*
3497      RETURN
3498*
3499*     End of DGESVD
3500*
3501      END
3502