1*> \brief <b> SGESVD computes the singular value decomposition (SVD) for GE matrices</b>
2*
3*  =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6*            http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
9*> Download SGESVD + dependencies
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgesvd.f">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgesvd.f">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgesvd.f">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18*  Definition:
19*  ===========
20*
21*       SUBROUTINE SGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
22*                          WORK, LWORK, INFO )
23*
24*       .. Scalar Arguments ..
25*       CHARACTER          JOBU, JOBVT
26*       INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N
27*       ..
28*       .. Array Arguments ..
29*       REAL               A( LDA, * ), S( * ), U( LDU, * ),
30*      $                   VT( LDVT, * ), WORK( * )
31*       ..
32*
33*
34*> \par Purpose:
35*  =============
36*>
37*> \verbatim
38*>
39*> SGESVD computes the singular value decomposition (SVD) of a real
40*> M-by-N matrix A, optionally computing the left and/or right singular
41*> vectors. The SVD is written
42*>
43*>      A = U * SIGMA * transpose(V)
44*>
45*> where SIGMA is an M-by-N matrix which is zero except for its
46*> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
47*> V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA
48*> are the singular values of A; they are real and non-negative, and
49*> are returned in descending order.  The first min(m,n) columns of
50*> U and V are the left and right singular vectors of A.
51*>
52*> Note that the routine returns V**T, not V.
53*> \endverbatim
54*
55*  Arguments:
56*  ==========
57*
58*> \param[in] JOBU
59*> \verbatim
60*>          JOBU is CHARACTER*1
61*>          Specifies options for computing all or part of the matrix U:
62*>          = 'A':  all M columns of U are returned in array U:
63*>          = 'S':  the first min(m,n) columns of U (the left singular
64*>                  vectors) are returned in the array U;
65*>          = 'O':  the first min(m,n) columns of U (the left singular
66*>                  vectors) are overwritten on the array A;
67*>          = 'N':  no columns of U (no left singular vectors) are
68*>                  computed.
69*> \endverbatim
70*>
71*> \param[in] JOBVT
72*> \verbatim
73*>          JOBVT is CHARACTER*1
74*>          Specifies options for computing all or part of the matrix
75*>          V**T:
76*>          = 'A':  all N rows of V**T are returned in the array VT;
77*>          = 'S':  the first min(m,n) rows of V**T (the right singular
78*>                  vectors) are returned in the array VT;
79*>          = 'O':  the first min(m,n) rows of V**T (the right singular
80*>                  vectors) are overwritten on the array A;
81*>          = 'N':  no rows of V**T (no right singular vectors) are
82*>                  computed.
83*>
84*>          JOBVT and JOBU cannot both be 'O'.
85*> \endverbatim
86*>
87*> \param[in] M
88*> \verbatim
89*>          M is INTEGER
90*>          The number of rows of the input matrix A.  M >= 0.
91*> \endverbatim
92*>
93*> \param[in] N
94*> \verbatim
95*>          N is INTEGER
96*>          The number of columns of the input matrix A.  N >= 0.
97*> \endverbatim
98*>
99*> \param[in,out] A
100*> \verbatim
101*>          A is REAL array, dimension (LDA,N)
102*>          On entry, the M-by-N matrix A.
103*>          On exit,
104*>          if JOBU = 'O',  A is overwritten with the first min(m,n)
105*>                          columns of U (the left singular vectors,
106*>                          stored columnwise);
107*>          if JOBVT = 'O', A is overwritten with the first min(m,n)
108*>                          rows of V**T (the right singular vectors,
109*>                          stored rowwise);
110*>          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
111*>                          are destroyed.
112*> \endverbatim
113*>
114*> \param[in] LDA
115*> \verbatim
116*>          LDA is INTEGER
117*>          The leading dimension of the array A.  LDA >= max(1,M).
118*> \endverbatim
119*>
120*> \param[out] S
121*> \verbatim
122*>          S is REAL array, dimension (min(M,N))
123*>          The singular values of A, sorted so that S(i) >= S(i+1).
124*> \endverbatim
125*>
126*> \param[out] U
127*> \verbatim
128*>          U is REAL array, dimension (LDU,UCOL)
129*>          (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
130*>          If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
131*>          if JOBU = 'S', U contains the first min(m,n) columns of U
132*>          (the left singular vectors, stored columnwise);
133*>          if JOBU = 'N' or 'O', U is not referenced.
134*> \endverbatim
135*>
136*> \param[in] LDU
137*> \verbatim
138*>          LDU is INTEGER
139*>          The leading dimension of the array U.  LDU >= 1; if
140*>          JOBU = 'S' or 'A', LDU >= M.
141*> \endverbatim
142*>
143*> \param[out] VT
144*> \verbatim
145*>          VT is REAL array, dimension (LDVT,N)
146*>          If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
147*>          V**T;
148*>          if JOBVT = 'S', VT contains the first min(m,n) rows of
149*>          V**T (the right singular vectors, stored rowwise);
150*>          if JOBVT = 'N' or 'O', VT is not referenced.
151*> \endverbatim
152*>
153*> \param[in] LDVT
154*> \verbatim
155*>          LDVT is INTEGER
156*>          The leading dimension of the array VT.  LDVT >= 1; if
157*>          JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
158*> \endverbatim
159*>
160*> \param[out] WORK
161*> \verbatim
162*>          WORK is REAL array, dimension (MAX(1,LWORK))
163*>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
164*>          if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
165*>          superdiagonal elements of an upper bidiagonal matrix B
166*>          whose diagonal is in S (not necessarily sorted). B
167*>          satisfies A = U * B * VT, so it has the same singular values
168*>          as A, and singular vectors related by U and VT.
169*> \endverbatim
170*>
171*> \param[in] LWORK
172*> \verbatim
173*>          LWORK is INTEGER
174*>          The dimension of the array WORK.
175*>          LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code):
176*>             - PATH 1  (M much larger than N, JOBU='N')
177*>             - PATH 1t (N much larger than M, JOBVT='N')
178*>          LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) for the other paths
179*>          For good performance, LWORK should generally be larger.
180*>
181*>          If LWORK = -1, then a workspace query is assumed; the routine
182*>          only calculates the optimal size of the WORK array, returns
183*>          this value as the first entry of the WORK array, and no error
184*>          message related to LWORK is issued by XERBLA.
185*> \endverbatim
186*>
187*> \param[out] INFO
188*> \verbatim
189*>          INFO is INTEGER
190*>          = 0:  successful exit.
191*>          < 0:  if INFO = -i, the i-th argument had an illegal value.
192*>          > 0:  if SBDSQR did not converge, INFO specifies how many
193*>                superdiagonals of an intermediate bidiagonal form B
194*>                did not converge to zero. See the description of WORK
195*>                above for details.
196*> \endverbatim
197*
198*  Authors:
199*  ========
200*
201*> \author Univ. of Tennessee
202*> \author Univ. of California Berkeley
203*> \author Univ. of Colorado Denver
204*> \author NAG Ltd.
205*
206*> \date April 2012
207*
208*> \ingroup realGEsing
209*
210*  =====================================================================
211      SUBROUTINE SGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
212     $                   WORK, LWORK, INFO )
213*
214*  -- LAPACK driver routine (version 3.4.1) --
215*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
216*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
217*     April 2012
218*
219*     .. Scalar Arguments ..
220      CHARACTER          JOBU, JOBVT
221      INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N
222*     ..
223*     .. Array Arguments ..
224      REAL               A( LDA, * ), S( * ), U( LDU, * ),
225     $                   VT( LDVT, * ), WORK( * )
226*     ..
227*
228*  =====================================================================
229*
230*     .. Parameters ..
231      REAL               ZERO, ONE
232      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
233*     ..
234*     .. Local Scalars ..
235      LOGICAL            LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
236     $                   WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS
237      INTEGER            BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL,
238     $                   ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU,
239     $                   MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU,
240     $                   NRVT, WRKBL
241      INTEGER            LWORK_SGEQRF, LWORK_SORGQR_N, LWORK_SORGQR_M,
242     $                   LWORK_SGEBRD, LWORK_SORGBR_P, LWORK_SORGBR_Q,
243     $                   LWORK_SGELQF, LWORK_SORGLQ_N, LWORK_SORGLQ_M
244      REAL               ANRM, BIGNUM, EPS, SMLNUM
245*     ..
246*     .. Local Arrays ..
247      REAL               DUM( 1 )
248*     ..
249*     .. External Subroutines ..
250      EXTERNAL           SBDSQR, SGEBRD, SGELQF, SGEMM, SGEQRF, SLACPY,
251     $                   SLASCL, SLASET, SORGBR, SORGLQ, SORGQR, SORMBR,
252     $                   XERBLA
253*     ..
254*     .. External Functions ..
255      LOGICAL            LSAME
256      INTEGER            ILAENV
257      REAL               SLAMCH, SLANGE
258      EXTERNAL           LSAME, ILAENV, SLAMCH, SLANGE
259*     ..
260*     .. Intrinsic Functions ..
261      INTRINSIC          MAX, MIN, SQRT
262*     ..
263*     .. Executable Statements ..
264*
265*     Test the input arguments
266*
267      INFO = 0
268      MINMN = MIN( M, N )
269      WNTUA = LSAME( JOBU, 'A' )
270      WNTUS = LSAME( JOBU, 'S' )
271      WNTUAS = WNTUA .OR. WNTUS
272      WNTUO = LSAME( JOBU, 'O' )
273      WNTUN = LSAME( JOBU, 'N' )
274      WNTVA = LSAME( JOBVT, 'A' )
275      WNTVS = LSAME( JOBVT, 'S' )
276      WNTVAS = WNTVA .OR. WNTVS
277      WNTVO = LSAME( JOBVT, 'O' )
278      WNTVN = LSAME( JOBVT, 'N' )
279      LQUERY = ( LWORK.EQ.-1 )
280*
281      IF( .NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN
282         INFO = -1
283      ELSE IF( .NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR.
284     $         ( WNTVO .AND. WNTUO ) ) THEN
285         INFO = -2
286      ELSE IF( M.LT.0 ) THEN
287         INFO = -3
288      ELSE IF( N.LT.0 ) THEN
289         INFO = -4
290      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
291         INFO = -6
292      ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN
293         INFO = -9
294      ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR.
295     $         ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN
296         INFO = -11
297      END IF
298*
299*     Compute workspace
300*      (Note: Comments in the code beginning "Workspace:" describe the
301*       minimal amount of workspace needed at that point in the code,
302*       as well as the preferred amount for good performance.
303*       NB refers to the optimal block size for the immediately
304*       following subroutine, as returned by ILAENV.)
305*
306      IF( INFO.EQ.0 ) THEN
307         MINWRK = 1
308         MAXWRK = 1
309         IF( M.GE.N .AND. MINMN.GT.0 ) THEN
310*
311*           Compute space needed for SBDSQR
312*
313            MNTHR = ILAENV( 6, 'SGESVD', JOBU // JOBVT, M, N, 0, 0 )
314            BDSPAC = 5*N
315*           Compute space needed for SGEQRF
316            CALL SGEQRF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR )
317            LWORK_SGEQRF=DUM(1)
318*           Compute space needed for SORGQR
319            CALL SORGQR( M, N, N, A, LDA, DUM(1), DUM(1), -1, IERR )
320            LWORK_SORGQR_N=DUM(1)
321            CALL SORGQR( M, M, N, A, LDA, DUM(1), DUM(1), -1, IERR )
322            LWORK_SORGQR_M=DUM(1)
323*           Compute space needed for SGEBRD
324            CALL SGEBRD( N, N, A, LDA, S, DUM(1), DUM(1),
325     $                   DUM(1), DUM(1), -1, IERR )
326            LWORK_SGEBRD=DUM(1)
327*           Compute space needed for SORGBR P
328            CALL SORGBR( 'P', N, N, N, A, LDA, DUM(1),
329     $                   DUM(1), -1, IERR )
330            LWORK_SORGBR_P=DUM(1)
331*           Compute space needed for SORGBR Q
332            CALL SORGBR( 'Q', N, N, N, A, LDA, DUM(1),
333     $                   DUM(1), -1, IERR )
334            LWORK_SORGBR_Q=DUM(1)
335*
336            IF( M.GE.MNTHR ) THEN
337               IF( WNTUN ) THEN
338*
339*                 Path 1 (M much larger than N, JOBU='N')
340*
341                  MAXWRK = N + LWORK_SGEQRF
342                  MAXWRK = MAX( MAXWRK, 3*N+LWORK_SGEBRD )
343                  IF( WNTVO .OR. WNTVAS )
344     $               MAXWRK = MAX( MAXWRK, 3*N+LWORK_SORGBR_P )
345                  MAXWRK = MAX( MAXWRK, BDSPAC )
346                  MINWRK = MAX( 4*N, BDSPAC )
347               ELSE IF( WNTUO .AND. WNTVN ) THEN
348*
349*                 Path 2 (M much larger than N, JOBU='O', JOBVT='N')
350*
351                  WRKBL = N + LWORK_SGEQRF
352                  WRKBL = MAX( WRKBL, N+LWORK_SORGQR_N )
353                  WRKBL = MAX( WRKBL, 3*N+LWORK_SGEBRD )
354                  WRKBL = MAX( WRKBL, 3*N+LWORK_SORGBR_Q )
355                  WRKBL = MAX( WRKBL, BDSPAC )
356                  MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N )
357                  MINWRK = MAX( 3*N+M, BDSPAC )
358               ELSE IF( WNTUO .AND. WNTVAS ) THEN
359*
360*                 Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
361*                 'A')
362*
363                  WRKBL = N + LWORK_SGEQRF
364                  WRKBL = MAX( WRKBL, N+LWORK_SORGQR_N )
365                  WRKBL = MAX( WRKBL, 3*N+LWORK_SGEBRD )
366                  WRKBL = MAX( WRKBL, 3*N+LWORK_SORGBR_Q )
367                  WRKBL = MAX( WRKBL, 3*N+LWORK_SORGBR_P )
368                  WRKBL = MAX( WRKBL, BDSPAC )
369                  MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N )
370                  MINWRK = MAX( 3*N+M, BDSPAC )
371               ELSE IF( WNTUS .AND. WNTVN ) THEN
372*
373*                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')
374*
375                  WRKBL = N + LWORK_SGEQRF
376                  WRKBL = MAX( WRKBL, N+LWORK_SORGQR_N )
377                  WRKBL = MAX( WRKBL, 3*N+LWORK_SGEBRD )
378                  WRKBL = MAX( WRKBL, 3*N+LWORK_SORGBR_Q )
379                  WRKBL = MAX( WRKBL, BDSPAC )
380                  MAXWRK = N*N + WRKBL
381                  MINWRK = MAX( 3*N+M, BDSPAC )
382               ELSE IF( WNTUS .AND. WNTVO ) THEN
383*
384*                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')
385*
386                  WRKBL = N + LWORK_SGEQRF
387                  WRKBL = MAX( WRKBL, N+LWORK_SORGQR_N )
388                  WRKBL = MAX( WRKBL, 3*N+LWORK_SGEBRD )
389                  WRKBL = MAX( WRKBL, 3*N+LWORK_SORGBR_Q )
390                  WRKBL = MAX( WRKBL, 3*N+LWORK_SORGBR_P )
391                  WRKBL = MAX( WRKBL, BDSPAC )
392                  MAXWRK = 2*N*N + WRKBL
393                  MINWRK = MAX( 3*N+M, BDSPAC )
394               ELSE IF( WNTUS .AND. WNTVAS ) THEN
395*
396*                 Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
397*                 'A')
398*
399                  WRKBL = N + LWORK_SGEQRF
400                  WRKBL = MAX( WRKBL, N+LWORK_SORGQR_N )
401                  WRKBL = MAX( WRKBL, 3*N+LWORK_SGEBRD )
402                  WRKBL = MAX( WRKBL, 3*N+LWORK_SORGBR_Q )
403                  WRKBL = MAX( WRKBL, 3*N+LWORK_SORGBR_P )
404                  WRKBL = MAX( WRKBL, BDSPAC )
405                  MAXWRK = N*N + WRKBL
406                  MINWRK = MAX( 3*N+M, BDSPAC )
407               ELSE IF( WNTUA .AND. WNTVN ) THEN
408*
409*                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')
410*
411                  WRKBL = N + LWORK_SGEQRF
412                  WRKBL = MAX( WRKBL, N+LWORK_SORGQR_M )
413                  WRKBL = MAX( WRKBL, 3*N+LWORK_SGEBRD )
414                  WRKBL = MAX( WRKBL, 3*N+LWORK_SORGBR_Q )
415                  WRKBL = MAX( WRKBL, BDSPAC )
416                  MAXWRK = N*N + WRKBL
417                  MINWRK = MAX( 3*N+M, BDSPAC )
418               ELSE IF( WNTUA .AND. WNTVO ) THEN
419*
420*                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')
421*
422                  WRKBL = N + LWORK_SGEQRF
423                  WRKBL = MAX( WRKBL, N+LWORK_SORGQR_M )
424                  WRKBL = MAX( WRKBL, 3*N+LWORK_SGEBRD )
425                  WRKBL = MAX( WRKBL, 3*N+LWORK_SORGBR_Q )
426                  WRKBL = MAX( WRKBL, 3*N+LWORK_SORGBR_P )
427                  WRKBL = MAX( WRKBL, BDSPAC )
428                  MAXWRK = 2*N*N + WRKBL
429                  MINWRK = MAX( 3*N+M, BDSPAC )
430               ELSE IF( WNTUA .AND. WNTVAS ) THEN
431*
432*                 Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
433*                 'A')
434*
435                  WRKBL = N + LWORK_SGEQRF
436                  WRKBL = MAX( WRKBL, N+LWORK_SORGQR_M )
437                  WRKBL = MAX( WRKBL, 3*N+LWORK_SGEBRD )
438                  WRKBL = MAX( WRKBL, 3*N+LWORK_SORGBR_Q )
439                  WRKBL = MAX( WRKBL, 3*N+LWORK_SORGBR_P )
440                  WRKBL = MAX( WRKBL, BDSPAC )
441                  MAXWRK = N*N + WRKBL
442                  MINWRK = MAX( 3*N+M, BDSPAC )
443               END IF
444            ELSE
445*
446*              Path 10 (M at least N, but not much larger)
447*
448               CALL SGEBRD( M, N, A, LDA, S, DUM(1), DUM(1),
449     $                   DUM(1), DUM(1), -1, IERR )
450               LWORK_SGEBRD=DUM(1)
451               MAXWRK = 3*N + LWORK_SGEBRD
452               IF( WNTUS .OR. WNTUO ) THEN
453                  CALL SORGBR( 'Q', M, N, N, A, LDA, DUM(1),
454     $                   DUM(1), -1, IERR )
455                  LWORK_SORGBR_Q=DUM(1)
456                  MAXWRK = MAX( MAXWRK, 3*N+LWORK_SORGBR_Q )
457               END IF
458               IF( WNTUA ) THEN
459                  CALL SORGBR( 'Q', M, M, N, A, LDA, DUM(1),
460     $                   DUM(1), -1, IERR )
461                  LWORK_SORGBR_Q=DUM(1)
462                  MAXWRK = MAX( MAXWRK, 3*N+LWORK_SORGBR_Q )
463               END IF
464               IF( .NOT.WNTVN ) THEN
465                 MAXWRK = MAX( MAXWRK, 3*N+LWORK_SORGBR_P )
466               END IF
467               MAXWRK = MAX( MAXWRK, BDSPAC )
468               MINWRK = MAX( 3*N+M, BDSPAC )
469            END IF
470         ELSE IF( MINMN.GT.0 ) THEN
471*
472*           Compute space needed for SBDSQR
473*
474            MNTHR = ILAENV( 6, 'SGESVD', JOBU // JOBVT, M, N, 0, 0 )
475            BDSPAC = 5*M
476*           Compute space needed for SGELQF
477            CALL SGELQF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR )
478            LWORK_SGELQF=DUM(1)
479*           Compute space needed for SORGLQ
480            CALL SORGLQ( N, N, M, DUM(1), N, DUM(1), DUM(1), -1, IERR )
481            LWORK_SORGLQ_N=DUM(1)
482            CALL SORGLQ( M, N, M, A, LDA, DUM(1), DUM(1), -1, IERR )
483            LWORK_SORGLQ_M=DUM(1)
484*           Compute space needed for SGEBRD
485            CALL SGEBRD( M, M, A, LDA, S, DUM(1), DUM(1),
486     $                   DUM(1), DUM(1), -1, IERR )
487            LWORK_SGEBRD=DUM(1)
488*            Compute space needed for SORGBR P
489            CALL SORGBR( 'P', M, M, M, A, N, DUM(1),
490     $                   DUM(1), -1, IERR )
491            LWORK_SORGBR_P=DUM(1)
492*           Compute space needed for SORGBR Q
493            CALL SORGBR( 'Q', M, M, M, A, N, DUM(1),
494     $                   DUM(1), -1, IERR )
495            LWORK_SORGBR_Q=DUM(1)
496            IF( N.GE.MNTHR ) THEN
497               IF( WNTVN ) THEN
498*
499*                 Path 1t(N much larger than M, JOBVT='N')
500*
501                  MAXWRK = M + LWORK_SGELQF
502                  MAXWRK = MAX( MAXWRK, 3*M+LWORK_SGEBRD )
503                  IF( WNTUO .OR. WNTUAS )
504     $               MAXWRK = MAX( MAXWRK, 3*M+LWORK_SORGBR_Q )
505                  MAXWRK = MAX( MAXWRK, BDSPAC )
506                  MINWRK = MAX( 4*M, BDSPAC )
507               ELSE IF( WNTVO .AND. WNTUN ) THEN
508*
509*                 Path 2t(N much larger than M, JOBU='N', JOBVT='O')
510*
511                  WRKBL = M + LWORK_SGELQF
512                  WRKBL = MAX( WRKBL, M+LWORK_SORGLQ_M )
513                  WRKBL = MAX( WRKBL, 3*M+LWORK_SGEBRD )
514                  WRKBL = MAX( WRKBL, 3*M+LWORK_SORGBR_P )
515                  WRKBL = MAX( WRKBL, BDSPAC )
516                  MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M )
517                  MINWRK = MAX( 3*M+N, BDSPAC )
518               ELSE IF( WNTVO .AND. WNTUAS ) THEN
519*
520*                 Path 3t(N much larger than M, JOBU='S' or 'A',
521*                 JOBVT='O')
522*
523                  WRKBL = M + LWORK_SGELQF
524                  WRKBL = MAX( WRKBL, M+LWORK_SORGLQ_M )
525                  WRKBL = MAX( WRKBL, 3*M+LWORK_SGEBRD )
526                  WRKBL = MAX( WRKBL, 3*M+LWORK_SORGBR_P )
527                  WRKBL = MAX( WRKBL, 3*M+LWORK_SORGBR_Q )
528                  WRKBL = MAX( WRKBL, BDSPAC )
529                  MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M )
530                  MINWRK = MAX( 3*M+N, BDSPAC )
531               ELSE IF( WNTVS .AND. WNTUN ) THEN
532*
533*                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')
534*
535                  WRKBL = M + LWORK_SGELQF
536                  WRKBL = MAX( WRKBL, M+LWORK_SORGLQ_M )
537                  WRKBL = MAX( WRKBL, 3*M+LWORK_SGEBRD )
538                  WRKBL = MAX( WRKBL, 3*M+LWORK_SORGBR_P )
539                  WRKBL = MAX( WRKBL, BDSPAC )
540                  MAXWRK = M*M + WRKBL
541                  MINWRK = MAX( 3*M+N, BDSPAC )
542               ELSE IF( WNTVS .AND. WNTUO ) THEN
543*
544*                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')
545*
546                  WRKBL = M + LWORK_SGELQF
547                  WRKBL = MAX( WRKBL, M+LWORK_SORGLQ_M )
548                  WRKBL = MAX( WRKBL, 3*M+LWORK_SGEBRD )
549                  WRKBL = MAX( WRKBL, 3*M+LWORK_SORGBR_P )
550                  WRKBL = MAX( WRKBL, 3*M+LWORK_SORGBR_Q )
551                  WRKBL = MAX( WRKBL, BDSPAC )
552                  MAXWRK = 2*M*M + WRKBL
553                  MINWRK = MAX( 3*M+N, BDSPAC )
554                  MAXWRK = MAX( MAXWRK, MINWRK )
555               ELSE IF( WNTVS .AND. WNTUAS ) THEN
556*
557*                 Path 6t(N much larger than M, JOBU='S' or 'A',
558*                 JOBVT='S')
559*
560                  WRKBL = M + LWORK_SGELQF
561                  WRKBL = MAX( WRKBL, M+LWORK_SORGLQ_M )
562                  WRKBL = MAX( WRKBL, 3*M+LWORK_SGEBRD )
563                  WRKBL = MAX( WRKBL, 3*M+LWORK_SORGBR_P )
564                  WRKBL = MAX( WRKBL, 3*M+LWORK_SORGBR_Q )
565                  WRKBL = MAX( WRKBL, BDSPAC )
566                  MAXWRK = M*M + WRKBL
567                  MINWRK = MAX( 3*M+N, BDSPAC )
568               ELSE IF( WNTVA .AND. WNTUN ) THEN
569*
570*                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')
571*
572                  WRKBL = M + LWORK_SGELQF
573                  WRKBL = MAX( WRKBL, M+LWORK_SORGLQ_N )
574                  WRKBL = MAX( WRKBL, 3*M+LWORK_SGEBRD )
575                  WRKBL = MAX( WRKBL, 3*M+LWORK_SORGBR_P )
576                  WRKBL = MAX( WRKBL, BDSPAC )
577                  MAXWRK = M*M + WRKBL
578                  MINWRK = MAX( 3*M+N, BDSPAC )
579               ELSE IF( WNTVA .AND. WNTUO ) THEN
580*
581*                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')
582*
583                  WRKBL = M + LWORK_SGELQF
584                  WRKBL = MAX( WRKBL, M+LWORK_SORGLQ_N )
585                  WRKBL = MAX( WRKBL, 3*M+LWORK_SGEBRD )
586                  WRKBL = MAX( WRKBL, 3*M+LWORK_SORGBR_P )
587                  WRKBL = MAX( WRKBL, 3*M+LWORK_SORGBR_Q )
588                  WRKBL = MAX( WRKBL, BDSPAC )
589                  MAXWRK = 2*M*M + WRKBL
590                  MINWRK = MAX( 3*M+N, BDSPAC )
591               ELSE IF( WNTVA .AND. WNTUAS ) THEN
592*
593*                 Path 9t(N much larger than M, JOBU='S' or 'A',
594*                 JOBVT='A')
595*
596                  WRKBL = M + LWORK_SGELQF
597                  WRKBL = MAX( WRKBL, M+LWORK_SORGLQ_N )
598                  WRKBL = MAX( WRKBL, 3*M+LWORK_SGEBRD )
599                  WRKBL = MAX( WRKBL, 3*M+LWORK_SORGBR_P )
600                  WRKBL = MAX( WRKBL, 3*M+LWORK_SORGBR_Q )
601                  WRKBL = MAX( WRKBL, BDSPAC )
602                  MAXWRK = M*M + WRKBL
603                  MINWRK = MAX( 3*M+N, BDSPAC )
604               END IF
605            ELSE
606*
607*              Path 10t(N greater than M, but not much larger)
608*
609               CALL SGEBRD( M, N, A, LDA, S, DUM(1), DUM(1),
610     $                   DUM(1), DUM(1), -1, IERR )
611               LWORK_SGEBRD=DUM(1)
612               MAXWRK = 3*M + LWORK_SGEBRD
613               IF( WNTVS .OR. WNTVO ) THEN
614*                Compute space needed for SORGBR P
615                 CALL SORGBR( 'P', M, N, M, A, N, DUM(1),
616     $                   DUM(1), -1, IERR )
617                 LWORK_SORGBR_P=DUM(1)
618                 MAXWRK = MAX( MAXWRK, 3*M+LWORK_SORGBR_P )
619               END IF
620               IF( WNTVA ) THEN
621                 CALL SORGBR( 'P', N, N, M, A, N, DUM(1),
622     $                   DUM(1), -1, IERR )
623                 LWORK_SORGBR_P=DUM(1)
624                 MAXWRK = MAX( MAXWRK, 3*M+LWORK_SORGBR_P )
625               END IF
626               IF( .NOT.WNTUN ) THEN
627                  MAXWRK = MAX( MAXWRK, 3*M+LWORK_SORGBR_Q )
628               END IF
629               MAXWRK = MAX( MAXWRK, BDSPAC )
630               MINWRK = MAX( 3*M+N, BDSPAC )
631            END IF
632         END IF
633         MAXWRK = MAX( MAXWRK, MINWRK )
634         WORK( 1 ) = MAXWRK
635*
636         IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
637            INFO = -13
638         END IF
639      END IF
640*
641      IF( INFO.NE.0 ) THEN
642         CALL XERBLA( 'SGESVD', -INFO )
643         RETURN
644      ELSE IF( LQUERY ) THEN
645         RETURN
646      END IF
647*
648*     Quick return if possible
649*
650      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
651         RETURN
652      END IF
653*
654*     Get machine constants
655*
656      EPS = SLAMCH( 'P' )
657      SMLNUM = SQRT( SLAMCH( 'S' ) ) / EPS
658      BIGNUM = ONE / SMLNUM
659*
660*     Scale A if max element outside range [SMLNUM,BIGNUM]
661*
662      ANRM = SLANGE( 'M', M, N, A, LDA, DUM )
663      ISCL = 0
664      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
665         ISCL = 1
666         CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR )
667      ELSE IF( ANRM.GT.BIGNUM ) THEN
668         ISCL = 1
669         CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR )
670      END IF
671*
672      IF( M.GE.N ) THEN
673*
674*        A has at least as many rows as columns. If A has sufficiently
675*        more rows than columns, first reduce using the QR
676*        decomposition (if sufficient workspace available)
677*
678         IF( M.GE.MNTHR ) THEN
679*
680            IF( WNTUN ) THEN
681*
682*              Path 1 (M much larger than N, JOBU='N')
683*              No left singular vectors to be computed
684*
685               ITAU = 1
686               IWORK = ITAU + N
687*
688*              Compute A=Q*R
689*              (Workspace: need 2*N, prefer N+N*NB)
690*
691               CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
692     $                      LWORK-IWORK+1, IERR )
693*
694*              Zero out below R
695*
696               CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
697               IE = 1
698               ITAUQ = IE + N
699               ITAUP = ITAUQ + N
700               IWORK = ITAUP + N
701*
702*              Bidiagonalize R in A
703*              (Workspace: need 4*N, prefer 3*N+2*N*NB)
704*
705               CALL SGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
706     $                      WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
707     $                      IERR )
708               NCVT = 0
709               IF( WNTVO .OR. WNTVAS ) THEN
710*
711*                 If right singular vectors desired, generate P'.
712*                 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
713*
714                  CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
715     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
716                  NCVT = N
717               END IF
718               IWORK = IE + N
719*
720*              Perform bidiagonal QR iteration, computing right
721*              singular vectors of A in A if desired
722*              (Workspace: need BDSPAC)
723*
724               CALL SBDSQR( 'U', N, NCVT, 0, 0, S, WORK( IE ), A, LDA,
725     $                      DUM, 1, DUM, 1, WORK( IWORK ), INFO )
726*
727*              If right singular vectors desired in VT, copy them there
728*
729               IF( WNTVAS )
730     $            CALL SLACPY( 'F', N, N, A, LDA, VT, LDVT )
731*
732            ELSE IF( WNTUO .AND. WNTVN ) THEN
733*
734*              Path 2 (M much larger than N, JOBU='O', JOBVT='N')
735*              N left singular vectors to be overwritten on A and
736*              no right singular vectors to be computed
737*
738               IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
739*
740*                 Sufficient workspace for a fast algorithm
741*
742                  IR = 1
743                  IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN
744*
745*                    WORK(IU) is LDA by N, WORK(IR) is LDA by N
746*
747                     LDWRKU = LDA
748                     LDWRKR = LDA
749                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN
750*
751*                    WORK(IU) is LDA by N, WORK(IR) is N by N
752*
753                     LDWRKU = LDA
754                     LDWRKR = N
755                  ELSE
756*
757*                    WORK(IU) is LDWRKU by N, WORK(IR) is N by N
758*
759                     LDWRKU = ( LWORK-N*N-N ) / N
760                     LDWRKR = N
761                  END IF
762                  ITAU = IR + LDWRKR*N
763                  IWORK = ITAU + N
764*
765*                 Compute A=Q*R
766*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
767*
768                  CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
769     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
770*
771*                 Copy R to WORK(IR) and zero out below it
772*
773                  CALL SLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
774                  CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IR+1 ),
775     $                         LDWRKR )
776*
777*                 Generate Q in A
778*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
779*
780                  CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
781     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
782                  IE = ITAU
783                  ITAUQ = IE + N
784                  ITAUP = ITAUQ + N
785                  IWORK = ITAUP + N
786*
787*                 Bidiagonalize R in WORK(IR)
788*                 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
789*
790                  CALL SGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ),
791     $                         WORK( ITAUQ ), WORK( ITAUP ),
792     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
793*
794*                 Generate left vectors bidiagonalizing R
795*                 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
796*
797                  CALL SORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
798     $                         WORK( ITAUQ ), WORK( IWORK ),
799     $                         LWORK-IWORK+1, IERR )
800                  IWORK = IE + N
801*
802*                 Perform bidiagonal QR iteration, computing left
803*                 singular vectors of R in WORK(IR)
804*                 (Workspace: need N*N+BDSPAC)
805*
806                  CALL SBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, 1,
807     $                         WORK( IR ), LDWRKR, DUM, 1,
808     $                         WORK( IWORK ), INFO )
809                  IU = IE + N
810*
811*                 Multiply Q in A by left singular vectors of R in
812*                 WORK(IR), storing result in WORK(IU) and copying to A
813*                 (Workspace: need N*N+2*N, prefer N*N+M*N+N)
814*
815                  DO 10 I = 1, M, LDWRKU
816                     CHUNK = MIN( M-I+1, LDWRKU )
817                     CALL SGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
818     $                           LDA, WORK( IR ), LDWRKR, ZERO,
819     $                           WORK( IU ), LDWRKU )
820                     CALL SLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
821     $                            A( I, 1 ), LDA )
822   10             CONTINUE
823*
824               ELSE
825*
826*                 Insufficient workspace for a fast algorithm
827*
828                  IE = 1
829                  ITAUQ = IE + N
830                  ITAUP = ITAUQ + N
831                  IWORK = ITAUP + N
832*
833*                 Bidiagonalize A
834*                 (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
835*
836                  CALL SGEBRD( M, N, A, LDA, S, WORK( IE ),
837     $                         WORK( ITAUQ ), WORK( ITAUP ),
838     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
839*
840*                 Generate left vectors bidiagonalizing A
841*                 (Workspace: need 4*N, prefer 3*N+N*NB)
842*
843                  CALL SORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
844     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
845                  IWORK = IE + N
846*
847*                 Perform bidiagonal QR iteration, computing left
848*                 singular vectors of A in A
849*                 (Workspace: need BDSPAC)
850*
851                  CALL SBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, 1,
852     $                         A, LDA, DUM, 1, WORK( IWORK ), INFO )
853*
854               END IF
855*
856            ELSE IF( WNTUO .AND. WNTVAS ) THEN
857*
858*              Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
859*              N left singular vectors to be overwritten on A and
860*              N right singular vectors to be computed in VT
861*
862               IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
863*
864*                 Sufficient workspace for a fast algorithm
865*
866                  IR = 1
867                  IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN
868*
869*                    WORK(IU) is LDA by N and WORK(IR) is LDA by N
870*
871                     LDWRKU = LDA
872                     LDWRKR = LDA
873                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN
874*
875*                    WORK(IU) is LDA by N and WORK(IR) is N by N
876*
877                     LDWRKU = LDA
878                     LDWRKR = N
879                  ELSE
880*
881*                    WORK(IU) is LDWRKU by N and WORK(IR) is N by N
882*
883                     LDWRKU = ( LWORK-N*N-N ) / N
884                     LDWRKR = N
885                  END IF
886                  ITAU = IR + LDWRKR*N
887                  IWORK = ITAU + N
888*
889*                 Compute A=Q*R
890*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
891*
892                  CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
893     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
894*
895*                 Copy R to VT, zeroing out below it
896*
897                  CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT )
898                  IF( N.GT.1 )
899     $               CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
900     $                            VT( 2, 1 ), LDVT )
901*
902*                 Generate Q in A
903*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
904*
905                  CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
906     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
907                  IE = ITAU
908                  ITAUQ = IE + N
909                  ITAUP = ITAUQ + N
910                  IWORK = ITAUP + N
911*
912*                 Bidiagonalize R in VT, copying result to WORK(IR)
913*                 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
914*
915                  CALL SGEBRD( N, N, VT, LDVT, S, WORK( IE ),
916     $                         WORK( ITAUQ ), WORK( ITAUP ),
917     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
918                  CALL SLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR )
919*
920*                 Generate left vectors bidiagonalizing R in WORK(IR)
921*                 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
922*
923                  CALL SORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
924     $                         WORK( ITAUQ ), WORK( IWORK ),
925     $                         LWORK-IWORK+1, IERR )
926*
927*                 Generate right vectors bidiagonalizing R in VT
928*                 (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB)
929*
930                  CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
931     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
932                  IWORK = IE + N
933*
934*                 Perform bidiagonal QR iteration, computing left
935*                 singular vectors of R in WORK(IR) and computing right
936*                 singular vectors of R in VT
937*                 (Workspace: need N*N+BDSPAC)
938*
939                  CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, LDVT,
940     $                         WORK( IR ), LDWRKR, DUM, 1,
941     $                         WORK( IWORK ), INFO )
942                  IU = IE + N
943*
944*                 Multiply Q in A by left singular vectors of R in
945*                 WORK(IR), storing result in WORK(IU) and copying to A
946*                 (Workspace: need N*N+2*N, prefer N*N+M*N+N)
947*
948                  DO 20 I = 1, M, LDWRKU
949                     CHUNK = MIN( M-I+1, LDWRKU )
950                     CALL SGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
951     $                           LDA, WORK( IR ), LDWRKR, ZERO,
952     $                           WORK( IU ), LDWRKU )
953                     CALL SLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
954     $                            A( I, 1 ), LDA )
955   20             CONTINUE
956*
957               ELSE
958*
959*                 Insufficient workspace for a fast algorithm
960*
961                  ITAU = 1
962                  IWORK = ITAU + N
963*
964*                 Compute A=Q*R
965*                 (Workspace: need 2*N, prefer N+N*NB)
966*
967                  CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
968     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
969*
970*                 Copy R to VT, zeroing out below it
971*
972                  CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT )
973                  IF( N.GT.1 )
974     $               CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
975     $                            VT( 2, 1 ), LDVT )
976*
977*                 Generate Q in A
978*                 (Workspace: need 2*N, prefer N+N*NB)
979*
980                  CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
981     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
982                  IE = ITAU
983                  ITAUQ = IE + N
984                  ITAUP = ITAUQ + N
985                  IWORK = ITAUP + N
986*
987*                 Bidiagonalize R in VT
988*                 (Workspace: need 4*N, prefer 3*N+2*N*NB)
989*
990                  CALL SGEBRD( N, N, VT, LDVT, S, WORK( IE ),
991     $                         WORK( ITAUQ ), WORK( ITAUP ),
992     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
993*
994*                 Multiply Q in A by left vectors bidiagonalizing R
995*                 (Workspace: need 3*N+M, prefer 3*N+M*NB)
996*
997                  CALL SORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
998     $                         WORK( ITAUQ ), A, LDA, WORK( IWORK ),
999     $                         LWORK-IWORK+1, IERR )
1000*
1001*                 Generate right vectors bidiagonalizing R in VT
1002*                 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1003*
1004                  CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1005     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
1006                  IWORK = IE + N
1007*
1008*                 Perform bidiagonal QR iteration, computing left
1009*                 singular vectors of A in A and computing right
1010*                 singular vectors of A in VT
1011*                 (Workspace: need BDSPAC)
1012*
1013                  CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, LDVT,
1014     $                         A, LDA, DUM, 1, WORK( IWORK ), INFO )
1015*
1016               END IF
1017*
1018            ELSE IF( WNTUS ) THEN
1019*
1020               IF( WNTVN ) THEN
1021*
1022*                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')
1023*                 N left singular vectors to be computed in U and
1024*                 no right singular vectors to be computed
1025*
1026                  IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
1027*
1028*                    Sufficient workspace for a fast algorithm
1029*
1030                     IR = 1
1031                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
1032*
1033*                       WORK(IR) is LDA by N
1034*
1035                        LDWRKR = LDA
1036                     ELSE
1037*
1038*                       WORK(IR) is N by N
1039*
1040                        LDWRKR = N
1041                     END IF
1042                     ITAU = IR + LDWRKR*N
1043                     IWORK = ITAU + N
1044*
1045*                    Compute A=Q*R
1046*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1047*
1048                     CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
1049     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1050*
1051*                    Copy R to WORK(IR), zeroing out below it
1052*
1053                     CALL SLACPY( 'U', N, N, A, LDA, WORK( IR ),
1054     $                            LDWRKR )
1055                     CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
1056     $                            WORK( IR+1 ), LDWRKR )
1057*
1058*                    Generate Q in A
1059*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1060*
1061                     CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
1062     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1063                     IE = ITAU
1064                     ITAUQ = IE + N
1065                     ITAUP = ITAUQ + N
1066                     IWORK = ITAUP + N
1067*
1068*                    Bidiagonalize R in WORK(IR)
1069*                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1070*
1071                     CALL SGEBRD( N, N, WORK( IR ), LDWRKR, S,
1072     $                            WORK( IE ), WORK( ITAUQ ),
1073     $                            WORK( ITAUP ), WORK( IWORK ),
1074     $                            LWORK-IWORK+1, IERR )
1075*
1076*                    Generate left vectors bidiagonalizing R in WORK(IR)
1077*                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1078*
1079                     CALL SORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
1080     $                            WORK( ITAUQ ), WORK( IWORK ),
1081     $                            LWORK-IWORK+1, IERR )
1082                     IWORK = IE + N
1083*
1084*                    Perform bidiagonal QR iteration, computing left
1085*                    singular vectors of R in WORK(IR)
1086*                    (Workspace: need N*N+BDSPAC)
1087*
1088                     CALL SBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
1089     $                            1, WORK( IR ), LDWRKR, DUM, 1,
1090     $                            WORK( IWORK ), INFO )
1091*
1092*                    Multiply Q in A by left singular vectors of R in
1093*                    WORK(IR), storing result in U
1094*                    (Workspace: need N*N)
1095*
1096                     CALL SGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
1097     $                           WORK( IR ), LDWRKR, ZERO, U, LDU )
1098*
1099                  ELSE
1100*
1101*                    Insufficient workspace for a fast algorithm
1102*
1103                     ITAU = 1
1104                     IWORK = ITAU + N
1105*
1106*                    Compute A=Q*R, copying result to U
1107*                    (Workspace: need 2*N, prefer N+N*NB)
1108*
1109                     CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
1110     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1111                     CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
1112*
1113*                    Generate Q in U
1114*                    (Workspace: need 2*N, prefer N+N*NB)
1115*
1116                     CALL SORGQR( M, N, N, U, LDU, WORK( ITAU ),
1117     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1118                     IE = ITAU
1119                     ITAUQ = IE + N
1120                     ITAUP = ITAUQ + N
1121                     IWORK = ITAUP + N
1122*
1123*                    Zero out below R in A
1124*
1125                     CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1126     $                            LDA )
1127*
1128*                    Bidiagonalize R in A
1129*                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1130*
1131                     CALL SGEBRD( N, N, A, LDA, S, WORK( IE ),
1132     $                            WORK( ITAUQ ), WORK( ITAUP ),
1133     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1134*
1135*                    Multiply Q in U by left vectors bidiagonalizing R
1136*                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1137*
1138                     CALL SORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1139     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1140     $                            LWORK-IWORK+1, IERR )
1141                     IWORK = IE + N
1142*
1143*                    Perform bidiagonal QR iteration, computing left
1144*                    singular vectors of A in U
1145*                    (Workspace: need BDSPAC)
1146*
1147                     CALL SBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
1148     $                            1, U, LDU, DUM, 1, WORK( IWORK ),
1149     $                            INFO )
1150*
1151                  END IF
1152*
1153               ELSE IF( WNTVO ) THEN
1154*
1155*                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1156*                 N left singular vectors to be computed in U and
1157*                 N right singular vectors to be overwritten on A
1158*
1159                  IF( LWORK.GE.2*N*N+MAX( 4*N, BDSPAC ) ) THEN
1160*
1161*                    Sufficient workspace for a fast algorithm
1162*
1163                     IU = 1
1164                     IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
1165*
1166*                       WORK(IU) is LDA by N and WORK(IR) is LDA by N
1167*
1168                        LDWRKU = LDA
1169                        IR = IU + LDWRKU*N
1170                        LDWRKR = LDA
1171                     ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
1172*
1173*                       WORK(IU) is LDA by N and WORK(IR) is N by N
1174*
1175                        LDWRKU = LDA
1176                        IR = IU + LDWRKU*N
1177                        LDWRKR = N
1178                     ELSE
1179*
1180*                       WORK(IU) is N by N and WORK(IR) is N by N
1181*
1182                        LDWRKU = N
1183                        IR = IU + LDWRKU*N
1184                        LDWRKR = N
1185                     END IF
1186                     ITAU = IR + LDWRKR*N
1187                     IWORK = ITAU + N
1188*
1189*                    Compute A=Q*R
1190*                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1191*
1192                     CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
1193     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1194*
1195*                    Copy R to WORK(IU), zeroing out below it
1196*
1197                     CALL SLACPY( 'U', N, N, A, LDA, WORK( IU ),
1198     $                            LDWRKU )
1199                     CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
1200     $                            WORK( IU+1 ), LDWRKU )
1201*
1202*                    Generate Q in A
1203*                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1204*
1205                     CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
1206     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1207                     IE = ITAU
1208                     ITAUQ = IE + N
1209                     ITAUP = ITAUQ + N
1210                     IWORK = ITAUP + N
1211*
1212*                    Bidiagonalize R in WORK(IU), copying result to
1213*                    WORK(IR)
1214*                    (Workspace: need 2*N*N+4*N,
1215*                                prefer 2*N*N+3*N+2*N*NB)
1216*
1217                     CALL SGEBRD( N, N, WORK( IU ), LDWRKU, S,
1218     $                            WORK( IE ), WORK( ITAUQ ),
1219     $                            WORK( ITAUP ), WORK( IWORK ),
1220     $                            LWORK-IWORK+1, IERR )
1221                     CALL SLACPY( 'U', N, N, WORK( IU ), LDWRKU,
1222     $                            WORK( IR ), LDWRKR )
1223*
1224*                    Generate left bidiagonalizing vectors in WORK(IU)
1225*                    (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1226*
1227                     CALL SORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1228     $                            WORK( ITAUQ ), WORK( IWORK ),
1229     $                            LWORK-IWORK+1, IERR )
1230*
1231*                    Generate right bidiagonalizing vectors in WORK(IR)
1232*                    (Workspace: need 2*N*N+4*N-1,
1233*                                prefer 2*N*N+3*N+(N-1)*NB)
1234*
1235                     CALL SORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
1236     $                            WORK( ITAUP ), WORK( IWORK ),
1237     $                            LWORK-IWORK+1, IERR )
1238                     IWORK = IE + N
1239*
1240*                    Perform bidiagonal QR iteration, computing left
1241*                    singular vectors of R in WORK(IU) and computing
1242*                    right singular vectors of R in WORK(IR)
1243*                    (Workspace: need 2*N*N+BDSPAC)
1244*
1245                     CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
1246     $                            WORK( IR ), LDWRKR, WORK( IU ),
1247     $                            LDWRKU, DUM, 1, WORK( IWORK ), INFO )
1248*
1249*                    Multiply Q in A by left singular vectors of R in
1250*                    WORK(IU), storing result in U
1251*                    (Workspace: need N*N)
1252*
1253                     CALL SGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
1254     $                           WORK( IU ), LDWRKU, ZERO, U, LDU )
1255*
1256*                    Copy right singular vectors of R to A
1257*                    (Workspace: need N*N)
1258*
1259                     CALL SLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
1260     $                            LDA )
1261*
1262                  ELSE
1263*
1264*                    Insufficient workspace for a fast algorithm
1265*
1266                     ITAU = 1
1267                     IWORK = ITAU + N
1268*
1269*                    Compute A=Q*R, copying result to U
1270*                    (Workspace: need 2*N, prefer N+N*NB)
1271*
1272                     CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
1273     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1274                     CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
1275*
1276*                    Generate Q in U
1277*                    (Workspace: need 2*N, prefer N+N*NB)
1278*
1279                     CALL SORGQR( M, N, N, U, LDU, WORK( ITAU ),
1280     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1281                     IE = ITAU
1282                     ITAUQ = IE + N
1283                     ITAUP = ITAUQ + N
1284                     IWORK = ITAUP + N
1285*
1286*                    Zero out below R in A
1287*
1288                     CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1289     $                            LDA )
1290*
1291*                    Bidiagonalize R in A
1292*                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1293*
1294                     CALL SGEBRD( N, N, A, LDA, S, WORK( IE ),
1295     $                            WORK( ITAUQ ), WORK( ITAUP ),
1296     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1297*
1298*                    Multiply Q in U by left vectors bidiagonalizing R
1299*                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1300*
1301                     CALL SORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1302     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1303     $                            LWORK-IWORK+1, IERR )
1304*
1305*                    Generate right vectors bidiagonalizing R in A
1306*                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1307*
1308                     CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1309     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1310                     IWORK = IE + N
1311*
1312*                    Perform bidiagonal QR iteration, computing left
1313*                    singular vectors of A in U and computing right
1314*                    singular vectors of A in A
1315*                    (Workspace: need BDSPAC)
1316*
1317                     CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
1318     $                            LDA, U, LDU, DUM, 1, WORK( IWORK ),
1319     $                            INFO )
1320*
1321                  END IF
1322*
1323               ELSE IF( WNTVAS ) THEN
1324*
1325*                 Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1326*                         or 'A')
1327*                 N left singular vectors to be computed in U and
1328*                 N right singular vectors to be computed in VT
1329*
1330                  IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
1331*
1332*                    Sufficient workspace for a fast algorithm
1333*
1334                     IU = 1
1335                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
1336*
1337*                       WORK(IU) is LDA by N
1338*
1339                        LDWRKU = LDA
1340                     ELSE
1341*
1342*                       WORK(IU) is N by N
1343*
1344                        LDWRKU = N
1345                     END IF
1346                     ITAU = IU + LDWRKU*N
1347                     IWORK = ITAU + N
1348*
1349*                    Compute A=Q*R
1350*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1351*
1352                     CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
1353     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1354*
1355*                    Copy R to WORK(IU), zeroing out below it
1356*
1357                     CALL SLACPY( 'U', N, N, A, LDA, WORK( IU ),
1358     $                            LDWRKU )
1359                     CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
1360     $                            WORK( IU+1 ), LDWRKU )
1361*
1362*                    Generate Q in A
1363*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1364*
1365                     CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
1366     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1367                     IE = ITAU
1368                     ITAUQ = IE + N
1369                     ITAUP = ITAUQ + N
1370                     IWORK = ITAUP + N
1371*
1372*                    Bidiagonalize R in WORK(IU), copying result to VT
1373*                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1374*
1375                     CALL SGEBRD( N, N, WORK( IU ), LDWRKU, S,
1376     $                            WORK( IE ), WORK( ITAUQ ),
1377     $                            WORK( ITAUP ), WORK( IWORK ),
1378     $                            LWORK-IWORK+1, IERR )
1379                     CALL SLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
1380     $                            LDVT )
1381*
1382*                    Generate left bidiagonalizing vectors in WORK(IU)
1383*                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1384*
1385                     CALL SORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1386     $                            WORK( ITAUQ ), WORK( IWORK ),
1387     $                            LWORK-IWORK+1, IERR )
1388*
1389*                    Generate right bidiagonalizing vectors in VT
1390*                    (Workspace: need N*N+4*N-1,
1391*                                prefer N*N+3*N+(N-1)*NB)
1392*
1393                     CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1394     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1395                     IWORK = IE + N
1396*
1397*                    Perform bidiagonal QR iteration, computing left
1398*                    singular vectors of R in WORK(IU) and computing
1399*                    right singular vectors of R in VT
1400*                    (Workspace: need N*N+BDSPAC)
1401*
1402                     CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
1403     $                            LDVT, WORK( IU ), LDWRKU, DUM, 1,
1404     $                            WORK( IWORK ), INFO )
1405*
1406*                    Multiply Q in A by left singular vectors of R in
1407*                    WORK(IU), storing result in U
1408*                    (Workspace: need N*N)
1409*
1410                     CALL SGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
1411     $                           WORK( IU ), LDWRKU, ZERO, U, LDU )
1412*
1413                  ELSE
1414*
1415*                    Insufficient workspace for a fast algorithm
1416*
1417                     ITAU = 1
1418                     IWORK = ITAU + N
1419*
1420*                    Compute A=Q*R, copying result to U
1421*                    (Workspace: need 2*N, prefer N+N*NB)
1422*
1423                     CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
1424     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1425                     CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
1426*
1427*                    Generate Q in U
1428*                    (Workspace: need 2*N, prefer N+N*NB)
1429*
1430                     CALL SORGQR( M, N, N, U, LDU, WORK( ITAU ),
1431     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1432*
1433*                    Copy R to VT, zeroing out below it
1434*
1435                     CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT )
1436                     IF( N.GT.1 )
1437     $                  CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
1438     $                               VT( 2, 1 ), LDVT )
1439                     IE = ITAU
1440                     ITAUQ = IE + N
1441                     ITAUP = ITAUQ + N
1442                     IWORK = ITAUP + N
1443*
1444*                    Bidiagonalize R in VT
1445*                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1446*
1447                     CALL SGEBRD( N, N, VT, LDVT, S, WORK( IE ),
1448     $                            WORK( ITAUQ ), WORK( ITAUP ),
1449     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1450*
1451*                    Multiply Q in U by left bidiagonalizing vectors
1452*                    in VT
1453*                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1454*
1455                     CALL SORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
1456     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1457     $                            LWORK-IWORK+1, IERR )
1458*
1459*                    Generate right bidiagonalizing vectors in VT
1460*                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1461*
1462                     CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1463     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1464                     IWORK = IE + N
1465*
1466*                    Perform bidiagonal QR iteration, computing left
1467*                    singular vectors of A in U and computing right
1468*                    singular vectors of A in VT
1469*                    (Workspace: need BDSPAC)
1470*
1471                     CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
1472     $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
1473     $                            INFO )
1474*
1475                  END IF
1476*
1477               END IF
1478*
1479            ELSE IF( WNTUA ) THEN
1480*
1481               IF( WNTVN ) THEN
1482*
1483*                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1484*                 M left singular vectors to be computed in U and
1485*                 no right singular vectors to be computed
1486*
1487                  IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1488*
1489*                    Sufficient workspace for a fast algorithm
1490*
1491                     IR = 1
1492                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
1493*
1494*                       WORK(IR) is LDA by N
1495*
1496                        LDWRKR = LDA
1497                     ELSE
1498*
1499*                       WORK(IR) is N by N
1500*
1501                        LDWRKR = N
1502                     END IF
1503                     ITAU = IR + LDWRKR*N
1504                     IWORK = ITAU + N
1505*
1506*                    Compute A=Q*R, copying result to U
1507*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1508*
1509                     CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
1510     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1511                     CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
1512*
1513*                    Copy R to WORK(IR), zeroing out below it
1514*
1515                     CALL SLACPY( 'U', N, N, A, LDA, WORK( IR ),
1516     $                            LDWRKR )
1517                     CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
1518     $                            WORK( IR+1 ), LDWRKR )
1519*
1520*                    Generate Q in U
1521*                    (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1522*
1523                     CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ),
1524     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1525                     IE = ITAU
1526                     ITAUQ = IE + N
1527                     ITAUP = ITAUQ + N
1528                     IWORK = ITAUP + N
1529*
1530*                    Bidiagonalize R in WORK(IR)
1531*                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1532*
1533                     CALL SGEBRD( N, N, WORK( IR ), LDWRKR, S,
1534     $                            WORK( IE ), WORK( ITAUQ ),
1535     $                            WORK( ITAUP ), WORK( IWORK ),
1536     $                            LWORK-IWORK+1, IERR )
1537*
1538*                    Generate left bidiagonalizing vectors in WORK(IR)
1539*                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1540*
1541                     CALL SORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
1542     $                            WORK( ITAUQ ), WORK( IWORK ),
1543     $                            LWORK-IWORK+1, IERR )
1544                     IWORK = IE + N
1545*
1546*                    Perform bidiagonal QR iteration, computing left
1547*                    singular vectors of R in WORK(IR)
1548*                    (Workspace: need N*N+BDSPAC)
1549*
1550                     CALL SBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
1551     $                            1, WORK( IR ), LDWRKR, DUM, 1,
1552     $                            WORK( IWORK ), INFO )
1553*
1554*                    Multiply Q in U by left singular vectors of R in
1555*                    WORK(IR), storing result in A
1556*                    (Workspace: need N*N)
1557*
1558                     CALL SGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
1559     $                           WORK( IR ), LDWRKR, ZERO, A, LDA )
1560*
1561*                    Copy left singular vectors of A from A to U
1562*
1563                     CALL SLACPY( 'F', M, N, A, LDA, U, LDU )
1564*
1565                  ELSE
1566*
1567*                    Insufficient workspace for a fast algorithm
1568*
1569                     ITAU = 1
1570                     IWORK = ITAU + N
1571*
1572*                    Compute A=Q*R, copying result to U
1573*                    (Workspace: need 2*N, prefer N+N*NB)
1574*
1575                     CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
1576     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1577                     CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
1578*
1579*                    Generate Q in U
1580*                    (Workspace: need N+M, prefer N+M*NB)
1581*
1582                     CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ),
1583     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1584                     IE = ITAU
1585                     ITAUQ = IE + N
1586                     ITAUP = ITAUQ + N
1587                     IWORK = ITAUP + N
1588*
1589*                    Zero out below R in A
1590*
1591                     CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1592     $                            LDA )
1593*
1594*                    Bidiagonalize R in A
1595*                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1596*
1597                     CALL SGEBRD( N, N, A, LDA, S, WORK( IE ),
1598     $                            WORK( ITAUQ ), WORK( ITAUP ),
1599     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1600*
1601*                    Multiply Q in U by left bidiagonalizing vectors
1602*                    in A
1603*                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1604*
1605                     CALL SORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1606     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1607     $                            LWORK-IWORK+1, IERR )
1608                     IWORK = IE + N
1609*
1610*                    Perform bidiagonal QR iteration, computing left
1611*                    singular vectors of A in U
1612*                    (Workspace: need BDSPAC)
1613*
1614                     CALL SBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
1615     $                            1, U, LDU, DUM, 1, WORK( IWORK ),
1616     $                            INFO )
1617*
1618                  END IF
1619*
1620               ELSE IF( WNTVO ) THEN
1621*
1622*                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1623*                 M left singular vectors to be computed in U and
1624*                 N right singular vectors to be overwritten on A
1625*
1626                  IF( LWORK.GE.2*N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1627*
1628*                    Sufficient workspace for a fast algorithm
1629*
1630                     IU = 1
1631                     IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
1632*
1633*                       WORK(IU) is LDA by N and WORK(IR) is LDA by N
1634*
1635                        LDWRKU = LDA
1636                        IR = IU + LDWRKU*N
1637                        LDWRKR = LDA
1638                     ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
1639*
1640*                       WORK(IU) is LDA by N and WORK(IR) is N by N
1641*
1642                        LDWRKU = LDA
1643                        IR = IU + LDWRKU*N
1644                        LDWRKR = N
1645                     ELSE
1646*
1647*                       WORK(IU) is N by N and WORK(IR) is N by N
1648*
1649                        LDWRKU = N
1650                        IR = IU + LDWRKU*N
1651                        LDWRKR = N
1652                     END IF
1653                     ITAU = IR + LDWRKR*N
1654                     IWORK = ITAU + N
1655*
1656*                    Compute A=Q*R, copying result to U
1657*                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1658*
1659                     CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
1660     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1661                     CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
1662*
1663*                    Generate Q in U
1664*                    (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
1665*
1666                     CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ),
1667     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1668*
1669*                    Copy R to WORK(IU), zeroing out below it
1670*
1671                     CALL SLACPY( 'U', N, N, A, LDA, WORK( IU ),
1672     $                            LDWRKU )
1673                     CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
1674     $                            WORK( IU+1 ), LDWRKU )
1675                     IE = ITAU
1676                     ITAUQ = IE + N
1677                     ITAUP = ITAUQ + N
1678                     IWORK = ITAUP + N
1679*
1680*                    Bidiagonalize R in WORK(IU), copying result to
1681*                    WORK(IR)
1682*                    (Workspace: need 2*N*N+4*N,
1683*                                prefer 2*N*N+3*N+2*N*NB)
1684*
1685                     CALL SGEBRD( N, N, WORK( IU ), LDWRKU, S,
1686     $                            WORK( IE ), WORK( ITAUQ ),
1687     $                            WORK( ITAUP ), WORK( IWORK ),
1688     $                            LWORK-IWORK+1, IERR )
1689                     CALL SLACPY( 'U', N, N, WORK( IU ), LDWRKU,
1690     $                            WORK( IR ), LDWRKR )
1691*
1692*                    Generate left bidiagonalizing vectors in WORK(IU)
1693*                    (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1694*
1695                     CALL SORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1696     $                            WORK( ITAUQ ), WORK( IWORK ),
1697     $                            LWORK-IWORK+1, IERR )
1698*
1699*                    Generate right bidiagonalizing vectors in WORK(IR)
1700*                    (Workspace: need 2*N*N+4*N-1,
1701*                                prefer 2*N*N+3*N+(N-1)*NB)
1702*
1703                     CALL SORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
1704     $                            WORK( ITAUP ), WORK( IWORK ),
1705     $                            LWORK-IWORK+1, IERR )
1706                     IWORK = IE + N
1707*
1708*                    Perform bidiagonal QR iteration, computing left
1709*                    singular vectors of R in WORK(IU) and computing
1710*                    right singular vectors of R in WORK(IR)
1711*                    (Workspace: need 2*N*N+BDSPAC)
1712*
1713                     CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
1714     $                            WORK( IR ), LDWRKR, WORK( IU ),
1715     $                            LDWRKU, DUM, 1, WORK( IWORK ), INFO )
1716*
1717*                    Multiply Q in U by left singular vectors of R in
1718*                    WORK(IU), storing result in A
1719*                    (Workspace: need N*N)
1720*
1721                     CALL SGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
1722     $                           WORK( IU ), LDWRKU, ZERO, A, LDA )
1723*
1724*                    Copy left singular vectors of A from A to U
1725*
1726                     CALL SLACPY( 'F', M, N, A, LDA, U, LDU )
1727*
1728*                    Copy right singular vectors of R from WORK(IR) to A
1729*
1730                     CALL SLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
1731     $                            LDA )
1732*
1733                  ELSE
1734*
1735*                    Insufficient workspace for a fast algorithm
1736*
1737                     ITAU = 1
1738                     IWORK = ITAU + N
1739*
1740*                    Compute A=Q*R, copying result to U
1741*                    (Workspace: need 2*N, prefer N+N*NB)
1742*
1743                     CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
1744     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1745                     CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
1746*
1747*                    Generate Q in U
1748*                    (Workspace: need N+M, prefer N+M*NB)
1749*
1750                     CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ),
1751     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1752                     IE = ITAU
1753                     ITAUQ = IE + N
1754                     ITAUP = ITAUQ + N
1755                     IWORK = ITAUP + N
1756*
1757*                    Zero out below R in A
1758*
1759                     CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1760     $                            LDA )
1761*
1762*                    Bidiagonalize R in A
1763*                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1764*
1765                     CALL SGEBRD( N, N, A, LDA, S, WORK( IE ),
1766     $                            WORK( ITAUQ ), WORK( ITAUP ),
1767     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1768*
1769*                    Multiply Q in U by left bidiagonalizing vectors
1770*                    in A
1771*                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1772*
1773                     CALL SORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1774     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1775     $                            LWORK-IWORK+1, IERR )
1776*
1777*                    Generate right bidiagonalizing vectors in A
1778*                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1779*
1780                     CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1781     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1782                     IWORK = IE + N
1783*
1784*                    Perform bidiagonal QR iteration, computing left
1785*                    singular vectors of A in U and computing right
1786*                    singular vectors of A in A
1787*                    (Workspace: need BDSPAC)
1788*
1789                     CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
1790     $                            LDA, U, LDU, DUM, 1, WORK( IWORK ),
1791     $                            INFO )
1792*
1793                  END IF
1794*
1795               ELSE IF( WNTVAS ) THEN
1796*
1797*                 Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1798*                         or 'A')
1799*                 M left singular vectors to be computed in U and
1800*                 N right singular vectors to be computed in VT
1801*
1802                  IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1803*
1804*                    Sufficient workspace for a fast algorithm
1805*
1806                     IU = 1
1807                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
1808*
1809*                       WORK(IU) is LDA by N
1810*
1811                        LDWRKU = LDA
1812                     ELSE
1813*
1814*                       WORK(IU) is N by N
1815*
1816                        LDWRKU = N
1817                     END IF
1818                     ITAU = IU + LDWRKU*N
1819                     IWORK = ITAU + N
1820*
1821*                    Compute A=Q*R, copying result to U
1822*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1823*
1824                     CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
1825     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1826                     CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
1827*
1828*                    Generate Q in U
1829*                    (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1830*
1831                     CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ),
1832     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1833*
1834*                    Copy R to WORK(IU), zeroing out below it
1835*
1836                     CALL SLACPY( 'U', N, N, A, LDA, WORK( IU ),
1837     $                            LDWRKU )
1838                     CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
1839     $                            WORK( IU+1 ), LDWRKU )
1840                     IE = ITAU
1841                     ITAUQ = IE + N
1842                     ITAUP = ITAUQ + N
1843                     IWORK = ITAUP + N
1844*
1845*                    Bidiagonalize R in WORK(IU), copying result to VT
1846*                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1847*
1848                     CALL SGEBRD( N, N, WORK( IU ), LDWRKU, S,
1849     $                            WORK( IE ), WORK( ITAUQ ),
1850     $                            WORK( ITAUP ), WORK( IWORK ),
1851     $                            LWORK-IWORK+1, IERR )
1852                     CALL SLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
1853     $                            LDVT )
1854*
1855*                    Generate left bidiagonalizing vectors in WORK(IU)
1856*                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1857*
1858                     CALL SORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1859     $                            WORK( ITAUQ ), WORK( IWORK ),
1860     $                            LWORK-IWORK+1, IERR )
1861*
1862*                    Generate right bidiagonalizing vectors in VT
1863*                    (Workspace: need N*N+4*N-1,
1864*                                prefer N*N+3*N+(N-1)*NB)
1865*
1866                     CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1867     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1868                     IWORK = IE + N
1869*
1870*                    Perform bidiagonal QR iteration, computing left
1871*                    singular vectors of R in WORK(IU) and computing
1872*                    right singular vectors of R in VT
1873*                    (Workspace: need N*N+BDSPAC)
1874*
1875                     CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
1876     $                            LDVT, WORK( IU ), LDWRKU, DUM, 1,
1877     $                            WORK( IWORK ), INFO )
1878*
1879*                    Multiply Q in U by left singular vectors of R in
1880*                    WORK(IU), storing result in A
1881*                    (Workspace: need N*N)
1882*
1883                     CALL SGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
1884     $                           WORK( IU ), LDWRKU, ZERO, A, LDA )
1885*
1886*                    Copy left singular vectors of A from A to U
1887*
1888                     CALL SLACPY( 'F', M, N, A, LDA, U, LDU )
1889*
1890                  ELSE
1891*
1892*                    Insufficient workspace for a fast algorithm
1893*
1894                     ITAU = 1
1895                     IWORK = ITAU + N
1896*
1897*                    Compute A=Q*R, copying result to U
1898*                    (Workspace: need 2*N, prefer N+N*NB)
1899*
1900                     CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
1901     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1902                     CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
1903*
1904*                    Generate Q in U
1905*                    (Workspace: need N+M, prefer N+M*NB)
1906*
1907                     CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ),
1908     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1909*
1910*                    Copy R from A to VT, zeroing out below it
1911*
1912                     CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT )
1913                     IF( N.GT.1 )
1914     $                  CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
1915     $                               VT( 2, 1 ), LDVT )
1916                     IE = ITAU
1917                     ITAUQ = IE + N
1918                     ITAUP = ITAUQ + N
1919                     IWORK = ITAUP + N
1920*
1921*                    Bidiagonalize R in VT
1922*                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1923*
1924                     CALL SGEBRD( N, N, VT, LDVT, S, WORK( IE ),
1925     $                            WORK( ITAUQ ), WORK( ITAUP ),
1926     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1927*
1928*                    Multiply Q in U by left bidiagonalizing vectors
1929*                    in VT
1930*                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1931*
1932                     CALL SORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
1933     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1934     $                            LWORK-IWORK+1, IERR )
1935*
1936*                    Generate right bidiagonalizing vectors in VT
1937*                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1938*
1939                     CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1940     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1941                     IWORK = IE + N
1942*
1943*                    Perform bidiagonal QR iteration, computing left
1944*                    singular vectors of A in U and computing right
1945*                    singular vectors of A in VT
1946*                    (Workspace: need BDSPAC)
1947*
1948                     CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
1949     $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
1950     $                            INFO )
1951*
1952                  END IF
1953*
1954               END IF
1955*
1956            END IF
1957*
1958         ELSE
1959*
1960*           M .LT. MNTHR
1961*
1962*           Path 10 (M at least N, but not much larger)
1963*           Reduce to bidiagonal form without QR decomposition
1964*
1965            IE = 1
1966            ITAUQ = IE + N
1967            ITAUP = ITAUQ + N
1968            IWORK = ITAUP + N
1969*
1970*           Bidiagonalize A
1971*           (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
1972*
1973            CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
1974     $                   WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
1975     $                   IERR )
1976            IF( WNTUAS ) THEN
1977*
1978*              If left singular vectors desired in U, copy result to U
1979*              and generate left bidiagonalizing vectors in U
1980*              (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB)
1981*
1982               CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
1983               IF( WNTUS )
1984     $            NCU = N
1985               IF( WNTUA )
1986     $            NCU = M
1987               CALL SORGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ),
1988     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
1989            END IF
1990            IF( WNTVAS ) THEN
1991*
1992*              If right singular vectors desired in VT, copy result to
1993*              VT and generate right bidiagonalizing vectors in VT
1994*              (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1995*
1996               CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT )
1997               CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1998     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
1999            END IF
2000            IF( WNTUO ) THEN
2001*
2002*              If left singular vectors desired in A, generate left
2003*              bidiagonalizing vectors in A
2004*              (Workspace: need 4*N, prefer 3*N+N*NB)
2005*
2006               CALL SORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
2007     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
2008            END IF
2009            IF( WNTVO ) THEN
2010*
2011*              If right singular vectors desired in A, generate right
2012*              bidiagonalizing vectors in A
2013*              (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
2014*
2015               CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
2016     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
2017            END IF
2018            IWORK = IE + N
2019            IF( WNTUAS .OR. WNTUO )
2020     $         NRU = M
2021            IF( WNTUN )
2022     $         NRU = 0
2023            IF( WNTVAS .OR. WNTVO )
2024     $         NCVT = N
2025            IF( WNTVN )
2026     $         NCVT = 0
2027            IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
2028*
2029*              Perform bidiagonal QR iteration, if desired, computing
2030*              left singular vectors in U and computing right singular
2031*              vectors in VT
2032*              (Workspace: need BDSPAC)
2033*
2034               CALL SBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
2035     $                      LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
2036            ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
2037*
2038*              Perform bidiagonal QR iteration, if desired, computing
2039*              left singular vectors in U and computing right singular
2040*              vectors in A
2041*              (Workspace: need BDSPAC)
2042*
2043               CALL SBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
2044     $                      U, LDU, DUM, 1, WORK( IWORK ), INFO )
2045            ELSE
2046*
2047*              Perform bidiagonal QR iteration, if desired, computing
2048*              left singular vectors in A and computing right singular
2049*              vectors in VT
2050*              (Workspace: need BDSPAC)
2051*
2052               CALL SBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
2053     $                      LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
2054            END IF
2055*
2056         END IF
2057*
2058      ELSE
2059*
2060*        A has more columns than rows. If A has sufficiently more
2061*        columns than rows, first reduce using the LQ decomposition (if
2062*        sufficient workspace available)
2063*
2064         IF( N.GE.MNTHR ) THEN
2065*
2066            IF( WNTVN ) THEN
2067*
2068*              Path 1t(N much larger than M, JOBVT='N')
2069*              No right singular vectors to be computed
2070*
2071               ITAU = 1
2072               IWORK = ITAU + M
2073*
2074*              Compute A=L*Q
2075*              (Workspace: need 2*M, prefer M+M*NB)
2076*
2077               CALL SGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
2078     $                      LWORK-IWORK+1, IERR )
2079*
2080*              Zero out above L
2081*
2082               CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA )
2083               IE = 1
2084               ITAUQ = IE + M
2085               ITAUP = ITAUQ + M
2086               IWORK = ITAUP + M
2087*
2088*              Bidiagonalize L in A
2089*              (Workspace: need 4*M, prefer 3*M+2*M*NB)
2090*
2091               CALL SGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
2092     $                      WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
2093     $                      IERR )
2094               IF( WNTUO .OR. WNTUAS ) THEN
2095*
2096*                 If left singular vectors desired, generate Q
2097*                 (Workspace: need 4*M, prefer 3*M+M*NB)
2098*
2099                  CALL SORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
2100     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2101               END IF
2102               IWORK = IE + M
2103               NRU = 0
2104               IF( WNTUO .OR. WNTUAS )
2105     $            NRU = M
2106*
2107*              Perform bidiagonal QR iteration, computing left singular
2108*              vectors of A in A if desired
2109*              (Workspace: need BDSPAC)
2110*
2111               CALL SBDSQR( 'U', M, 0, NRU, 0, S, WORK( IE ), DUM, 1, A,
2112     $                      LDA, DUM, 1, WORK( IWORK ), INFO )
2113*
2114*              If left singular vectors desired in U, copy them there
2115*
2116               IF( WNTUAS )
2117     $            CALL SLACPY( 'F', M, M, A, LDA, U, LDU )
2118*
2119            ELSE IF( WNTVO .AND. WNTUN ) THEN
2120*
2121*              Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2122*              M right singular vectors to be overwritten on A and
2123*              no left singular vectors to be computed
2124*
2125               IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2126*
2127*                 Sufficient workspace for a fast algorithm
2128*
2129                  IR = 1
2130                  IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN
2131*
2132*                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
2133*
2134                     LDWRKU = LDA
2135                     CHUNK = N
2136                     LDWRKR = LDA
2137                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN
2138*
2139*                    WORK(IU) is LDA by N and WORK(IR) is M by M
2140*
2141                     LDWRKU = LDA
2142                     CHUNK = N
2143                     LDWRKR = M
2144                  ELSE
2145*
2146*                    WORK(IU) is M by CHUNK and WORK(IR) is M by M
2147*
2148                     LDWRKU = M
2149                     CHUNK = ( LWORK-M*M-M ) / M
2150                     LDWRKR = M
2151                  END IF
2152                  ITAU = IR + LDWRKR*M
2153                  IWORK = ITAU + M
2154*
2155*                 Compute A=L*Q
2156*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2157*
2158                  CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
2159     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2160*
2161*                 Copy L to WORK(IR) and zero out above it
2162*
2163                  CALL SLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR )
2164                  CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
2165     $                         WORK( IR+LDWRKR ), LDWRKR )
2166*
2167*                 Generate Q in A
2168*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2169*
2170                  CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2171     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2172                  IE = ITAU
2173                  ITAUQ = IE + M
2174                  ITAUP = ITAUQ + M
2175                  IWORK = ITAUP + M
2176*
2177*                 Bidiagonalize L in WORK(IR)
2178*                 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2179*
2180                  CALL SGEBRD( M, M, WORK( IR ), LDWRKR, S, WORK( IE ),
2181     $                         WORK( ITAUQ ), WORK( ITAUP ),
2182     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2183*
2184*                 Generate right vectors bidiagonalizing L
2185*                 (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2186*
2187                  CALL SORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2188     $                         WORK( ITAUP ), WORK( IWORK ),
2189     $                         LWORK-IWORK+1, IERR )
2190                  IWORK = IE + M
2191*
2192*                 Perform bidiagonal QR iteration, computing right
2193*                 singular vectors of L in WORK(IR)
2194*                 (Workspace: need M*M+BDSPAC)
2195*
2196                  CALL SBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
2197     $                         WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2198     $                         WORK( IWORK ), INFO )
2199                  IU = IE + M
2200*
2201*                 Multiply right singular vectors of L in WORK(IR) by Q
2202*                 in A, storing result in WORK(IU) and copying to A
2203*                 (Workspace: need M*M+2*M, prefer M*M+M*N+M)
2204*
2205                  DO 30 I = 1, N, CHUNK
2206                     BLK = MIN( N-I+1, CHUNK )
2207                     CALL SGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ),
2208     $                           LDWRKR, A( 1, I ), LDA, ZERO,
2209     $                           WORK( IU ), LDWRKU )
2210                     CALL SLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
2211     $                            A( 1, I ), LDA )
2212   30             CONTINUE
2213*
2214               ELSE
2215*
2216*                 Insufficient workspace for a fast algorithm
2217*
2218                  IE = 1
2219                  ITAUQ = IE + M
2220                  ITAUP = ITAUQ + M
2221                  IWORK = ITAUP + M
2222*
2223*                 Bidiagonalize A
2224*                 (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
2225*
2226                  CALL SGEBRD( M, N, A, LDA, S, WORK( IE ),
2227     $                         WORK( ITAUQ ), WORK( ITAUP ),
2228     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2229*
2230*                 Generate right vectors bidiagonalizing A
2231*                 (Workspace: need 4*M, prefer 3*M+M*NB)
2232*
2233                  CALL SORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
2234     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2235                  IWORK = IE + M
2236*
2237*                 Perform bidiagonal QR iteration, computing right
2238*                 singular vectors of A in A
2239*                 (Workspace: need BDSPAC)
2240*
2241                  CALL SBDSQR( 'L', M, N, 0, 0, S, WORK( IE ), A, LDA,
2242     $                         DUM, 1, DUM, 1, WORK( IWORK ), INFO )
2243*
2244               END IF
2245*
2246            ELSE IF( WNTVO .AND. WNTUAS ) THEN
2247*
2248*              Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2249*              M right singular vectors to be overwritten on A and
2250*              M left singular vectors to be computed in U
2251*
2252               IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2253*
2254*                 Sufficient workspace for a fast algorithm
2255*
2256                  IR = 1
2257                  IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN
2258*
2259*                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
2260*
2261                     LDWRKU = LDA
2262                     CHUNK = N
2263                     LDWRKR = LDA
2264                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN
2265*
2266*                    WORK(IU) is LDA by N and WORK(IR) is M by M
2267*
2268                     LDWRKU = LDA
2269                     CHUNK = N
2270                     LDWRKR = M
2271                  ELSE
2272*
2273*                    WORK(IU) is M by CHUNK and WORK(IR) is M by M
2274*
2275                     LDWRKU = M
2276                     CHUNK = ( LWORK-M*M-M ) / M
2277                     LDWRKR = M
2278                  END IF
2279                  ITAU = IR + LDWRKR*M
2280                  IWORK = ITAU + M
2281*
2282*                 Compute A=L*Q
2283*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2284*
2285                  CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
2286     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2287*
2288*                 Copy L to U, zeroing about above it
2289*
2290                  CALL SLACPY( 'L', M, M, A, LDA, U, LDU )
2291                  CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
2292     $                         LDU )
2293*
2294*                 Generate Q in A
2295*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2296*
2297                  CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2298     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2299                  IE = ITAU
2300                  ITAUQ = IE + M
2301                  ITAUP = ITAUQ + M
2302                  IWORK = ITAUP + M
2303*
2304*                 Bidiagonalize L in U, copying result to WORK(IR)
2305*                 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2306*
2307                  CALL SGEBRD( M, M, U, LDU, S, WORK( IE ),
2308     $                         WORK( ITAUQ ), WORK( ITAUP ),
2309     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2310                  CALL SLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR )
2311*
2312*                 Generate right vectors bidiagonalizing L in WORK(IR)
2313*                 (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2314*
2315                  CALL SORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2316     $                         WORK( ITAUP ), WORK( IWORK ),
2317     $                         LWORK-IWORK+1, IERR )
2318*
2319*                 Generate left vectors bidiagonalizing L in U
2320*                 (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2321*
2322                  CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2323     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2324                  IWORK = IE + M
2325*
2326*                 Perform bidiagonal QR iteration, computing left
2327*                 singular vectors of L in U, and computing right
2328*                 singular vectors of L in WORK(IR)
2329*                 (Workspace: need M*M+BDSPAC)
2330*
2331                  CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2332     $                         WORK( IR ), LDWRKR, U, LDU, DUM, 1,
2333     $                         WORK( IWORK ), INFO )
2334                  IU = IE + M
2335*
2336*                 Multiply right singular vectors of L in WORK(IR) by Q
2337*                 in A, storing result in WORK(IU) and copying to A
2338*                 (Workspace: need M*M+2*M, prefer M*M+M*N+M))
2339*
2340                  DO 40 I = 1, N, CHUNK
2341                     BLK = MIN( N-I+1, CHUNK )
2342                     CALL SGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ),
2343     $                           LDWRKR, A( 1, I ), LDA, ZERO,
2344     $                           WORK( IU ), LDWRKU )
2345                     CALL SLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
2346     $                            A( 1, I ), LDA )
2347   40             CONTINUE
2348*
2349               ELSE
2350*
2351*                 Insufficient workspace for a fast algorithm
2352*
2353                  ITAU = 1
2354                  IWORK = ITAU + M
2355*
2356*                 Compute A=L*Q
2357*                 (Workspace: need 2*M, prefer M+M*NB)
2358*
2359                  CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
2360     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2361*
2362*                 Copy L to U, zeroing out above it
2363*
2364                  CALL SLACPY( 'L', M, M, A, LDA, U, LDU )
2365                  CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
2366     $                         LDU )
2367*
2368*                 Generate Q in A
2369*                 (Workspace: need 2*M, prefer M+M*NB)
2370*
2371                  CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2372     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2373                  IE = ITAU
2374                  ITAUQ = IE + M
2375                  ITAUP = ITAUQ + M
2376                  IWORK = ITAUP + M
2377*
2378*                 Bidiagonalize L in U
2379*                 (Workspace: need 4*M, prefer 3*M+2*M*NB)
2380*
2381                  CALL SGEBRD( M, M, U, LDU, S, WORK( IE ),
2382     $                         WORK( ITAUQ ), WORK( ITAUP ),
2383     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2384*
2385*                 Multiply right vectors bidiagonalizing L by Q in A
2386*                 (Workspace: need 3*M+N, prefer 3*M+N*NB)
2387*
2388                  CALL SORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
2389     $                         WORK( ITAUP ), A, LDA, WORK( IWORK ),
2390     $                         LWORK-IWORK+1, IERR )
2391*
2392*                 Generate left vectors bidiagonalizing L in U
2393*                 (Workspace: need 4*M, prefer 3*M+M*NB)
2394*
2395                  CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2396     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2397                  IWORK = IE + M
2398*
2399*                 Perform bidiagonal QR iteration, computing left
2400*                 singular vectors of A in U and computing right
2401*                 singular vectors of A in A
2402*                 (Workspace: need BDSPAC)
2403*
2404                  CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), A, LDA,
2405     $                         U, LDU, DUM, 1, WORK( IWORK ), INFO )
2406*
2407               END IF
2408*
2409            ELSE IF( WNTVS ) THEN
2410*
2411               IF( WNTUN ) THEN
2412*
2413*                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2414*                 M right singular vectors to be computed in VT and
2415*                 no left singular vectors to be computed
2416*
2417                  IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2418*
2419*                    Sufficient workspace for a fast algorithm
2420*
2421                     IR = 1
2422                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
2423*
2424*                       WORK(IR) is LDA by M
2425*
2426                        LDWRKR = LDA
2427                     ELSE
2428*
2429*                       WORK(IR) is M by M
2430*
2431                        LDWRKR = M
2432                     END IF
2433                     ITAU = IR + LDWRKR*M
2434                     IWORK = ITAU + M
2435*
2436*                    Compute A=L*Q
2437*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2438*
2439                     CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
2440     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2441*
2442*                    Copy L to WORK(IR), zeroing out above it
2443*
2444                     CALL SLACPY( 'L', M, M, A, LDA, WORK( IR ),
2445     $                            LDWRKR )
2446                     CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
2447     $                            WORK( IR+LDWRKR ), LDWRKR )
2448*
2449*                    Generate Q in A
2450*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2451*
2452                     CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2453     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2454                     IE = ITAU
2455                     ITAUQ = IE + M
2456                     ITAUP = ITAUQ + M
2457                     IWORK = ITAUP + M
2458*
2459*                    Bidiagonalize L in WORK(IR)
2460*                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2461*
2462                     CALL SGEBRD( M, M, WORK( IR ), LDWRKR, S,
2463     $                            WORK( IE ), WORK( ITAUQ ),
2464     $                            WORK( ITAUP ), WORK( IWORK ),
2465     $                            LWORK-IWORK+1, IERR )
2466*
2467*                    Generate right vectors bidiagonalizing L in
2468*                    WORK(IR)
2469*                    (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
2470*
2471                     CALL SORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2472     $                            WORK( ITAUP ), WORK( IWORK ),
2473     $                            LWORK-IWORK+1, IERR )
2474                     IWORK = IE + M
2475*
2476*                    Perform bidiagonal QR iteration, computing right
2477*                    singular vectors of L in WORK(IR)
2478*                    (Workspace: need M*M+BDSPAC)
2479*
2480                     CALL SBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
2481     $                            WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2482     $                            WORK( IWORK ), INFO )
2483*
2484*                    Multiply right singular vectors of L in WORK(IR) by
2485*                    Q in A, storing result in VT
2486*                    (Workspace: need M*M)
2487*
2488                     CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ),
2489     $                           LDWRKR, A, LDA, ZERO, VT, LDVT )
2490*
2491                  ELSE
2492*
2493*                    Insufficient workspace for a fast algorithm
2494*
2495                     ITAU = 1
2496                     IWORK = ITAU + M
2497*
2498*                    Compute A=L*Q
2499*                    (Workspace: need 2*M, prefer M+M*NB)
2500*
2501                     CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
2502     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2503*
2504*                    Copy result to VT
2505*
2506                     CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
2507*
2508*                    Generate Q in VT
2509*                    (Workspace: need 2*M, prefer M+M*NB)
2510*
2511                     CALL SORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2512     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2513                     IE = ITAU
2514                     ITAUQ = IE + M
2515                     ITAUP = ITAUQ + M
2516                     IWORK = ITAUP + M
2517*
2518*                    Zero out above L in A
2519*
2520                     CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
2521     $                            LDA )
2522*
2523*                    Bidiagonalize L in A
2524*                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
2525*
2526                     CALL SGEBRD( M, M, A, LDA, S, WORK( IE ),
2527     $                            WORK( ITAUQ ), WORK( ITAUP ),
2528     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2529*
2530*                    Multiply right vectors bidiagonalizing L by Q in VT
2531*                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
2532*
2533                     CALL SORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
2534     $                            WORK( ITAUP ), VT, LDVT,
2535     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2536                     IWORK = IE + M
2537*
2538*                    Perform bidiagonal QR iteration, computing right
2539*                    singular vectors of A in VT
2540*                    (Workspace: need BDSPAC)
2541*
2542                     CALL SBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT,
2543     $                            LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
2544     $                            INFO )
2545*
2546                  END IF
2547*
2548               ELSE IF( WNTUO ) THEN
2549*
2550*                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2551*                 M right singular vectors to be computed in VT and
2552*                 M left singular vectors to be overwritten on A
2553*
2554                  IF( LWORK.GE.2*M*M+MAX( 4*M, BDSPAC ) ) THEN
2555*
2556*                    Sufficient workspace for a fast algorithm
2557*
2558                     IU = 1
2559                     IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
2560*
2561*                       WORK(IU) is LDA by M and WORK(IR) is LDA by M
2562*
2563                        LDWRKU = LDA
2564                        IR = IU + LDWRKU*M
2565                        LDWRKR = LDA
2566                     ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
2567*
2568*                       WORK(IU) is LDA by M and WORK(IR) is M by M
2569*
2570                        LDWRKU = LDA
2571                        IR = IU + LDWRKU*M
2572                        LDWRKR = M
2573                     ELSE
2574*
2575*                       WORK(IU) is M by M and WORK(IR) is M by M
2576*
2577                        LDWRKU = M
2578                        IR = IU + LDWRKU*M
2579                        LDWRKR = M
2580                     END IF
2581                     ITAU = IR + LDWRKR*M
2582                     IWORK = ITAU + M
2583*
2584*                    Compute A=L*Q
2585*                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2586*
2587                     CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
2588     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2589*
2590*                    Copy L to WORK(IU), zeroing out below it
2591*
2592                     CALL SLACPY( 'L', M, M, A, LDA, WORK( IU ),
2593     $                            LDWRKU )
2594                     CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
2595     $                            WORK( IU+LDWRKU ), LDWRKU )
2596*
2597*                    Generate Q in A
2598*                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2599*
2600                     CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2601     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2602                     IE = ITAU
2603                     ITAUQ = IE + M
2604                     ITAUP = ITAUQ + M
2605                     IWORK = ITAUP + M
2606*
2607*                    Bidiagonalize L in WORK(IU), copying result to
2608*                    WORK(IR)
2609*                    (Workspace: need 2*M*M+4*M,
2610*                                prefer 2*M*M+3*M+2*M*NB)
2611*
2612                     CALL SGEBRD( M, M, WORK( IU ), LDWRKU, S,
2613     $                            WORK( IE ), WORK( ITAUQ ),
2614     $                            WORK( ITAUP ), WORK( IWORK ),
2615     $                            LWORK-IWORK+1, IERR )
2616                     CALL SLACPY( 'L', M, M, WORK( IU ), LDWRKU,
2617     $                            WORK( IR ), LDWRKR )
2618*
2619*                    Generate right bidiagonalizing vectors in WORK(IU)
2620*                    (Workspace: need 2*M*M+4*M-1,
2621*                                prefer 2*M*M+3*M+(M-1)*NB)
2622*
2623                     CALL SORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
2624     $                            WORK( ITAUP ), WORK( IWORK ),
2625     $                            LWORK-IWORK+1, IERR )
2626*
2627*                    Generate left bidiagonalizing vectors in WORK(IR)
2628*                    (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
2629*
2630                     CALL SORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
2631     $                            WORK( ITAUQ ), WORK( IWORK ),
2632     $                            LWORK-IWORK+1, IERR )
2633                     IWORK = IE + M
2634*
2635*                    Perform bidiagonal QR iteration, computing left
2636*                    singular vectors of L in WORK(IR) and computing
2637*                    right singular vectors of L in WORK(IU)
2638*                    (Workspace: need 2*M*M+BDSPAC)
2639*
2640                     CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2641     $                            WORK( IU ), LDWRKU, WORK( IR ),
2642     $                            LDWRKR, DUM, 1, WORK( IWORK ), INFO )
2643*
2644*                    Multiply right singular vectors of L in WORK(IU) by
2645*                    Q in A, storing result in VT
2646*                    (Workspace: need M*M)
2647*
2648                     CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
2649     $                           LDWRKU, A, LDA, ZERO, VT, LDVT )
2650*
2651*                    Copy left singular vectors of L to A
2652*                    (Workspace: need M*M)
2653*
2654                     CALL SLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
2655     $                            LDA )
2656*
2657                  ELSE
2658*
2659*                    Insufficient workspace for a fast algorithm
2660*
2661                     ITAU = 1
2662                     IWORK = ITAU + M
2663*
2664*                    Compute A=L*Q, copying result to VT
2665*                    (Workspace: need 2*M, prefer M+M*NB)
2666*
2667                     CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
2668     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2669                     CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
2670*
2671*                    Generate Q in VT
2672*                    (Workspace: need 2*M, prefer M+M*NB)
2673*
2674                     CALL SORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2675     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2676                     IE = ITAU
2677                     ITAUQ = IE + M
2678                     ITAUP = ITAUQ + M
2679                     IWORK = ITAUP + M
2680*
2681*                    Zero out above L in A
2682*
2683                     CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
2684     $                            LDA )
2685*
2686*                    Bidiagonalize L in A
2687*                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
2688*
2689                     CALL SGEBRD( M, M, A, LDA, S, WORK( IE ),
2690     $                            WORK( ITAUQ ), WORK( ITAUP ),
2691     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2692*
2693*                    Multiply right vectors bidiagonalizing L by Q in VT
2694*                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
2695*
2696                     CALL SORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
2697     $                            WORK( ITAUP ), VT, LDVT,
2698     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2699*
2700*                    Generate left bidiagonalizing vectors of L in A
2701*                    (Workspace: need 4*M, prefer 3*M+M*NB)
2702*
2703                     CALL SORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
2704     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2705                     IWORK = IE + M
2706*
2707*                    Perform bidiagonal QR iteration, compute left
2708*                    singular vectors of A in A and compute right
2709*                    singular vectors of A in VT
2710*                    (Workspace: need BDSPAC)
2711*
2712                     CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
2713     $                            LDVT, A, LDA, DUM, 1, WORK( IWORK ),
2714     $                            INFO )
2715*
2716                  END IF
2717*
2718               ELSE IF( WNTUAS ) THEN
2719*
2720*                 Path 6t(N much larger than M, JOBU='S' or 'A',
2721*                         JOBVT='S')
2722*                 M right singular vectors to be computed in VT and
2723*                 M left singular vectors to be computed in U
2724*
2725                  IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2726*
2727*                    Sufficient workspace for a fast algorithm
2728*
2729                     IU = 1
2730                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
2731*
2732*                       WORK(IU) is LDA by N
2733*
2734                        LDWRKU = LDA
2735                     ELSE
2736*
2737*                       WORK(IU) is LDA by M
2738*
2739                        LDWRKU = M
2740                     END IF
2741                     ITAU = IU + LDWRKU*M
2742                     IWORK = ITAU + M
2743*
2744*                    Compute A=L*Q
2745*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2746*
2747                     CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
2748     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2749*
2750*                    Copy L to WORK(IU), zeroing out above it
2751*
2752                     CALL SLACPY( 'L', M, M, A, LDA, WORK( IU ),
2753     $                            LDWRKU )
2754                     CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
2755     $                            WORK( IU+LDWRKU ), LDWRKU )
2756*
2757*                    Generate Q in A
2758*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2759*
2760                     CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2761     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2762                     IE = ITAU
2763                     ITAUQ = IE + M
2764                     ITAUP = ITAUQ + M
2765                     IWORK = ITAUP + M
2766*
2767*                    Bidiagonalize L in WORK(IU), copying result to U
2768*                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2769*
2770                     CALL SGEBRD( M, M, WORK( IU ), LDWRKU, S,
2771     $                            WORK( IE ), WORK( ITAUQ ),
2772     $                            WORK( ITAUP ), WORK( IWORK ),
2773     $                            LWORK-IWORK+1, IERR )
2774                     CALL SLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
2775     $                            LDU )
2776*
2777*                    Generate right bidiagonalizing vectors in WORK(IU)
2778*                    (Workspace: need M*M+4*M-1,
2779*                                prefer M*M+3*M+(M-1)*NB)
2780*
2781                     CALL SORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
2782     $                            WORK( ITAUP ), WORK( IWORK ),
2783     $                            LWORK-IWORK+1, IERR )
2784*
2785*                    Generate left bidiagonalizing vectors in U
2786*                    (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2787*
2788                     CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2789     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2790                     IWORK = IE + M
2791*
2792*                    Perform bidiagonal QR iteration, computing left
2793*                    singular vectors of L in U and computing right
2794*                    singular vectors of L in WORK(IU)
2795*                    (Workspace: need M*M+BDSPAC)
2796*
2797                     CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2798     $                            WORK( IU ), LDWRKU, U, LDU, DUM, 1,
2799     $                            WORK( IWORK ), INFO )
2800*
2801*                    Multiply right singular vectors of L in WORK(IU) by
2802*                    Q in A, storing result in VT
2803*                    (Workspace: need M*M)
2804*
2805                     CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
2806     $                           LDWRKU, A, LDA, ZERO, VT, LDVT )
2807*
2808                  ELSE
2809*
2810*                    Insufficient workspace for a fast algorithm
2811*
2812                     ITAU = 1
2813                     IWORK = ITAU + M
2814*
2815*                    Compute A=L*Q, copying result to VT
2816*                    (Workspace: need 2*M, prefer M+M*NB)
2817*
2818                     CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
2819     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2820                     CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
2821*
2822*                    Generate Q in VT
2823*                    (Workspace: need 2*M, prefer M+M*NB)
2824*
2825                     CALL SORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2826     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2827*
2828*                    Copy L to U, zeroing out above it
2829*
2830                     CALL SLACPY( 'L', M, M, A, LDA, U, LDU )
2831                     CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
2832     $                            LDU )
2833                     IE = ITAU
2834                     ITAUQ = IE + M
2835                     ITAUP = ITAUQ + M
2836                     IWORK = ITAUP + M
2837*
2838*                    Bidiagonalize L in U
2839*                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
2840*
2841                     CALL SGEBRD( M, M, U, LDU, S, WORK( IE ),
2842     $                            WORK( ITAUQ ), WORK( ITAUP ),
2843     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2844*
2845*                    Multiply right bidiagonalizing vectors in U by Q
2846*                    in VT
2847*                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
2848*
2849                     CALL SORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
2850     $                            WORK( ITAUP ), VT, LDVT,
2851     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2852*
2853*                    Generate left bidiagonalizing vectors in U
2854*                    (Workspace: need 4*M, prefer 3*M+M*NB)
2855*
2856                     CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2857     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2858                     IWORK = IE + M
2859*
2860*                    Perform bidiagonal QR iteration, computing left
2861*                    singular vectors of A in U and computing right
2862*                    singular vectors of A in VT
2863*                    (Workspace: need BDSPAC)
2864*
2865                     CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
2866     $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
2867     $                            INFO )
2868*
2869                  END IF
2870*
2871               END IF
2872*
2873            ELSE IF( WNTVA ) THEN
2874*
2875               IF( WNTUN ) THEN
2876*
2877*                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')
2878*                 N right singular vectors to be computed in VT and
2879*                 no left singular vectors to be computed
2880*
2881                  IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
2882*
2883*                    Sufficient workspace for a fast algorithm
2884*
2885                     IR = 1
2886                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
2887*
2888*                       WORK(IR) is LDA by M
2889*
2890                        LDWRKR = LDA
2891                     ELSE
2892*
2893*                       WORK(IR) is M by M
2894*
2895                        LDWRKR = M
2896                     END IF
2897                     ITAU = IR + LDWRKR*M
2898                     IWORK = ITAU + M
2899*
2900*                    Compute A=L*Q, copying result to VT
2901*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2902*
2903                     CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
2904     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2905                     CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
2906*
2907*                    Copy L to WORK(IR), zeroing out above it
2908*
2909                     CALL SLACPY( 'L', M, M, A, LDA, WORK( IR ),
2910     $                            LDWRKR )
2911                     CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
2912     $                            WORK( IR+LDWRKR ), LDWRKR )
2913*
2914*                    Generate Q in VT
2915*                    (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
2916*
2917                     CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
2918     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2919                     IE = ITAU
2920                     ITAUQ = IE + M
2921                     ITAUP = ITAUQ + M
2922                     IWORK = ITAUP + M
2923*
2924*                    Bidiagonalize L in WORK(IR)
2925*                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2926*
2927                     CALL SGEBRD( M, M, WORK( IR ), LDWRKR, S,
2928     $                            WORK( IE ), WORK( ITAUQ ),
2929     $                            WORK( ITAUP ), WORK( IWORK ),
2930     $                            LWORK-IWORK+1, IERR )
2931*
2932*                    Generate right bidiagonalizing vectors in WORK(IR)
2933*                    (Workspace: need M*M+4*M-1,
2934*                                prefer M*M+3*M+(M-1)*NB)
2935*
2936                     CALL SORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2937     $                            WORK( ITAUP ), WORK( IWORK ),
2938     $                            LWORK-IWORK+1, IERR )
2939                     IWORK = IE + M
2940*
2941*                    Perform bidiagonal QR iteration, computing right
2942*                    singular vectors of L in WORK(IR)
2943*                    (Workspace: need M*M+BDSPAC)
2944*
2945                     CALL SBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
2946     $                            WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2947     $                            WORK( IWORK ), INFO )
2948*
2949*                    Multiply right singular vectors of L in WORK(IR) by
2950*                    Q in VT, storing result in A
2951*                    (Workspace: need M*M)
2952*
2953                     CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ),
2954     $                           LDWRKR, VT, LDVT, ZERO, A, LDA )
2955*
2956*                    Copy right singular vectors of A from A to VT
2957*
2958                     CALL SLACPY( 'F', M, N, A, LDA, VT, LDVT )
2959*
2960                  ELSE
2961*
2962*                    Insufficient workspace for a fast algorithm
2963*
2964                     ITAU = 1
2965                     IWORK = ITAU + M
2966*
2967*                    Compute A=L*Q, copying result to VT
2968*                    (Workspace: need 2*M, prefer M+M*NB)
2969*
2970                     CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
2971     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2972                     CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
2973*
2974*                    Generate Q in VT
2975*                    (Workspace: need M+N, prefer M+N*NB)
2976*
2977                     CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
2978     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2979                     IE = ITAU
2980                     ITAUQ = IE + M
2981                     ITAUP = ITAUQ + M
2982                     IWORK = ITAUP + M
2983*
2984*                    Zero out above L in A
2985*
2986                     CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
2987     $                            LDA )
2988*
2989*                    Bidiagonalize L in A
2990*                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
2991*
2992                     CALL SGEBRD( M, M, A, LDA, S, WORK( IE ),
2993     $                            WORK( ITAUQ ), WORK( ITAUP ),
2994     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2995*
2996*                    Multiply right bidiagonalizing vectors in A by Q
2997*                    in VT
2998*                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
2999*
3000                     CALL SORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
3001     $                            WORK( ITAUP ), VT, LDVT,
3002     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3003                     IWORK = IE + M
3004*
3005*                    Perform bidiagonal QR iteration, computing right
3006*                    singular vectors of A in VT
3007*                    (Workspace: need BDSPAC)
3008*
3009                     CALL SBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT,
3010     $                            LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
3011     $                            INFO )
3012*
3013                  END IF
3014*
3015               ELSE IF( WNTUO ) THEN
3016*
3017*                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')
3018*                 N right singular vectors to be computed in VT and
3019*                 M left singular vectors to be overwritten on A
3020*
3021                  IF( LWORK.GE.2*M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
3022*
3023*                    Sufficient workspace for a fast algorithm
3024*
3025                     IU = 1
3026                     IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
3027*
3028*                       WORK(IU) is LDA by M and WORK(IR) is LDA by M
3029*
3030                        LDWRKU = LDA
3031                        IR = IU + LDWRKU*M
3032                        LDWRKR = LDA
3033                     ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
3034*
3035*                       WORK(IU) is LDA by M and WORK(IR) is M by M
3036*
3037                        LDWRKU = LDA
3038                        IR = IU + LDWRKU*M
3039                        LDWRKR = M
3040                     ELSE
3041*
3042*                       WORK(IU) is M by M and WORK(IR) is M by M
3043*
3044                        LDWRKU = M
3045                        IR = IU + LDWRKU*M
3046                        LDWRKR = M
3047                     END IF
3048                     ITAU = IR + LDWRKR*M
3049                     IWORK = ITAU + M
3050*
3051*                    Compute A=L*Q, copying result to VT
3052*                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
3053*
3054                     CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
3055     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3056                     CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
3057*
3058*                    Generate Q in VT
3059*                    (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
3060*
3061                     CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3062     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3063*
3064*                    Copy L to WORK(IU), zeroing out above it
3065*
3066                     CALL SLACPY( 'L', M, M, A, LDA, WORK( IU ),
3067     $                            LDWRKU )
3068                     CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
3069     $                            WORK( IU+LDWRKU ), LDWRKU )
3070                     IE = ITAU
3071                     ITAUQ = IE + M
3072                     ITAUP = ITAUQ + M
3073                     IWORK = ITAUP + M
3074*
3075*                    Bidiagonalize L in WORK(IU), copying result to
3076*                    WORK(IR)
3077*                    (Workspace: need 2*M*M+4*M,
3078*                                prefer 2*M*M+3*M+2*M*NB)
3079*
3080                     CALL SGEBRD( M, M, WORK( IU ), LDWRKU, S,
3081     $                            WORK( IE ), WORK( ITAUQ ),
3082     $                            WORK( ITAUP ), WORK( IWORK ),
3083     $                            LWORK-IWORK+1, IERR )
3084                     CALL SLACPY( 'L', M, M, WORK( IU ), LDWRKU,
3085     $                            WORK( IR ), LDWRKR )
3086*
3087*                    Generate right bidiagonalizing vectors in WORK(IU)
3088*                    (Workspace: need 2*M*M+4*M-1,
3089*                                prefer 2*M*M+3*M+(M-1)*NB)
3090*
3091                     CALL SORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
3092     $                            WORK( ITAUP ), WORK( IWORK ),
3093     $                            LWORK-IWORK+1, IERR )
3094*
3095*                    Generate left bidiagonalizing vectors in WORK(IR)
3096*                    (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
3097*
3098                     CALL SORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
3099     $                            WORK( ITAUQ ), WORK( IWORK ),
3100     $                            LWORK-IWORK+1, IERR )
3101                     IWORK = IE + M
3102*
3103*                    Perform bidiagonal QR iteration, computing left
3104*                    singular vectors of L in WORK(IR) and computing
3105*                    right singular vectors of L in WORK(IU)
3106*                    (Workspace: need 2*M*M+BDSPAC)
3107*
3108                     CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
3109     $                            WORK( IU ), LDWRKU, WORK( IR ),
3110     $                            LDWRKR, DUM, 1, WORK( IWORK ), INFO )
3111*
3112*                    Multiply right singular vectors of L in WORK(IU) by
3113*                    Q in VT, storing result in A
3114*                    (Workspace: need M*M)
3115*
3116                     CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
3117     $                           LDWRKU, VT, LDVT, ZERO, A, LDA )
3118*
3119*                    Copy right singular vectors of A from A to VT
3120*
3121                     CALL SLACPY( 'F', M, N, A, LDA, VT, LDVT )
3122*
3123*                    Copy left singular vectors of A from WORK(IR) to A
3124*
3125                     CALL SLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
3126     $                            LDA )
3127*
3128                  ELSE
3129*
3130*                    Insufficient workspace for a fast algorithm
3131*
3132                     ITAU = 1
3133                     IWORK = ITAU + M
3134*
3135*                    Compute A=L*Q, copying result to VT
3136*                    (Workspace: need 2*M, prefer M+M*NB)
3137*
3138                     CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
3139     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3140                     CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
3141*
3142*                    Generate Q in VT
3143*                    (Workspace: need M+N, prefer M+N*NB)
3144*
3145                     CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3146     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3147                     IE = ITAU
3148                     ITAUQ = IE + M
3149                     ITAUP = ITAUQ + M
3150                     IWORK = ITAUP + M
3151*
3152*                    Zero out above L in A
3153*
3154                     CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
3155     $                            LDA )
3156*
3157*                    Bidiagonalize L in A
3158*                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
3159*
3160                     CALL SGEBRD( M, M, A, LDA, S, WORK( IE ),
3161     $                            WORK( ITAUQ ), WORK( ITAUP ),
3162     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3163*
3164*                    Multiply right bidiagonalizing vectors in A by Q
3165*                    in VT
3166*                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
3167*
3168                     CALL SORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
3169     $                            WORK( ITAUP ), VT, LDVT,
3170     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3171*
3172*                    Generate left bidiagonalizing vectors in A
3173*                    (Workspace: need 4*M, prefer 3*M+M*NB)
3174*
3175                     CALL SORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
3176     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3177                     IWORK = IE + M
3178*
3179*                    Perform bidiagonal QR iteration, computing left
3180*                    singular vectors of A in A and computing right
3181*                    singular vectors of A in VT
3182*                    (Workspace: need BDSPAC)
3183*
3184                     CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
3185     $                            LDVT, A, LDA, DUM, 1, WORK( IWORK ),
3186     $                            INFO )
3187*
3188                  END IF
3189*
3190               ELSE IF( WNTUAS ) THEN
3191*
3192*                 Path 9t(N much larger than M, JOBU='S' or 'A',
3193*                         JOBVT='A')
3194*                 N right singular vectors to be computed in VT and
3195*                 M left singular vectors to be computed in U
3196*
3197                  IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
3198*
3199*                    Sufficient workspace for a fast algorithm
3200*
3201                     IU = 1
3202                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
3203*
3204*                       WORK(IU) is LDA by M
3205*
3206                        LDWRKU = LDA
3207                     ELSE
3208*
3209*                       WORK(IU) is M by M
3210*
3211                        LDWRKU = M
3212                     END IF
3213                     ITAU = IU + LDWRKU*M
3214                     IWORK = ITAU + M
3215*
3216*                    Compute A=L*Q, copying result to VT
3217*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
3218*
3219                     CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
3220     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3221                     CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
3222*
3223*                    Generate Q in VT
3224*                    (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
3225*
3226                     CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3227     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3228*
3229*                    Copy L to WORK(IU), zeroing out above it
3230*
3231                     CALL SLACPY( 'L', M, M, A, LDA, WORK( IU ),
3232     $                            LDWRKU )
3233                     CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
3234     $                            WORK( IU+LDWRKU ), LDWRKU )
3235                     IE = ITAU
3236                     ITAUQ = IE + M
3237                     ITAUP = ITAUQ + M
3238                     IWORK = ITAUP + M
3239*
3240*                    Bidiagonalize L in WORK(IU), copying result to U
3241*                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
3242*
3243                     CALL SGEBRD( M, M, WORK( IU ), LDWRKU, S,
3244     $                            WORK( IE ), WORK( ITAUQ ),
3245     $                            WORK( ITAUP ), WORK( IWORK ),
3246     $                            LWORK-IWORK+1, IERR )
3247                     CALL SLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
3248     $                            LDU )
3249*
3250*                    Generate right bidiagonalizing vectors in WORK(IU)
3251*                    (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
3252*
3253                     CALL SORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
3254     $                            WORK( ITAUP ), WORK( IWORK ),
3255     $                            LWORK-IWORK+1, IERR )
3256*
3257*                    Generate left bidiagonalizing vectors in U
3258*                    (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
3259*
3260                     CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3261     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3262                     IWORK = IE + M
3263*
3264*                    Perform bidiagonal QR iteration, computing left
3265*                    singular vectors of L in U and computing right
3266*                    singular vectors of L in WORK(IU)
3267*                    (Workspace: need M*M+BDSPAC)
3268*
3269                     CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
3270     $                            WORK( IU ), LDWRKU, U, LDU, DUM, 1,
3271     $                            WORK( IWORK ), INFO )
3272*
3273*                    Multiply right singular vectors of L in WORK(IU) by
3274*                    Q in VT, storing result in A
3275*                    (Workspace: need M*M)
3276*
3277                     CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
3278     $                           LDWRKU, VT, LDVT, ZERO, A, LDA )
3279*
3280*                    Copy right singular vectors of A from A to VT
3281*
3282                     CALL SLACPY( 'F', M, N, A, LDA, VT, LDVT )
3283*
3284                  ELSE
3285*
3286*                    Insufficient workspace for a fast algorithm
3287*
3288                     ITAU = 1
3289                     IWORK = ITAU + M
3290*
3291*                    Compute A=L*Q, copying result to VT
3292*                    (Workspace: need 2*M, prefer M+M*NB)
3293*
3294                     CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
3295     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3296                     CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
3297*
3298*                    Generate Q in VT
3299*                    (Workspace: need M+N, prefer M+N*NB)
3300*
3301                     CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3302     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3303*
3304*                    Copy L to U, zeroing out above it
3305*
3306                     CALL SLACPY( 'L', M, M, A, LDA, U, LDU )
3307                     CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
3308     $                            LDU )
3309                     IE = ITAU
3310                     ITAUQ = IE + M
3311                     ITAUP = ITAUQ + M
3312                     IWORK = ITAUP + M
3313*
3314*                    Bidiagonalize L in U
3315*                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
3316*
3317                     CALL SGEBRD( M, M, U, LDU, S, WORK( IE ),
3318     $                            WORK( ITAUQ ), WORK( ITAUP ),
3319     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3320*
3321*                    Multiply right bidiagonalizing vectors in U by Q
3322*                    in VT
3323*                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
3324*
3325                     CALL SORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
3326     $                            WORK( ITAUP ), VT, LDVT,
3327     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3328*
3329*                    Generate left bidiagonalizing vectors in U
3330*                    (Workspace: need 4*M, prefer 3*M+M*NB)
3331*
3332                     CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3333     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3334                     IWORK = IE + M
3335*
3336*                    Perform bidiagonal QR iteration, computing left
3337*                    singular vectors of A in U and computing right
3338*                    singular vectors of A in VT
3339*                    (Workspace: need BDSPAC)
3340*
3341                     CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
3342     $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
3343     $                            INFO )
3344*
3345                  END IF
3346*
3347               END IF
3348*
3349            END IF
3350*
3351         ELSE
3352*
3353*           N .LT. MNTHR
3354*
3355*           Path 10t(N greater than M, but not much larger)
3356*           Reduce to bidiagonal form without LQ decomposition
3357*
3358            IE = 1
3359            ITAUQ = IE + M
3360            ITAUP = ITAUQ + M
3361            IWORK = ITAUP + M
3362*
3363*           Bidiagonalize A
3364*           (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
3365*
3366            CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
3367     $                   WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
3368     $                   IERR )
3369            IF( WNTUAS ) THEN
3370*
3371*              If left singular vectors desired in U, copy result to U
3372*              and generate left bidiagonalizing vectors in U
3373*              (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3374*
3375               CALL SLACPY( 'L', M, M, A, LDA, U, LDU )
3376               CALL SORGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ),
3377     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3378            END IF
3379            IF( WNTVAS ) THEN
3380*
3381*              If right singular vectors desired in VT, copy result to
3382*              VT and generate right bidiagonalizing vectors in VT
3383*              (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB)
3384*
3385               CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
3386               IF( WNTVA )
3387     $            NRVT = N
3388               IF( WNTVS )
3389     $            NRVT = M
3390               CALL SORGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ),
3391     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3392            END IF
3393            IF( WNTUO ) THEN
3394*
3395*              If left singular vectors desired in A, generate left
3396*              bidiagonalizing vectors in A
3397*              (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3398*
3399               CALL SORGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ),
3400     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3401            END IF
3402            IF( WNTVO ) THEN
3403*
3404*              If right singular vectors desired in A, generate right
3405*              bidiagonalizing vectors in A
3406*              (Workspace: need 4*M, prefer 3*M+M*NB)
3407*
3408               CALL SORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
3409     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3410            END IF
3411            IWORK = IE + M
3412            IF( WNTUAS .OR. WNTUO )
3413     $         NRU = M
3414            IF( WNTUN )
3415     $         NRU = 0
3416            IF( WNTVAS .OR. WNTVO )
3417     $         NCVT = N
3418            IF( WNTVN )
3419     $         NCVT = 0
3420            IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
3421*
3422*              Perform bidiagonal QR iteration, if desired, computing
3423*              left singular vectors in U and computing right singular
3424*              vectors in VT
3425*              (Workspace: need BDSPAC)
3426*
3427               CALL SBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
3428     $                      LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
3429            ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
3430*
3431*              Perform bidiagonal QR iteration, if desired, computing
3432*              left singular vectors in U and computing right singular
3433*              vectors in A
3434*              (Workspace: need BDSPAC)
3435*
3436               CALL SBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
3437     $                      U, LDU, DUM, 1, WORK( IWORK ), INFO )
3438            ELSE
3439*
3440*              Perform bidiagonal QR iteration, if desired, computing
3441*              left singular vectors in A and computing right singular
3442*              vectors in VT
3443*              (Workspace: need BDSPAC)
3444*
3445               CALL SBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
3446     $                      LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
3447            END IF
3448*
3449         END IF
3450*
3451      END IF
3452*
3453*     If SBDSQR failed to converge, copy unconverged superdiagonals
3454*     to WORK( 2:MINMN )
3455*
3456      IF( INFO.NE.0 ) THEN
3457         IF( IE.GT.2 ) THEN
3458            DO 50 I = 1, MINMN - 1
3459               WORK( I+1 ) = WORK( I+IE-1 )
3460   50       CONTINUE
3461         END IF
3462         IF( IE.LT.2 ) THEN
3463            DO 60 I = MINMN - 1, 1, -1
3464               WORK( I+1 ) = WORK( I+IE-1 )
3465   60       CONTINUE
3466         END IF
3467      END IF
3468*
3469*     Undo scaling if necessary
3470*
3471      IF( ISCL.EQ.1 ) THEN
3472         IF( ANRM.GT.BIGNUM )
3473     $      CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
3474     $                   IERR )
3475         IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM )
3476     $      CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1, WORK( 2 ),
3477     $                   MINMN, IERR )
3478         IF( ANRM.LT.SMLNUM )
3479     $      CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
3480     $                   IERR )
3481         IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM )
3482     $      CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1, WORK( 2 ),
3483     $                   MINMN, IERR )
3484      END IF
3485*
3486*     Return optimal workspace in WORK(1)
3487*
3488      WORK( 1 ) = MAXWRK
3489*
3490      RETURN
3491*
3492*     End of SGESVD
3493*
3494      END
3495