1      SUBROUTINE CGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
2     $                   WORK, LWORK, RWORK, INFO )
3*
4*  -- LAPACK driver routine (version 3.0) --
5*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
6*     Courant Institute, Argonne National Lab, and Rice University
7*     October 31, 1999
8*
9*     .. Scalar Arguments ..
10      CHARACTER          JOBU, JOBVT
11      INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N
12*     ..
13*     .. Array Arguments ..
14      REAL               RWORK( * ), S( * )
15      COMPLEX            A( LDA, * ), U( LDU, * ), VT( LDVT, * ),
16     $                   WORK( * )
17*     ..
18*
19*  Purpose
20*  =======
21*
22*  CGESVD computes the singular value decomposition (SVD) of a complex
23*  M-by-N matrix A, optionally computing the left and/or right singular
24*  vectors. The SVD is written
25*
26*       A = U * SIGMA * conjugate-transpose(V)
27*
28*  where SIGMA is an M-by-N matrix which is zero except for its
29*  min(m,n) diagonal elements, U is an M-by-M unitary matrix, and
30*  V is an N-by-N unitary matrix.  The diagonal elements of SIGMA
31*  are the singular values of A; they are real and non-negative, and
32*  are returned in descending order.  The first min(m,n) columns of
33*  U and V are the left and right singular vectors of A.
34*
35*  Note that the routine returns V**H, not V.
36*
37*  Arguments
38*  =========
39*
40*  JOBU    (input) CHARACTER*1
41*          Specifies options for computing all or part of the matrix U:
42*          = 'A':  all M columns of U are returned in array U:
43*          = 'S':  the first min(m,n) columns of U (the left singular
44*                  vectors) are returned in the array U;
45*          = 'O':  the first min(m,n) columns of U (the left singular
46*                  vectors) are overwritten on the array A;
47*          = 'N':  no columns of U (no left singular vectors) are
48*                  computed.
49*
50*  JOBVT   (input) CHARACTER*1
51*          Specifies options for computing all or part of the matrix
52*          V**H:
53*          = 'A':  all N rows of V**H are returned in the array VT;
54*          = 'S':  the first min(m,n) rows of V**H (the right singular
55*                  vectors) are returned in the array VT;
56*          = 'O':  the first min(m,n) rows of V**H (the right singular
57*                  vectors) are overwritten on the array A;
58*          = 'N':  no rows of V**H (no right singular vectors) are
59*                  computed.
60*
61*          JOBVT and JOBU cannot both be 'O'.
62*
63*  M       (input) INTEGER
64*          The number of rows of the input matrix A.  M >= 0.
65*
66*  N       (input) INTEGER
67*          The number of columns of the input matrix A.  N >= 0.
68*
69*  A       (input/output) COMPLEX array, dimension (LDA,N)
70*          On entry, the M-by-N matrix A.
71*          On exit,
72*          if JOBU = 'O',  A is overwritten with the first min(m,n)
73*                          columns of U (the left singular vectors,
74*                          stored columnwise);
75*          if JOBVT = 'O', A is overwritten with the first min(m,n)
76*                          rows of V**H (the right singular vectors,
77*                          stored rowwise);
78*          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
79*                          are destroyed.
80*
81*  LDA     (input) INTEGER
82*          The leading dimension of the array A.  LDA >= max(1,M).
83*
84*  S       (output) REAL array, dimension (min(M,N))
85*          The singular values of A, sorted so that S(i) >= S(i+1).
86*
87*  U       (output) COMPLEX array, dimension (LDU,UCOL)
88*          (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
89*          If JOBU = 'A', U contains the M-by-M unitary matrix U;
90*          if JOBU = 'S', U contains the first min(m,n) columns of U
91*          (the left singular vectors, stored columnwise);
92*          if JOBU = 'N' or 'O', U is not referenced.
93*
94*  LDU     (input) INTEGER
95*          The leading dimension of the array U.  LDU >= 1; if
96*          JOBU = 'S' or 'A', LDU >= M.
97*
98*  VT      (output) COMPLEX array, dimension (LDVT,N)
99*          If JOBVT = 'A', VT contains the N-by-N unitary matrix
100*          V**H;
101*          if JOBVT = 'S', VT contains the first min(m,n) rows of
102*          V**H (the right singular vectors, stored rowwise);
103*          if JOBVT = 'N' or 'O', VT is not referenced.
104*
105*  LDVT    (input) INTEGER
106*          The leading dimension of the array VT.  LDVT >= 1; if
107*          JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
108*
109*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
110*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
111*
112*  LWORK   (input) INTEGER
113*          The dimension of the array WORK. LWORK >= 1.
114*          LWORK >=  2*MIN(M,N)+MAX(M,N).
115*          For good performance, LWORK should generally be larger.
116*
117*          If LWORK = -1, then a workspace query is assumed; the routine
118*          only calculates the optimal size of the WORK array, returns
119*          this value as the first entry of the WORK array, and no error
120*          message related to LWORK is issued by XERBLA.
121*
122*  RWORK   (workspace) REAL array, dimension (5*min(M,N))
123*          On exit, if INFO > 0, RWORK(1:MIN(M,N)-1) contains the
124*          unconverged superdiagonal elements of an upper bidiagonal
125*          matrix B whose diagonal is in S (not necessarily sorted).
126*          B satisfies A = U * B * VT, so it has the same singular
127*          values as A, and singular vectors related by U and VT.
128*
129*  INFO    (output) INTEGER
130*          = 0:  successful exit.
131*          < 0:  if INFO = -i, the i-th argument had an illegal value.
132*          > 0:  if CBDSQR did not converge, INFO specifies how many
133*                superdiagonals of an intermediate bidiagonal form B
134*                did not converge to zero. See the description of RWORK
135*                above for details.
136*
137*  =====================================================================
138*
139*     .. Parameters ..
140      COMPLEX            CZERO, CONE
141      PARAMETER          ( CZERO = ( 0.0E0, 0.0E0 ),
142     $                   CONE = ( 1.0E0, 0.0E0 ) )
143      REAL               ZERO, ONE
144      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
145*     ..
146*     .. Local Scalars ..
147      LOGICAL            LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
148     $                   WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS
149      INTEGER            BLK, CHUNK, I, IE, IERR, IR, IRWORK, ISCL,
150     $                   ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU,
151     $                   MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU,
152     $                   NRVT, WRKBL
153      REAL               ANRM, BIGNUM, EPS, SMLNUM
154*     ..
155*     .. Local Arrays ..
156      REAL               DUM( 1 )
157      COMPLEX            CDUM( 1 )
158*     ..
159*     .. External Subroutines ..
160      EXTERNAL           CBDSQR, CGEBRD, CGELQF, CGEMM, CGEQRF, CLACPY,
161     $                   CLASCL, CLASET, CUNGBR, CUNGLQ, CUNGQR, CUNMBR,
162     $                   SLASCL, XERBLA
163*     ..
164*     .. External Functions ..
165      LOGICAL            LSAME
166      INTEGER            ILAENV
167      REAL               CLANGE, SLAMCH
168      EXTERNAL           LSAME, ILAENV, CLANGE, SLAMCH
169*     ..
170*     .. Intrinsic Functions ..
171      INTRINSIC          MAX, MIN, SQRT
172*     ..
173*     .. Executable Statements ..
174*
175*     Test the input arguments
176*
177      INFO = 0
178      MINMN = MIN( M, N )
179      MNTHR = ILAENV( 6, 'CGESVD', JOBU // JOBVT, M, N, 0, 0 )
180      WNTUA = LSAME( JOBU, 'A' )
181      WNTUS = LSAME( JOBU, 'S' )
182      WNTUAS = WNTUA .OR. WNTUS
183      WNTUO = LSAME( JOBU, 'O' )
184      WNTUN = LSAME( JOBU, 'N' )
185      WNTVA = LSAME( JOBVT, 'A' )
186      WNTVS = LSAME( JOBVT, 'S' )
187      WNTVAS = WNTVA .OR. WNTVS
188      WNTVO = LSAME( JOBVT, 'O' )
189      WNTVN = LSAME( JOBVT, 'N' )
190      MINWRK = 1
191      LQUERY = ( LWORK.EQ.-1 )
192*
193      IF( .NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN
194         INFO = -1
195      ELSE IF( .NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR.
196     $         ( WNTVO .AND. WNTUO ) ) THEN
197         INFO = -2
198      ELSE IF( M.LT.0 ) THEN
199         INFO = -3
200      ELSE IF( N.LT.0 ) THEN
201         INFO = -4
202      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
203         INFO = -6
204      ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN
205         INFO = -9
206      ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR.
207     $         ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN
208         INFO = -11
209      END IF
210*
211*     Compute workspace
212*      (Note: Comments in the code beginning "Workspace:" describe the
213*       minimal amount of workspace needed at that point in the code,
214*       as well as the preferred amount for good performance.
215*       CWorkspace refers to complex workspace, and RWorkspace to
216*       real workspace. NB refers to the optimal block size for the
217*       immediately following subroutine, as returned by ILAENV.)
218*
219      IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) .AND. M.GT.0 .AND.
220     $   N.GT.0 ) THEN
221         IF( M.GE.N ) THEN
222*
223*           Space needed for CBDSQR is BDSPAC = 5*N
224*
225            IF( M.GE.MNTHR ) THEN
226               IF( WNTUN ) THEN
227*
228*                 Path 1 (M much larger than N, JOBU='N')
229*
230                  MAXWRK = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1,
231     $                     -1 )
232                  MAXWRK = MAX( MAXWRK, 2*N+2*N*
233     $                     ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) )
234                  IF( WNTVO .OR. WNTVAS )
235     $               MAXWRK = MAX( MAXWRK, 2*N+( N-1 )*
236     $                        ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) )
237                  MINWRK = 3*N
238                  MAXWRK = MAX( MINWRK, MAXWRK )
239               ELSE IF( WNTUO .AND. WNTVN ) THEN
240*
241*                 Path 2 (M much larger than N, JOBU='O', JOBVT='N')
242*
243                  WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 )
244                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'CUNGQR', ' ', M,
245     $                    N, N, -1 ) )
246                  WRKBL = MAX( WRKBL, 2*N+2*N*
247     $                    ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) )
248                  WRKBL = MAX( WRKBL, 2*N+N*
249     $                    ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) )
250                  MAXWRK = MAX( N*N+WRKBL, N*N+M*N )
251                  MINWRK = 2*N + M
252                  MAXWRK = MAX( MINWRK, MAXWRK )
253               ELSE IF( WNTUO .AND. WNTVAS ) THEN
254*
255*                 Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
256*                 'A')
257*
258                  WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 )
259                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'CUNGQR', ' ', M,
260     $                    N, N, -1 ) )
261                  WRKBL = MAX( WRKBL, 2*N+2*N*
262     $                    ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) )
263                  WRKBL = MAX( WRKBL, 2*N+N*
264     $                    ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) )
265                  WRKBL = MAX( WRKBL, 2*N+( N-1 )*
266     $                    ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) )
267                  MAXWRK = MAX( N*N+WRKBL, N*N+M*N )
268                  MINWRK = 2*N + M
269                  MAXWRK = MAX( MINWRK, MAXWRK )
270               ELSE IF( WNTUS .AND. WNTVN ) THEN
271*
272*                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')
273*
274                  WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 )
275                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'CUNGQR', ' ', M,
276     $                    N, N, -1 ) )
277                  WRKBL = MAX( WRKBL, 2*N+2*N*
278     $                    ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) )
279                  WRKBL = MAX( WRKBL, 2*N+N*
280     $                    ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) )
281                  MAXWRK = N*N + WRKBL
282                  MINWRK = 2*N + M
283                  MAXWRK = MAX( MINWRK, MAXWRK )
284               ELSE IF( WNTUS .AND. WNTVO ) THEN
285*
286*                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')
287*
288                  WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 )
289                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'CUNGQR', ' ', M,
290     $                    N, N, -1 ) )
291                  WRKBL = MAX( WRKBL, 2*N+2*N*
292     $                    ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) )
293                  WRKBL = MAX( WRKBL, 2*N+N*
294     $                    ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) )
295                  WRKBL = MAX( WRKBL, 2*N+( N-1 )*
296     $                    ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) )
297                  MAXWRK = 2*N*N + WRKBL
298                  MINWRK = 2*N + M
299                  MAXWRK = MAX( MINWRK, MAXWRK )
300               ELSE IF( WNTUS .AND. WNTVAS ) THEN
301*
302*                 Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
303*                 'A')
304*
305                  WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 )
306                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'CUNGQR', ' ', M,
307     $                    N, N, -1 ) )
308                  WRKBL = MAX( WRKBL, 2*N+2*N*
309     $                    ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) )
310                  WRKBL = MAX( WRKBL, 2*N+N*
311     $                    ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) )
312                  WRKBL = MAX( WRKBL, 2*N+( N-1 )*
313     $                    ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) )
314                  MAXWRK = N*N + WRKBL
315                  MINWRK = 2*N + M
316                  MAXWRK = MAX( MINWRK, MAXWRK )
317               ELSE IF( WNTUA .AND. WNTVN ) THEN
318*
319*                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')
320*
321                  WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 )
322                  WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'CUNGQR', ' ', M,
323     $                    M, N, -1 ) )
324                  WRKBL = MAX( WRKBL, 2*N+2*N*
325     $                    ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) )
326                  WRKBL = MAX( WRKBL, 2*N+N*
327     $                    ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) )
328                  MAXWRK = N*N + WRKBL
329                  MINWRK = 2*N + M
330                  MAXWRK = MAX( MINWRK, MAXWRK )
331               ELSE IF( WNTUA .AND. WNTVO ) THEN
332*
333*                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')
334*
335                  WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 )
336                  WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'CUNGQR', ' ', M,
337     $                    M, N, -1 ) )
338                  WRKBL = MAX( WRKBL, 2*N+2*N*
339     $                    ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) )
340                  WRKBL = MAX( WRKBL, 2*N+N*
341     $                    ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) )
342                  WRKBL = MAX( WRKBL, 2*N+( N-1 )*
343     $                    ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) )
344                  MAXWRK = 2*N*N + WRKBL
345                  MINWRK = 2*N + M
346                  MAXWRK = MAX( MINWRK, MAXWRK )
347               ELSE IF( WNTUA .AND. WNTVAS ) THEN
348*
349*                 Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
350*                 'A')
351*
352                  WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 )
353                  WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'CUNGQR', ' ', M,
354     $                    M, N, -1 ) )
355                  WRKBL = MAX( WRKBL, 2*N+2*N*
356     $                    ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) )
357                  WRKBL = MAX( WRKBL, 2*N+N*
358     $                    ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) )
359                  WRKBL = MAX( WRKBL, 2*N+( N-1 )*
360     $                    ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) )
361                  MAXWRK = N*N + WRKBL
362                  MINWRK = 2*N + M
363                  MAXWRK = MAX( MINWRK, MAXWRK )
364               END IF
365            ELSE
366*
367*              Path 10 (M at least N, but not much larger)
368*
369               MAXWRK = 2*N + ( M+N )*ILAENV( 1, 'CGEBRD', ' ', M, N,
370     $                  -1, -1 )
371               IF( WNTUS .OR. WNTUO )
372     $            MAXWRK = MAX( MAXWRK, 2*N+N*
373     $                     ILAENV( 1, 'CUNGBR', 'Q', M, N, N, -1 ) )
374               IF( WNTUA )
375     $            MAXWRK = MAX( MAXWRK, 2*N+M*
376     $                     ILAENV( 1, 'CUNGBR', 'Q', M, M, N, -1 ) )
377               IF( .NOT.WNTVN )
378     $            MAXWRK = MAX( MAXWRK, 2*N+( N-1 )*
379     $                     ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) )
380               MINWRK = 2*N + M
381               MAXWRK = MAX( MINWRK, MAXWRK )
382            END IF
383         ELSE
384*
385*           Space needed for CBDSQR is BDSPAC = 5*M
386*
387            IF( N.GE.MNTHR ) THEN
388               IF( WNTVN ) THEN
389*
390*                 Path 1t(N much larger than M, JOBVT='N')
391*
392                  MAXWRK = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1,
393     $                     -1 )
394                  MAXWRK = MAX( MAXWRK, 2*M+2*M*
395     $                     ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) )
396                  IF( WNTUO .OR. WNTUAS )
397     $               MAXWRK = MAX( MAXWRK, 2*M+M*
398     $                        ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) )
399                  MINWRK = 3*M
400                  MAXWRK = MAX( MINWRK, MAXWRK )
401               ELSE IF( WNTVO .AND. WNTUN ) THEN
402*
403*                 Path 2t(N much larger than M, JOBU='N', JOBVT='O')
404*
405                  WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 )
406                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'CUNGLQ', ' ', M,
407     $                    N, M, -1 ) )
408                  WRKBL = MAX( WRKBL, 2*M+2*M*
409     $                    ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) )
410                  WRKBL = MAX( WRKBL, 2*M+( M-1 )*
411     $                    ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) )
412                  MAXWRK = MAX( M*M+WRKBL, M*M+M*N )
413                  MINWRK = 2*M + N
414                  MAXWRK = MAX( MINWRK, MAXWRK )
415               ELSE IF( WNTVO .AND. WNTUAS ) THEN
416*
417*                 Path 3t(N much larger than M, JOBU='S' or 'A',
418*                 JOBVT='O')
419*
420                  WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 )
421                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'CUNGLQ', ' ', M,
422     $                    N, M, -1 ) )
423                  WRKBL = MAX( WRKBL, 2*M+2*M*
424     $                    ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) )
425                  WRKBL = MAX( WRKBL, 2*M+( M-1 )*
426     $                    ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) )
427                  WRKBL = MAX( WRKBL, 2*M+M*
428     $                    ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) )
429                  MAXWRK = MAX( M*M+WRKBL, M*M+M*N )
430                  MINWRK = 2*M + N
431                  MAXWRK = MAX( MINWRK, MAXWRK )
432               ELSE IF( WNTVS .AND. WNTUN ) THEN
433*
434*                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')
435*
436                  WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 )
437                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'CUNGLQ', ' ', M,
438     $                    N, M, -1 ) )
439                  WRKBL = MAX( WRKBL, 2*M+2*M*
440     $                    ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) )
441                  WRKBL = MAX( WRKBL, 2*M+( M-1 )*
442     $                    ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) )
443                  MAXWRK = M*M + WRKBL
444                  MINWRK = 2*M + N
445                  MAXWRK = MAX( MINWRK, MAXWRK )
446               ELSE IF( WNTVS .AND. WNTUO ) THEN
447*
448*                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')
449*
450                  WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 )
451                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'CUNGLQ', ' ', M,
452     $                    N, M, -1 ) )
453                  WRKBL = MAX( WRKBL, 2*M+2*M*
454     $                    ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) )
455                  WRKBL = MAX( WRKBL, 2*M+( M-1 )*
456     $                    ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) )
457                  WRKBL = MAX( WRKBL, 2*M+M*
458     $                    ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) )
459                  MAXWRK = 2*M*M + WRKBL
460                  MINWRK = 2*M + N
461                  MAXWRK = MAX( MINWRK, MAXWRK )
462               ELSE IF( WNTVS .AND. WNTUAS ) THEN
463*
464*                 Path 6t(N much larger than M, JOBU='S' or 'A',
465*                 JOBVT='S')
466*
467                  WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 )
468                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'CUNGLQ', ' ', M,
469     $                    N, M, -1 ) )
470                  WRKBL = MAX( WRKBL, 2*M+2*M*
471     $                    ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) )
472                  WRKBL = MAX( WRKBL, 2*M+( M-1 )*
473     $                    ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) )
474                  WRKBL = MAX( WRKBL, 2*M+M*
475     $                    ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) )
476                  MAXWRK = M*M + WRKBL
477                  MINWRK = 2*M + N
478                  MAXWRK = MAX( MINWRK, MAXWRK )
479               ELSE IF( WNTVA .AND. WNTUN ) THEN
480*
481*                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')
482*
483                  WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 )
484                  WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'CUNGLQ', ' ', N,
485     $                    N, M, -1 ) )
486                  WRKBL = MAX( WRKBL, 2*M+2*M*
487     $                    ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) )
488                  WRKBL = MAX( WRKBL, 2*M+( M-1 )*
489     $                    ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) )
490                  MAXWRK = M*M + WRKBL
491                  MINWRK = 2*M + N
492                  MAXWRK = MAX( MINWRK, MAXWRK )
493               ELSE IF( WNTVA .AND. WNTUO ) THEN
494*
495*                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')
496*
497                  WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 )
498                  WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'CUNGLQ', ' ', N,
499     $                    N, M, -1 ) )
500                  WRKBL = MAX( WRKBL, 2*M+2*M*
501     $                    ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) )
502                  WRKBL = MAX( WRKBL, 2*M+( M-1 )*
503     $                    ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) )
504                  WRKBL = MAX( WRKBL, 2*M+M*
505     $                    ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) )
506                  MAXWRK = 2*M*M + WRKBL
507                  MINWRK = 2*M + N
508                  MAXWRK = MAX( MINWRK, MAXWRK )
509               ELSE IF( WNTVA .AND. WNTUAS ) THEN
510*
511*                 Path 9t(N much larger than M, JOBU='S' or 'A',
512*                 JOBVT='A')
513*
514                  WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 )
515                  WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'CUNGLQ', ' ', N,
516     $                    N, M, -1 ) )
517                  WRKBL = MAX( WRKBL, 2*M+2*M*
518     $                    ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) )
519                  WRKBL = MAX( WRKBL, 2*M+( M-1 )*
520     $                    ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) )
521                  WRKBL = MAX( WRKBL, 2*M+M*
522     $                    ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) )
523                  MAXWRK = M*M + WRKBL
524                  MINWRK = 2*M + N
525                  MAXWRK = MAX( MINWRK, MAXWRK )
526               END IF
527            ELSE
528*
529*              Path 10t(N greater than M, but not much larger)
530*
531               MAXWRK = 2*M + ( M+N )*ILAENV( 1, 'CGEBRD', ' ', M, N,
532     $                  -1, -1 )
533               IF( WNTVS .OR. WNTVO )
534     $            MAXWRK = MAX( MAXWRK, 2*M+M*
535     $                     ILAENV( 1, 'CUNGBR', 'P', M, N, M, -1 ) )
536               IF( WNTVA )
537     $            MAXWRK = MAX( MAXWRK, 2*M+N*
538     $                     ILAENV( 1, 'CUNGBR', 'P', N, N, M, -1 ) )
539               IF( .NOT.WNTUN )
540     $            MAXWRK = MAX( MAXWRK, 2*M+( M-1 )*
541     $                     ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) )
542               MINWRK = 2*M + N
543               MAXWRK = MAX( MINWRK, MAXWRK )
544            END IF
545         END IF
546         WORK( 1 ) = MAXWRK
547      END IF
548*
549      IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
550         INFO = -13
551      END IF
552      IF( INFO.NE.0 ) THEN
553         CALL XERBLA( 'CGESVD', -INFO )
554         RETURN
555      ELSE IF( LQUERY ) THEN
556         RETURN
557      END IF
558*
559*     Quick return if possible
560*
561      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
562         IF( LWORK.GE.1 )
563     $      WORK( 1 ) = ONE
564         RETURN
565      END IF
566*
567*     Get machine constants
568*
569      EPS = SLAMCH( 'P' )
570      SMLNUM = SQRT( SLAMCH( 'S' ) ) / EPS
571      BIGNUM = ONE / SMLNUM
572*
573*     Scale A if max element outside range [SMLNUM,BIGNUM]
574*
575      ANRM = CLANGE( 'M', M, N, A, LDA, DUM )
576      ISCL = 0
577      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
578         ISCL = 1
579         CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR )
580      ELSE IF( ANRM.GT.BIGNUM ) THEN
581         ISCL = 1
582         CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR )
583      END IF
584*
585      IF( M.GE.N ) THEN
586*
587*        A has at least as many rows as columns. If A has sufficiently
588*        more rows than columns, first reduce using the QR
589*        decomposition (if sufficient workspace available)
590*
591         IF( M.GE.MNTHR ) THEN
592*
593            IF( WNTUN ) THEN
594*
595*              Path 1 (M much larger than N, JOBU='N')
596*              No left singular vectors to be computed
597*
598               ITAU = 1
599               IWORK = ITAU + N
600*
601*              Compute A=Q*R
602*              (CWorkspace: need 2*N, prefer N+N*NB)
603*              (RWorkspace: need 0)
604*
605               CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
606     $                      LWORK-IWORK+1, IERR )
607*
608*              Zero out below R
609*
610               CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, A( 2, 1 ),
611     $                      LDA )
612               IE = 1
613               ITAUQ = 1
614               ITAUP = ITAUQ + N
615               IWORK = ITAUP + N
616*
617*              Bidiagonalize R in A
618*              (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
619*              (RWorkspace: need N)
620*
621               CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
622     $                      WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
623     $                      IERR )
624               NCVT = 0
625               IF( WNTVO .OR. WNTVAS ) THEN
626*
627*                 If right singular vectors desired, generate P'.
628*                 (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
629*                 (RWorkspace: 0)
630*
631                  CALL CUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
632     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
633                  NCVT = N
634               END IF
635               IRWORK = IE + N
636*
637*              Perform bidiagonal QR iteration, computing right
638*              singular vectors of A in A if desired
639*              (CWorkspace: 0)
640*              (RWorkspace: need BDSPAC)
641*
642               CALL CBDSQR( 'U', N, NCVT, 0, 0, S, RWORK( IE ), A, LDA,
643     $                      CDUM, 1, CDUM, 1, RWORK( IRWORK ), INFO )
644*
645*              If right singular vectors desired in VT, copy them there
646*
647               IF( WNTVAS )
648     $            CALL CLACPY( 'F', N, N, A, LDA, VT, LDVT )
649*
650            ELSE IF( WNTUO .AND. WNTVN ) THEN
651*
652*              Path 2 (M much larger than N, JOBU='O', JOBVT='N')
653*              N left singular vectors to be overwritten on A and
654*              no right singular vectors to be computed
655*
656               IF( LWORK.GE.N*N+3*N ) THEN
657*
658*                 Sufficient workspace for a fast algorithm
659*
660                  IR = 1
661                  IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*N ) THEN
662*
663*                    WORK(IU) is LDA by N, WORK(IR) is LDA by N
664*
665                     LDWRKU = LDA
666                     LDWRKR = LDA
667                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+N*N ) THEN
668*
669*                    WORK(IU) is LDA by N, WORK(IR) is N by N
670*
671                     LDWRKU = LDA
672                     LDWRKR = N
673                  ELSE
674*
675*                    WORK(IU) is LDWRKU by N, WORK(IR) is N by N
676*
677                     LDWRKU = ( LWORK-N*N ) / N
678                     LDWRKR = N
679                  END IF
680                  ITAU = IR + LDWRKR*N
681                  IWORK = ITAU + N
682*
683*                 Compute A=Q*R
684*                 (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
685*                 (RWorkspace: 0)
686*
687                  CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
688     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
689*
690*                 Copy R to WORK(IR) and zero out below it
691*
692                  CALL CLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
693                  CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
694     $                         WORK( IR+1 ), LDWRKR )
695*
696*                 Generate Q in A
697*                 (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
698*                 (RWorkspace: 0)
699*
700                  CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ),
701     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
702                  IE = 1
703                  ITAUQ = ITAU
704                  ITAUP = ITAUQ + N
705                  IWORK = ITAUP + N
706*
707*                 Bidiagonalize R in WORK(IR)
708*                 (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
709*                 (RWorkspace: need N)
710*
711                  CALL CGEBRD( N, N, WORK( IR ), LDWRKR, S, RWORK( IE ),
712     $                         WORK( ITAUQ ), WORK( ITAUP ),
713     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
714*
715*                 Generate left vectors bidiagonalizing R
716*                 (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
717*                 (RWorkspace: need 0)
718*
719                  CALL CUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
720     $                         WORK( ITAUQ ), WORK( IWORK ),
721     $                         LWORK-IWORK+1, IERR )
722                  IRWORK = IE + N
723*
724*                 Perform bidiagonal QR iteration, computing left
725*                 singular vectors of R in WORK(IR)
726*                 (CWorkspace: need N*N)
727*                 (RWorkspace: need BDSPAC)
728*
729                  CALL CBDSQR( 'U', N, 0, N, 0, S, RWORK( IE ), CDUM, 1,
730     $                         WORK( IR ), LDWRKR, CDUM, 1,
731     $                         RWORK( IRWORK ), INFO )
732                  IU = ITAUQ
733*
734*                 Multiply Q in A by left singular vectors of R in
735*                 WORK(IR), storing result in WORK(IU) and copying to A
736*                 (CWorkspace: need N*N+N, prefer N*N+M*N)
737*                 (RWorkspace: 0)
738*
739                  DO 10 I = 1, M, LDWRKU
740                     CHUNK = MIN( M-I+1, LDWRKU )
741                     CALL CGEMM( 'N', 'N', CHUNK, N, N, CONE, A( I, 1 ),
742     $                           LDA, WORK( IR ), LDWRKR, CZERO,
743     $                           WORK( IU ), LDWRKU )
744                     CALL CLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
745     $                            A( I, 1 ), LDA )
746   10             CONTINUE
747*
748               ELSE
749*
750*                 Insufficient workspace for a fast algorithm
751*
752                  IE = 1
753                  ITAUQ = 1
754                  ITAUP = ITAUQ + N
755                  IWORK = ITAUP + N
756*
757*                 Bidiagonalize A
758*                 (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
759*                 (RWorkspace: N)
760*
761                  CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ),
762     $                         WORK( ITAUQ ), WORK( ITAUP ),
763     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
764*
765*                 Generate left vectors bidiagonalizing A
766*                 (CWorkspace: need 3*N, prefer 2*N+N*NB)
767*                 (RWorkspace: 0)
768*
769                  CALL CUNGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
770     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
771                  IRWORK = IE + N
772*
773*                 Perform bidiagonal QR iteration, computing left
774*                 singular vectors of A in A
775*                 (CWorkspace: need 0)
776*                 (RWorkspace: need BDSPAC)
777*
778                  CALL CBDSQR( 'U', N, 0, M, 0, S, RWORK( IE ), CDUM, 1,
779     $                         A, LDA, CDUM, 1, RWORK( IRWORK ), INFO )
780*
781               END IF
782*
783            ELSE IF( WNTUO .AND. WNTVAS ) THEN
784*
785*              Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
786*              N left singular vectors to be overwritten on A and
787*              N right singular vectors to be computed in VT
788*
789               IF( LWORK.GE.N*N+3*N ) THEN
790*
791*                 Sufficient workspace for a fast algorithm
792*
793                  IR = 1
794                  IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*N ) THEN
795*
796*                    WORK(IU) is LDA by N and WORK(IR) is LDA by N
797*
798                     LDWRKU = LDA
799                     LDWRKR = LDA
800                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+N*N ) THEN
801*
802*                    WORK(IU) is LDA by N and WORK(IR) is N by N
803*
804                     LDWRKU = LDA
805                     LDWRKR = N
806                  ELSE
807*
808*                    WORK(IU) is LDWRKU by N and WORK(IR) is N by N
809*
810                     LDWRKU = ( LWORK-N*N ) / N
811                     LDWRKR = N
812                  END IF
813                  ITAU = IR + LDWRKR*N
814                  IWORK = ITAU + N
815*
816*                 Compute A=Q*R
817*                 (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
818*                 (RWorkspace: 0)
819*
820                  CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
821     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
822*
823*                 Copy R to VT, zeroing out below it
824*
825                  CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT )
826                  CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, VT( 2, 1 ),
827     $                         LDVT )
828*
829*                 Generate Q in A
830*                 (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
831*                 (RWorkspace: 0)
832*
833                  CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ),
834     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
835                  IE = 1
836                  ITAUQ = ITAU
837                  ITAUP = ITAUQ + N
838                  IWORK = ITAUP + N
839*
840*                 Bidiagonalize R in VT, copying result to WORK(IR)
841*                 (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
842*                 (RWorkspace: need N)
843*
844                  CALL CGEBRD( N, N, VT, LDVT, S, RWORK( IE ),
845     $                         WORK( ITAUQ ), WORK( ITAUP ),
846     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
847                  CALL CLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR )
848*
849*                 Generate left vectors bidiagonalizing R in WORK(IR)
850*                 (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
851*                 (RWorkspace: 0)
852*
853                  CALL CUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
854     $                         WORK( ITAUQ ), WORK( IWORK ),
855     $                         LWORK-IWORK+1, IERR )
856*
857*                 Generate right vectors bidiagonalizing R in VT
858*                 (CWorkspace: need N*N+3*N-1, prefer N*N+2*N+(N-1)*NB)
859*                 (RWorkspace: 0)
860*
861                  CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
862     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
863                  IRWORK = IE + N
864*
865*                 Perform bidiagonal QR iteration, computing left
866*                 singular vectors of R in WORK(IR) and computing right
867*                 singular vectors of R in VT
868*                 (CWorkspace: need N*N)
869*                 (RWorkspace: need BDSPAC)
870*
871                  CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), VT,
872     $                         LDVT, WORK( IR ), LDWRKR, CDUM, 1,
873     $                         RWORK( IRWORK ), INFO )
874                  IU = ITAUQ
875*
876*                 Multiply Q in A by left singular vectors of R in
877*                 WORK(IR), storing result in WORK(IU) and copying to A
878*                 (CWorkspace: need N*N+N, prefer N*N+M*N)
879*                 (RWorkspace: 0)
880*
881                  DO 20 I = 1, M, LDWRKU
882                     CHUNK = MIN( M-I+1, LDWRKU )
883                     CALL CGEMM( 'N', 'N', CHUNK, N, N, CONE, A( I, 1 ),
884     $                           LDA, WORK( IR ), LDWRKR, CZERO,
885     $                           WORK( IU ), LDWRKU )
886                     CALL CLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
887     $                            A( I, 1 ), LDA )
888   20             CONTINUE
889*
890               ELSE
891*
892*                 Insufficient workspace for a fast algorithm
893*
894                  ITAU = 1
895                  IWORK = ITAU + N
896*
897*                 Compute A=Q*R
898*                 (CWorkspace: need 2*N, prefer N+N*NB)
899*                 (RWorkspace: 0)
900*
901                  CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
902     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
903*
904*                 Copy R to VT, zeroing out below it
905*
906                  CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT )
907                  CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, VT( 2, 1 ),
908     $                         LDVT )
909*
910*                 Generate Q in A
911*                 (CWorkspace: need 2*N, prefer N+N*NB)
912*                 (RWorkspace: 0)
913*
914                  CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ),
915     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
916                  IE = 1
917                  ITAUQ = ITAU
918                  ITAUP = ITAUQ + N
919                  IWORK = ITAUP + N
920*
921*                 Bidiagonalize R in VT
922*                 (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
923*                 (RWorkspace: N)
924*
925                  CALL CGEBRD( N, N, VT, LDVT, S, RWORK( IE ),
926     $                         WORK( ITAUQ ), WORK( ITAUP ),
927     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
928*
929*                 Multiply Q in A by left vectors bidiagonalizing R
930*                 (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
931*                 (RWorkspace: 0)
932*
933                  CALL CUNMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
934     $                         WORK( ITAUQ ), A, LDA, WORK( IWORK ),
935     $                         LWORK-IWORK+1, IERR )
936*
937*                 Generate right vectors bidiagonalizing R in VT
938*                 (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
939*                 (RWorkspace: 0)
940*
941                  CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
942     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
943                  IRWORK = IE + N
944*
945*                 Perform bidiagonal QR iteration, computing left
946*                 singular vectors of A in A and computing right
947*                 singular vectors of A in VT
948*                 (CWorkspace: 0)
949*                 (RWorkspace: need BDSPAC)
950*
951                  CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), VT,
952     $                         LDVT, A, LDA, CDUM, 1, RWORK( IRWORK ),
953     $                         INFO )
954*
955               END IF
956*
957            ELSE IF( WNTUS ) THEN
958*
959               IF( WNTVN ) THEN
960*
961*                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')
962*                 N left singular vectors to be computed in U and
963*                 no right singular vectors to be computed
964*
965                  IF( LWORK.GE.N*N+3*N ) THEN
966*
967*                    Sufficient workspace for a fast algorithm
968*
969                     IR = 1
970                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
971*
972*                       WORK(IR) is LDA by N
973*
974                        LDWRKR = LDA
975                     ELSE
976*
977*                       WORK(IR) is N by N
978*
979                        LDWRKR = N
980                     END IF
981                     ITAU = IR + LDWRKR*N
982                     IWORK = ITAU + N
983*
984*                    Compute A=Q*R
985*                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
986*                    (RWorkspace: 0)
987*
988                     CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
989     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
990*
991*                    Copy R to WORK(IR), zeroing out below it
992*
993                     CALL CLACPY( 'U', N, N, A, LDA, WORK( IR ),
994     $                            LDWRKR )
995                     CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
996     $                            WORK( IR+1 ), LDWRKR )
997*
998*                    Generate Q in A
999*                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1000*                    (RWorkspace: 0)
1001*
1002                     CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ),
1003     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1004                     IE = 1
1005                     ITAUQ = ITAU
1006                     ITAUP = ITAUQ + N
1007                     IWORK = ITAUP + N
1008*
1009*                    Bidiagonalize R in WORK(IR)
1010*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1011*                    (RWorkspace: need N)
1012*
1013                     CALL CGEBRD( N, N, WORK( IR ), LDWRKR, S,
1014     $                            RWORK( IE ), WORK( ITAUQ ),
1015     $                            WORK( ITAUP ), WORK( IWORK ),
1016     $                            LWORK-IWORK+1, IERR )
1017*
1018*                    Generate left vectors bidiagonalizing R in WORK(IR)
1019*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1020*                    (RWorkspace: 0)
1021*
1022                     CALL CUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
1023     $                            WORK( ITAUQ ), WORK( IWORK ),
1024     $                            LWORK-IWORK+1, IERR )
1025                     IRWORK = IE + N
1026*
1027*                    Perform bidiagonal QR iteration, computing left
1028*                    singular vectors of R in WORK(IR)
1029*                    (CWorkspace: need N*N)
1030*                    (RWorkspace: need BDSPAC)
1031*
1032                     CALL CBDSQR( 'U', N, 0, N, 0, S, RWORK( IE ), CDUM,
1033     $                            1, WORK( IR ), LDWRKR, CDUM, 1,
1034     $                            RWORK( IRWORK ), INFO )
1035*
1036*                    Multiply Q in A by left singular vectors of R in
1037*                    WORK(IR), storing result in U
1038*                    (CWorkspace: need N*N)
1039*                    (RWorkspace: 0)
1040*
1041                     CALL CGEMM( 'N', 'N', M, N, N, CONE, A, LDA,
1042     $                           WORK( IR ), LDWRKR, CZERO, U, LDU )
1043*
1044                  ELSE
1045*
1046*                    Insufficient workspace for a fast algorithm
1047*
1048                     ITAU = 1
1049                     IWORK = ITAU + N
1050*
1051*                    Compute A=Q*R, copying result to U
1052*                    (CWorkspace: need 2*N, prefer N+N*NB)
1053*                    (RWorkspace: 0)
1054*
1055                     CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
1056     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1057                     CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
1058*
1059*                    Generate Q in U
1060*                    (CWorkspace: need 2*N, prefer N+N*NB)
1061*                    (RWorkspace: 0)
1062*
1063                     CALL CUNGQR( M, N, N, U, LDU, WORK( ITAU ),
1064     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1065                     IE = 1
1066                     ITAUQ = ITAU
1067                     ITAUP = ITAUQ + N
1068                     IWORK = ITAUP + N
1069*
1070*                    Zero out below R in A
1071*
1072                     CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
1073     $                            A( 2, 1 ), LDA )
1074*
1075*                    Bidiagonalize R in A
1076*                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1077*                    (RWorkspace: need N)
1078*
1079                     CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ),
1080     $                            WORK( ITAUQ ), WORK( ITAUP ),
1081     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1082*
1083*                    Multiply Q in U by left vectors bidiagonalizing R
1084*                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1085*                    (RWorkspace: 0)
1086*
1087                     CALL CUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1088     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1089     $                            LWORK-IWORK+1, IERR )
1090                     IRWORK = IE + N
1091*
1092*                    Perform bidiagonal QR iteration, computing left
1093*                    singular vectors of A in U
1094*                    (CWorkspace: 0)
1095*                    (RWorkspace: need BDSPAC)
1096*
1097                     CALL CBDSQR( 'U', N, 0, M, 0, S, RWORK( IE ), CDUM,
1098     $                            1, U, LDU, CDUM, 1, RWORK( IRWORK ),
1099     $                            INFO )
1100*
1101                  END IF
1102*
1103               ELSE IF( WNTVO ) THEN
1104*
1105*                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1106*                 N left singular vectors to be computed in U and
1107*                 N right singular vectors to be overwritten on A
1108*
1109                  IF( LWORK.GE.2*N*N+3*N ) THEN
1110*
1111*                    Sufficient workspace for a fast algorithm
1112*
1113                     IU = 1
1114                     IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
1115*
1116*                       WORK(IU) is LDA by N and WORK(IR) is LDA by N
1117*
1118                        LDWRKU = LDA
1119                        IR = IU + LDWRKU*N
1120                        LDWRKR = LDA
1121                     ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
1122*
1123*                       WORK(IU) is LDA by N and WORK(IR) is N by N
1124*
1125                        LDWRKU = LDA
1126                        IR = IU + LDWRKU*N
1127                        LDWRKR = N
1128                     ELSE
1129*
1130*                       WORK(IU) is N by N and WORK(IR) is N by N
1131*
1132                        LDWRKU = N
1133                        IR = IU + LDWRKU*N
1134                        LDWRKR = N
1135                     END IF
1136                     ITAU = IR + LDWRKR*N
1137                     IWORK = ITAU + N
1138*
1139*                    Compute A=Q*R
1140*                    (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1141*                    (RWorkspace: 0)
1142*
1143                     CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
1144     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1145*
1146*                    Copy R to WORK(IU), zeroing out below it
1147*
1148                     CALL CLACPY( 'U', N, N, A, LDA, WORK( IU ),
1149     $                            LDWRKU )
1150                     CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
1151     $                            WORK( IU+1 ), LDWRKU )
1152*
1153*                    Generate Q in A
1154*                    (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1155*                    (RWorkspace: 0)
1156*
1157                     CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ),
1158     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1159                     IE = 1
1160                     ITAUQ = ITAU
1161                     ITAUP = ITAUQ + N
1162                     IWORK = ITAUP + N
1163*
1164*                    Bidiagonalize R in WORK(IU), copying result to
1165*                    WORK(IR)
1166*                    (CWorkspace: need   2*N*N+3*N,
1167*                                 prefer 2*N*N+2*N+2*N*NB)
1168*                    (RWorkspace: need   N)
1169*
1170                     CALL CGEBRD( N, N, WORK( IU ), LDWRKU, S,
1171     $                            RWORK( IE ), WORK( ITAUQ ),
1172     $                            WORK( ITAUP ), WORK( IWORK ),
1173     $                            LWORK-IWORK+1, IERR )
1174                     CALL CLACPY( 'U', N, N, WORK( IU ), LDWRKU,
1175     $                            WORK( IR ), LDWRKR )
1176*
1177*                    Generate left bidiagonalizing vectors in WORK(IU)
1178*                    (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
1179*                    (RWorkspace: 0)
1180*
1181                     CALL CUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1182     $                            WORK( ITAUQ ), WORK( IWORK ),
1183     $                            LWORK-IWORK+1, IERR )
1184*
1185*                    Generate right bidiagonalizing vectors in WORK(IR)
1186*                    (CWorkspace: need   2*N*N+3*N-1,
1187*                                 prefer 2*N*N+2*N+(N-1)*NB)
1188*                    (RWorkspace: 0)
1189*
1190                     CALL CUNGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
1191     $                            WORK( ITAUP ), WORK( IWORK ),
1192     $                            LWORK-IWORK+1, IERR )
1193                     IRWORK = IE + N
1194*
1195*                    Perform bidiagonal QR iteration, computing left
1196*                    singular vectors of R in WORK(IU) and computing
1197*                    right singular vectors of R in WORK(IR)
1198*                    (CWorkspace: need 2*N*N)
1199*                    (RWorkspace: need BDSPAC)
1200*
1201                     CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ),
1202     $                            WORK( IR ), LDWRKR, WORK( IU ),
1203     $                            LDWRKU, CDUM, 1, RWORK( IRWORK ),
1204     $                            INFO )
1205*
1206*                    Multiply Q in A by left singular vectors of R in
1207*                    WORK(IU), storing result in U
1208*                    (CWorkspace: need N*N)
1209*                    (RWorkspace: 0)
1210*
1211                     CALL CGEMM( 'N', 'N', M, N, N, CONE, A, LDA,
1212     $                           WORK( IU ), LDWRKU, CZERO, U, LDU )
1213*
1214*                    Copy right singular vectors of R to A
1215*                    (CWorkspace: need N*N)
1216*                    (RWorkspace: 0)
1217*
1218                     CALL CLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
1219     $                            LDA )
1220*
1221                  ELSE
1222*
1223*                    Insufficient workspace for a fast algorithm
1224*
1225                     ITAU = 1
1226                     IWORK = ITAU + N
1227*
1228*                    Compute A=Q*R, copying result to U
1229*                    (CWorkspace: need 2*N, prefer N+N*NB)
1230*                    (RWorkspace: 0)
1231*
1232                     CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
1233     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1234                     CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
1235*
1236*                    Generate Q in U
1237*                    (CWorkspace: need 2*N, prefer N+N*NB)
1238*                    (RWorkspace: 0)
1239*
1240                     CALL CUNGQR( M, N, N, U, LDU, WORK( ITAU ),
1241     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1242                     IE = 1
1243                     ITAUQ = ITAU
1244                     ITAUP = ITAUQ + N
1245                     IWORK = ITAUP + N
1246*
1247*                    Zero out below R in A
1248*
1249                     CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
1250     $                            A( 2, 1 ), LDA )
1251*
1252*                    Bidiagonalize R in A
1253*                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1254*                    (RWorkspace: need N)
1255*
1256                     CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ),
1257     $                            WORK( ITAUQ ), WORK( ITAUP ),
1258     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1259*
1260*                    Multiply Q in U by left vectors bidiagonalizing R
1261*                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1262*                    (RWorkspace: 0)
1263*
1264                     CALL CUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1265     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1266     $                            LWORK-IWORK+1, IERR )
1267*
1268*                    Generate right vectors bidiagonalizing R in A
1269*                    (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1270*                    (RWorkspace: 0)
1271*
1272                     CALL CUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1273     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1274                     IRWORK = IE + N
1275*
1276*                    Perform bidiagonal QR iteration, computing left
1277*                    singular vectors of A in U and computing right
1278*                    singular vectors of A in A
1279*                    (CWorkspace: 0)
1280*                    (RWorkspace: need BDSPAC)
1281*
1282                     CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), A,
1283     $                            LDA, U, LDU, CDUM, 1, RWORK( IRWORK ),
1284     $                            INFO )
1285*
1286                  END IF
1287*
1288               ELSE IF( WNTVAS ) THEN
1289*
1290*                 Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1291*                         or 'A')
1292*                 N left singular vectors to be computed in U and
1293*                 N right singular vectors to be computed in VT
1294*
1295                  IF( LWORK.GE.N*N+3*N ) THEN
1296*
1297*                    Sufficient workspace for a fast algorithm
1298*
1299                     IU = 1
1300                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
1301*
1302*                       WORK(IU) is LDA by N
1303*
1304                        LDWRKU = LDA
1305                     ELSE
1306*
1307*                       WORK(IU) is N by N
1308*
1309                        LDWRKU = N
1310                     END IF
1311                     ITAU = IU + LDWRKU*N
1312                     IWORK = ITAU + N
1313*
1314*                    Compute A=Q*R
1315*                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1316*                    (RWorkspace: 0)
1317*
1318                     CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
1319     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1320*
1321*                    Copy R to WORK(IU), zeroing out below it
1322*
1323                     CALL CLACPY( 'U', N, N, A, LDA, WORK( IU ),
1324     $                            LDWRKU )
1325                     CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
1326     $                            WORK( IU+1 ), LDWRKU )
1327*
1328*                    Generate Q in A
1329*                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1330*                    (RWorkspace: 0)
1331*
1332                     CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ),
1333     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1334                     IE = 1
1335                     ITAUQ = ITAU
1336                     ITAUP = ITAUQ + N
1337                     IWORK = ITAUP + N
1338*
1339*                    Bidiagonalize R in WORK(IU), copying result to VT
1340*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1341*                    (RWorkspace: need N)
1342*
1343                     CALL CGEBRD( N, N, WORK( IU ), LDWRKU, S,
1344     $                            RWORK( IE ), WORK( ITAUQ ),
1345     $                            WORK( ITAUP ), WORK( IWORK ),
1346     $                            LWORK-IWORK+1, IERR )
1347                     CALL CLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
1348     $                            LDVT )
1349*
1350*                    Generate left bidiagonalizing vectors in WORK(IU)
1351*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1352*                    (RWorkspace: 0)
1353*
1354                     CALL CUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1355     $                            WORK( ITAUQ ), WORK( IWORK ),
1356     $                            LWORK-IWORK+1, IERR )
1357*
1358*                    Generate right bidiagonalizing vectors in VT
1359*                    (CWorkspace: need   N*N+3*N-1,
1360*                                 prefer N*N+2*N+(N-1)*NB)
1361*                    (RWorkspace: 0)
1362*
1363                     CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1364     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1365                     IRWORK = IE + N
1366*
1367*                    Perform bidiagonal QR iteration, computing left
1368*                    singular vectors of R in WORK(IU) and computing
1369*                    right singular vectors of R in VT
1370*                    (CWorkspace: need N*N)
1371*                    (RWorkspace: need BDSPAC)
1372*
1373                     CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), VT,
1374     $                            LDVT, WORK( IU ), LDWRKU, CDUM, 1,
1375     $                            RWORK( IRWORK ), INFO )
1376*
1377*                    Multiply Q in A by left singular vectors of R in
1378*                    WORK(IU), storing result in U
1379*                    (CWorkspace: need N*N)
1380*                    (RWorkspace: 0)
1381*
1382                     CALL CGEMM( 'N', 'N', M, N, N, CONE, A, LDA,
1383     $                           WORK( IU ), LDWRKU, CZERO, U, LDU )
1384*
1385                  ELSE
1386*
1387*                    Insufficient workspace for a fast algorithm
1388*
1389                     ITAU = 1
1390                     IWORK = ITAU + N
1391*
1392*                    Compute A=Q*R, copying result to U
1393*                    (CWorkspace: need 2*N, prefer N+N*NB)
1394*                    (RWorkspace: 0)
1395*
1396                     CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
1397     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1398                     CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
1399*
1400*                    Generate Q in U
1401*                    (CWorkspace: need 2*N, prefer N+N*NB)
1402*                    (RWorkspace: 0)
1403*
1404                     CALL CUNGQR( M, N, N, U, LDU, WORK( ITAU ),
1405     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1406*
1407*                    Copy R to VT, zeroing out below it
1408*
1409                     CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT )
1410                     CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
1411     $                            VT( 2, 1 ), LDVT )
1412                     IE = 1
1413                     ITAUQ = ITAU
1414                     ITAUP = ITAUQ + N
1415                     IWORK = ITAUP + N
1416*
1417*                    Bidiagonalize R in VT
1418*                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1419*                    (RWorkspace: need N)
1420*
1421                     CALL CGEBRD( N, N, VT, LDVT, S, RWORK( IE ),
1422     $                            WORK( ITAUQ ), WORK( ITAUP ),
1423     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1424*
1425*                    Multiply Q in U by left bidiagonalizing vectors
1426*                    in VT
1427*                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1428*                    (RWorkspace: 0)
1429*
1430                     CALL CUNMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
1431     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1432     $                            LWORK-IWORK+1, IERR )
1433*
1434*                    Generate right bidiagonalizing vectors in VT
1435*                    (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1436*                    (RWorkspace: 0)
1437*
1438                     CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1439     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1440                     IRWORK = IE + N
1441*
1442*                    Perform bidiagonal QR iteration, computing left
1443*                    singular vectors of A in U and computing right
1444*                    singular vectors of A in VT
1445*                    (CWorkspace: 0)
1446*                    (RWorkspace: need BDSPAC)
1447*
1448                     CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), VT,
1449     $                            LDVT, U, LDU, CDUM, 1,
1450     $                            RWORK( IRWORK ), INFO )
1451*
1452                  END IF
1453*
1454               END IF
1455*
1456            ELSE IF( WNTUA ) THEN
1457*
1458               IF( WNTVN ) THEN
1459*
1460*                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1461*                 M left singular vectors to be computed in U and
1462*                 no right singular vectors to be computed
1463*
1464                  IF( LWORK.GE.N*N+MAX( N+M, 3*N ) ) THEN
1465*
1466*                    Sufficient workspace for a fast algorithm
1467*
1468                     IR = 1
1469                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
1470*
1471*                       WORK(IR) is LDA by N
1472*
1473                        LDWRKR = LDA
1474                     ELSE
1475*
1476*                       WORK(IR) is N by N
1477*
1478                        LDWRKR = N
1479                     END IF
1480                     ITAU = IR + LDWRKR*N
1481                     IWORK = ITAU + N
1482*
1483*                    Compute A=Q*R, copying result to U
1484*                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1485*                    (RWorkspace: 0)
1486*
1487                     CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
1488     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1489                     CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
1490*
1491*                    Copy R to WORK(IR), zeroing out below it
1492*
1493                     CALL CLACPY( 'U', N, N, A, LDA, WORK( IR ),
1494     $                            LDWRKR )
1495                     CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
1496     $                            WORK( IR+1 ), LDWRKR )
1497*
1498*                    Generate Q in U
1499*                    (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)
1500*                    (RWorkspace: 0)
1501*
1502                     CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ),
1503     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1504                     IE = 1
1505                     ITAUQ = ITAU
1506                     ITAUP = ITAUQ + N
1507                     IWORK = ITAUP + N
1508*
1509*                    Bidiagonalize R in WORK(IR)
1510*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1511*                    (RWorkspace: need N)
1512*
1513                     CALL CGEBRD( N, N, WORK( IR ), LDWRKR, S,
1514     $                            RWORK( IE ), WORK( ITAUQ ),
1515     $                            WORK( ITAUP ), WORK( IWORK ),
1516     $                            LWORK-IWORK+1, IERR )
1517*
1518*                    Generate left bidiagonalizing vectors in WORK(IR)
1519*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1520*                    (RWorkspace: 0)
1521*
1522                     CALL CUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
1523     $                            WORK( ITAUQ ), WORK( IWORK ),
1524     $                            LWORK-IWORK+1, IERR )
1525                     IRWORK = IE + N
1526*
1527*                    Perform bidiagonal QR iteration, computing left
1528*                    singular vectors of R in WORK(IR)
1529*                    (CWorkspace: need N*N)
1530*                    (RWorkspace: need BDSPAC)
1531*
1532                     CALL CBDSQR( 'U', N, 0, N, 0, S, RWORK( IE ), CDUM,
1533     $                            1, WORK( IR ), LDWRKR, CDUM, 1,
1534     $                            RWORK( IRWORK ), INFO )
1535*
1536*                    Multiply Q in U by left singular vectors of R in
1537*                    WORK(IR), storing result in A
1538*                    (CWorkspace: need N*N)
1539*                    (RWorkspace: 0)
1540*
1541                     CALL CGEMM( 'N', 'N', M, N, N, CONE, U, LDU,
1542     $                           WORK( IR ), LDWRKR, CZERO, A, LDA )
1543*
1544*                    Copy left singular vectors of A from A to U
1545*
1546                     CALL CLACPY( 'F', M, N, A, LDA, U, LDU )
1547*
1548                  ELSE
1549*
1550*                    Insufficient workspace for a fast algorithm
1551*
1552                     ITAU = 1
1553                     IWORK = ITAU + N
1554*
1555*                    Compute A=Q*R, copying result to U
1556*                    (CWorkspace: need 2*N, prefer N+N*NB)
1557*                    (RWorkspace: 0)
1558*
1559                     CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
1560     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1561                     CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
1562*
1563*                    Generate Q in U
1564*                    (CWorkspace: need N+M, prefer N+M*NB)
1565*                    (RWorkspace: 0)
1566*
1567                     CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ),
1568     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1569                     IE = 1
1570                     ITAUQ = ITAU
1571                     ITAUP = ITAUQ + N
1572                     IWORK = ITAUP + N
1573*
1574*                    Zero out below R in A
1575*
1576                     CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
1577     $                            A( 2, 1 ), LDA )
1578*
1579*                    Bidiagonalize R in A
1580*                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1581*                    (RWorkspace: need N)
1582*
1583                     CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ),
1584     $                            WORK( ITAUQ ), WORK( ITAUP ),
1585     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1586*
1587*                    Multiply Q in U by left bidiagonalizing vectors
1588*                    in A
1589*                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1590*                    (RWorkspace: 0)
1591*
1592                     CALL CUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1593     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1594     $                            LWORK-IWORK+1, IERR )
1595                     IRWORK = IE + N
1596*
1597*                    Perform bidiagonal QR iteration, computing left
1598*                    singular vectors of A in U
1599*                    (CWorkspace: 0)
1600*                    (RWorkspace: need BDSPAC)
1601*
1602                     CALL CBDSQR( 'U', N, 0, M, 0, S, RWORK( IE ), CDUM,
1603     $                            1, U, LDU, CDUM, 1, RWORK( IRWORK ),
1604     $                            INFO )
1605*
1606                  END IF
1607*
1608               ELSE IF( WNTVO ) THEN
1609*
1610*                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1611*                 M left singular vectors to be computed in U and
1612*                 N right singular vectors to be overwritten on A
1613*
1614                  IF( LWORK.GE.2*N*N+MAX( N+M, 3*N ) ) THEN
1615*
1616*                    Sufficient workspace for a fast algorithm
1617*
1618                     IU = 1
1619                     IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
1620*
1621*                       WORK(IU) is LDA by N and WORK(IR) is LDA by N
1622*
1623                        LDWRKU = LDA
1624                        IR = IU + LDWRKU*N
1625                        LDWRKR = LDA
1626                     ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
1627*
1628*                       WORK(IU) is LDA by N and WORK(IR) is N by N
1629*
1630                        LDWRKU = LDA
1631                        IR = IU + LDWRKU*N
1632                        LDWRKR = N
1633                     ELSE
1634*
1635*                       WORK(IU) is N by N and WORK(IR) is N by N
1636*
1637                        LDWRKU = N
1638                        IR = IU + LDWRKU*N
1639                        LDWRKR = N
1640                     END IF
1641                     ITAU = IR + LDWRKR*N
1642                     IWORK = ITAU + N
1643*
1644*                    Compute A=Q*R, copying result to U
1645*                    (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1646*                    (RWorkspace: 0)
1647*
1648                     CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
1649     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1650                     CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
1651*
1652*                    Generate Q in U
1653*                    (CWorkspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
1654*                    (RWorkspace: 0)
1655*
1656                     CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ),
1657     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1658*
1659*                    Copy R to WORK(IU), zeroing out below it
1660*
1661                     CALL CLACPY( 'U', N, N, A, LDA, WORK( IU ),
1662     $                            LDWRKU )
1663                     CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
1664     $                            WORK( IU+1 ), LDWRKU )
1665                     IE = 1
1666                     ITAUQ = ITAU
1667                     ITAUP = ITAUQ + N
1668                     IWORK = ITAUP + N
1669*
1670*                    Bidiagonalize R in WORK(IU), copying result to
1671*                    WORK(IR)
1672*                    (CWorkspace: need   2*N*N+3*N,
1673*                                 prefer 2*N*N+2*N+2*N*NB)
1674*                    (RWorkspace: need   N)
1675*
1676                     CALL CGEBRD( N, N, WORK( IU ), LDWRKU, S,
1677     $                            RWORK( IE ), WORK( ITAUQ ),
1678     $                            WORK( ITAUP ), WORK( IWORK ),
1679     $                            LWORK-IWORK+1, IERR )
1680                     CALL CLACPY( 'U', N, N, WORK( IU ), LDWRKU,
1681     $                            WORK( IR ), LDWRKR )
1682*
1683*                    Generate left bidiagonalizing vectors in WORK(IU)
1684*                    (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
1685*                    (RWorkspace: 0)
1686*
1687                     CALL CUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1688     $                            WORK( ITAUQ ), WORK( IWORK ),
1689     $                            LWORK-IWORK+1, IERR )
1690*
1691*                    Generate right bidiagonalizing vectors in WORK(IR)
1692*                    (CWorkspace: need   2*N*N+3*N-1,
1693*                                 prefer 2*N*N+2*N+(N-1)*NB)
1694*                    (RWorkspace: 0)
1695*
1696                     CALL CUNGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
1697     $                            WORK( ITAUP ), WORK( IWORK ),
1698     $                            LWORK-IWORK+1, IERR )
1699                     IRWORK = IE + N
1700*
1701*                    Perform bidiagonal QR iteration, computing left
1702*                    singular vectors of R in WORK(IU) and computing
1703*                    right singular vectors of R in WORK(IR)
1704*                    (CWorkspace: need 2*N*N)
1705*                    (RWorkspace: need BDSPAC)
1706*
1707                     CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ),
1708     $                            WORK( IR ), LDWRKR, WORK( IU ),
1709     $                            LDWRKU, CDUM, 1, RWORK( IRWORK ),
1710     $                            INFO )
1711*
1712*                    Multiply Q in U by left singular vectors of R in
1713*                    WORK(IU), storing result in A
1714*                    (CWorkspace: need N*N)
1715*                    (RWorkspace: 0)
1716*
1717                     CALL CGEMM( 'N', 'N', M, N, N, CONE, U, LDU,
1718     $                           WORK( IU ), LDWRKU, CZERO, A, LDA )
1719*
1720*                    Copy left singular vectors of A from A to U
1721*
1722                     CALL CLACPY( 'F', M, N, A, LDA, U, LDU )
1723*
1724*                    Copy right singular vectors of R from WORK(IR) to A
1725*
1726                     CALL CLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
1727     $                            LDA )
1728*
1729                  ELSE
1730*
1731*                    Insufficient workspace for a fast algorithm
1732*
1733                     ITAU = 1
1734                     IWORK = ITAU + N
1735*
1736*                    Compute A=Q*R, copying result to U
1737*                    (CWorkspace: need 2*N, prefer N+N*NB)
1738*                    (RWorkspace: 0)
1739*
1740                     CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
1741     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1742                     CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
1743*
1744*                    Generate Q in U
1745*                    (CWorkspace: need N+M, prefer N+M*NB)
1746*                    (RWorkspace: 0)
1747*
1748                     CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ),
1749     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1750                     IE = 1
1751                     ITAUQ = ITAU
1752                     ITAUP = ITAUQ + N
1753                     IWORK = ITAUP + N
1754*
1755*                    Zero out below R in A
1756*
1757                     CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
1758     $                            A( 2, 1 ), LDA )
1759*
1760*                    Bidiagonalize R in A
1761*                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1762*                    (RWorkspace: need N)
1763*
1764                     CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ),
1765     $                            WORK( ITAUQ ), WORK( ITAUP ),
1766     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1767*
1768*                    Multiply Q in U by left bidiagonalizing vectors
1769*                    in A
1770*                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1771*                    (RWorkspace: 0)
1772*
1773                     CALL CUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1774     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1775     $                            LWORK-IWORK+1, IERR )
1776*
1777*                    Generate right bidiagonalizing vectors in A
1778*                    (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1779*                    (RWorkspace: 0)
1780*
1781                     CALL CUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1782     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1783                     IRWORK = IE + N
1784*
1785*                    Perform bidiagonal QR iteration, computing left
1786*                    singular vectors of A in U and computing right
1787*                    singular vectors of A in A
1788*                    (CWorkspace: 0)
1789*                    (RWorkspace: need BDSPAC)
1790*
1791                     CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), A,
1792     $                            LDA, U, LDU, CDUM, 1, RWORK( IRWORK ),
1793     $                            INFO )
1794*
1795                  END IF
1796*
1797               ELSE IF( WNTVAS ) THEN
1798*
1799*                 Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1800*                         or 'A')
1801*                 M left singular vectors to be computed in U and
1802*                 N right singular vectors to be computed in VT
1803*
1804                  IF( LWORK.GE.N*N+MAX( N+M, 3*N ) ) THEN
1805*
1806*                    Sufficient workspace for a fast algorithm
1807*
1808                     IU = 1
1809                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
1810*
1811*                       WORK(IU) is LDA by N
1812*
1813                        LDWRKU = LDA
1814                     ELSE
1815*
1816*                       WORK(IU) is N by N
1817*
1818                        LDWRKU = N
1819                     END IF
1820                     ITAU = IU + LDWRKU*N
1821                     IWORK = ITAU + N
1822*
1823*                    Compute A=Q*R, copying result to U
1824*                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1825*                    (RWorkspace: 0)
1826*
1827                     CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
1828     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1829                     CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
1830*
1831*                    Generate Q in U
1832*                    (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)
1833*                    (RWorkspace: 0)
1834*
1835                     CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ),
1836     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1837*
1838*                    Copy R to WORK(IU), zeroing out below it
1839*
1840                     CALL CLACPY( 'U', N, N, A, LDA, WORK( IU ),
1841     $                            LDWRKU )
1842                     CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
1843     $                            WORK( IU+1 ), LDWRKU )
1844                     IE = 1
1845                     ITAUQ = ITAU
1846                     ITAUP = ITAUQ + N
1847                     IWORK = ITAUP + N
1848*
1849*                    Bidiagonalize R in WORK(IU), copying result to VT
1850*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1851*                    (RWorkspace: need N)
1852*
1853                     CALL CGEBRD( N, N, WORK( IU ), LDWRKU, S,
1854     $                            RWORK( IE ), WORK( ITAUQ ),
1855     $                            WORK( ITAUP ), WORK( IWORK ),
1856     $                            LWORK-IWORK+1, IERR )
1857                     CALL CLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
1858     $                            LDVT )
1859*
1860*                    Generate left bidiagonalizing vectors in WORK(IU)
1861*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1862*                    (RWorkspace: 0)
1863*
1864                     CALL CUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1865     $                            WORK( ITAUQ ), WORK( IWORK ),
1866     $                            LWORK-IWORK+1, IERR )
1867*
1868*                    Generate right bidiagonalizing vectors in VT
1869*                    (CWorkspace: need   N*N+3*N-1,
1870*                                 prefer N*N+2*N+(N-1)*NB)
1871*                    (RWorkspace: need   0)
1872*
1873                     CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1874     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1875                     IRWORK = IE + N
1876*
1877*                    Perform bidiagonal QR iteration, computing left
1878*                    singular vectors of R in WORK(IU) and computing
1879*                    right singular vectors of R in VT
1880*                    (CWorkspace: need N*N)
1881*                    (RWorkspace: need BDSPAC)
1882*
1883                     CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), VT,
1884     $                            LDVT, WORK( IU ), LDWRKU, CDUM, 1,
1885     $                            RWORK( IRWORK ), INFO )
1886*
1887*                    Multiply Q in U by left singular vectors of R in
1888*                    WORK(IU), storing result in A
1889*                    (CWorkspace: need N*N)
1890*                    (RWorkspace: 0)
1891*
1892                     CALL CGEMM( 'N', 'N', M, N, N, CONE, U, LDU,
1893     $                           WORK( IU ), LDWRKU, CZERO, A, LDA )
1894*
1895*                    Copy left singular vectors of A from A to U
1896*
1897                     CALL CLACPY( 'F', M, N, A, LDA, U, LDU )
1898*
1899                  ELSE
1900*
1901*                    Insufficient workspace for a fast algorithm
1902*
1903                     ITAU = 1
1904                     IWORK = ITAU + N
1905*
1906*                    Compute A=Q*R, copying result to U
1907*                    (CWorkspace: need 2*N, prefer N+N*NB)
1908*                    (RWorkspace: 0)
1909*
1910                     CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
1911     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1912                     CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
1913*
1914*                    Generate Q in U
1915*                    (CWorkspace: need N+M, prefer N+M*NB)
1916*                    (RWorkspace: 0)
1917*
1918                     CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ),
1919     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1920*
1921*                    Copy R from A to VT, zeroing out below it
1922*
1923                     CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT )
1924                     CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
1925     $                            VT( 2, 1 ), LDVT )
1926                     IE = 1
1927                     ITAUQ = ITAU
1928                     ITAUP = ITAUQ + N
1929                     IWORK = ITAUP + N
1930*
1931*                    Bidiagonalize R in VT
1932*                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1933*                    (RWorkspace: need N)
1934*
1935                     CALL CGEBRD( N, N, VT, LDVT, S, RWORK( IE ),
1936     $                            WORK( ITAUQ ), WORK( ITAUP ),
1937     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1938*
1939*                    Multiply Q in U by left bidiagonalizing vectors
1940*                    in VT
1941*                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1942*                    (RWorkspace: 0)
1943*
1944                     CALL CUNMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
1945     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1946     $                            LWORK-IWORK+1, IERR )
1947*
1948*                    Generate right bidiagonalizing vectors in VT
1949*                    (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1950*                    (RWorkspace: 0)
1951*
1952                     CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1953     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1954                     IRWORK = IE + N
1955*
1956*                    Perform bidiagonal QR iteration, computing left
1957*                    singular vectors of A in U and computing right
1958*                    singular vectors of A in VT
1959*                    (CWorkspace: 0)
1960*                    (RWorkspace: need BDSPAC)
1961*
1962                     CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), VT,
1963     $                            LDVT, U, LDU, CDUM, 1,
1964     $                            RWORK( IRWORK ), INFO )
1965*
1966                  END IF
1967*
1968               END IF
1969*
1970            END IF
1971*
1972         ELSE
1973*
1974*           M .LT. MNTHR
1975*
1976*           Path 10 (M at least N, but not much larger)
1977*           Reduce to bidiagonal form without QR decomposition
1978*
1979            IE = 1
1980            ITAUQ = 1
1981            ITAUP = ITAUQ + N
1982            IWORK = ITAUP + N
1983*
1984*           Bidiagonalize A
1985*           (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
1986*           (RWorkspace: need N)
1987*
1988            CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
1989     $                   WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
1990     $                   IERR )
1991            IF( WNTUAS ) THEN
1992*
1993*              If left singular vectors desired in U, copy result to U
1994*              and generate left bidiagonalizing vectors in U
1995*              (CWorkspace: need 2*N+NCU, prefer 2*N+NCU*NB)
1996*              (RWorkspace: 0)
1997*
1998               CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
1999               IF( WNTUS )
2000     $            NCU = N
2001               IF( WNTUA )
2002     $            NCU = M
2003               CALL CUNGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ),
2004     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
2005            END IF
2006            IF( WNTVAS ) THEN
2007*
2008*              If right singular vectors desired in VT, copy result to
2009*              VT and generate right bidiagonalizing vectors in VT
2010*              (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2011*              (RWorkspace: 0)
2012*
2013               CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT )
2014               CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
2015     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
2016            END IF
2017            IF( WNTUO ) THEN
2018*
2019*              If left singular vectors desired in A, generate left
2020*              bidiagonalizing vectors in A
2021*              (CWorkspace: need 3*N, prefer 2*N+N*NB)
2022*              (RWorkspace: 0)
2023*
2024               CALL CUNGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
2025     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
2026            END IF
2027            IF( WNTVO ) THEN
2028*
2029*              If right singular vectors desired in A, generate right
2030*              bidiagonalizing vectors in A
2031*              (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2032*              (RWorkspace: 0)
2033*
2034               CALL CUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
2035     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
2036            END IF
2037            IRWORK = IE + N
2038            IF( WNTUAS .OR. WNTUO )
2039     $         NRU = M
2040            IF( WNTUN )
2041     $         NRU = 0
2042            IF( WNTVAS .OR. WNTVO )
2043     $         NCVT = N
2044            IF( WNTVN )
2045     $         NCVT = 0
2046            IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
2047*
2048*              Perform bidiagonal QR iteration, if desired, computing
2049*              left singular vectors in U and computing right singular
2050*              vectors in VT
2051*              (CWorkspace: 0)
2052*              (RWorkspace: need BDSPAC)
2053*
2054               CALL CBDSQR( 'U', N, NCVT, NRU, 0, S, RWORK( IE ), VT,
2055     $                      LDVT, U, LDU, CDUM, 1, RWORK( IRWORK ),
2056     $                      INFO )
2057            ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
2058*
2059*              Perform bidiagonal QR iteration, if desired, computing
2060*              left singular vectors in U and computing right singular
2061*              vectors in A
2062*              (CWorkspace: 0)
2063*              (RWorkspace: need BDSPAC)
2064*
2065               CALL CBDSQR( 'U', N, NCVT, NRU, 0, S, RWORK( IE ), A,
2066     $                      LDA, U, LDU, CDUM, 1, RWORK( IRWORK ),
2067     $                      INFO )
2068            ELSE
2069*
2070*              Perform bidiagonal QR iteration, if desired, computing
2071*              left singular vectors in A and computing right singular
2072*              vectors in VT
2073*              (CWorkspace: 0)
2074*              (RWorkspace: need BDSPAC)
2075*
2076               CALL CBDSQR( 'U', N, NCVT, NRU, 0, S, RWORK( IE ), VT,
2077     $                      LDVT, A, LDA, CDUM, 1, RWORK( IRWORK ),
2078     $                      INFO )
2079            END IF
2080*
2081         END IF
2082*
2083      ELSE
2084*
2085*        A has more columns than rows. If A has sufficiently more
2086*        columns than rows, first reduce using the LQ decomposition (if
2087*        sufficient workspace available)
2088*
2089         IF( N.GE.MNTHR ) THEN
2090*
2091            IF( WNTVN ) THEN
2092*
2093*              Path 1t(N much larger than M, JOBVT='N')
2094*              No right singular vectors to be computed
2095*
2096               ITAU = 1
2097               IWORK = ITAU + M
2098*
2099*              Compute A=L*Q
2100*              (CWorkspace: need 2*M, prefer M+M*NB)
2101*              (RWorkspace: 0)
2102*
2103               CALL CGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
2104     $                      LWORK-IWORK+1, IERR )
2105*
2106*              Zero out above L
2107*
2108               CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, A( 1, 2 ),
2109     $                      LDA )
2110               IE = 1
2111               ITAUQ = 1
2112               ITAUP = ITAUQ + M
2113               IWORK = ITAUP + M
2114*
2115*              Bidiagonalize L in A
2116*              (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2117*              (RWorkspace: need M)
2118*
2119               CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
2120     $                      WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
2121     $                      IERR )
2122               IF( WNTUO .OR. WNTUAS ) THEN
2123*
2124*                 If left singular vectors desired, generate Q
2125*                 (CWorkspace: need 3*M, prefer 2*M+M*NB)
2126*                 (RWorkspace: 0)
2127*
2128                  CALL CUNGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
2129     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2130               END IF
2131               IRWORK = IE + M
2132               NRU = 0
2133               IF( WNTUO .OR. WNTUAS )
2134     $            NRU = M
2135*
2136*              Perform bidiagonal QR iteration, computing left singular
2137*              vectors of A in A if desired
2138*              (CWorkspace: 0)
2139*              (RWorkspace: need BDSPAC)
2140*
2141               CALL CBDSQR( 'U', M, 0, NRU, 0, S, RWORK( IE ), CDUM, 1,
2142     $                      A, LDA, CDUM, 1, RWORK( IRWORK ), INFO )
2143*
2144*              If left singular vectors desired in U, copy them there
2145*
2146               IF( WNTUAS )
2147     $            CALL CLACPY( 'F', M, M, A, LDA, U, LDU )
2148*
2149            ELSE IF( WNTVO .AND. WNTUN ) THEN
2150*
2151*              Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2152*              M right singular vectors to be overwritten on A and
2153*              no left singular vectors to be computed
2154*
2155               IF( LWORK.GE.M*M+3*M ) THEN
2156*
2157*                 Sufficient workspace for a fast algorithm
2158*
2159                  IR = 1
2160                  IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*M ) THEN
2161*
2162*                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
2163*
2164                     LDWRKU = LDA
2165                     CHUNK = N
2166                     LDWRKR = LDA
2167                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+M*M ) THEN
2168*
2169*                    WORK(IU) is LDA by N and WORK(IR) is M by M
2170*
2171                     LDWRKU = LDA
2172                     CHUNK = N
2173                     LDWRKR = M
2174                  ELSE
2175*
2176*                    WORK(IU) is M by CHUNK and WORK(IR) is M by M
2177*
2178                     LDWRKU = M
2179                     CHUNK = ( LWORK-M*M ) / M
2180                     LDWRKR = M
2181                  END IF
2182                  ITAU = IR + LDWRKR*M
2183                  IWORK = ITAU + M
2184*
2185*                 Compute A=L*Q
2186*                 (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2187*                 (RWorkspace: 0)
2188*
2189                  CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
2190     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2191*
2192*                 Copy L to WORK(IR) and zero out above it
2193*
2194                  CALL CLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR )
2195                  CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
2196     $                         WORK( IR+LDWRKR ), LDWRKR )
2197*
2198*                 Generate Q in A
2199*                 (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2200*                 (RWorkspace: 0)
2201*
2202                  CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
2203     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2204                  IE = 1
2205                  ITAUQ = ITAU
2206                  ITAUP = ITAUQ + M
2207                  IWORK = ITAUP + M
2208*
2209*                 Bidiagonalize L in WORK(IR)
2210*                 (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2211*                 (RWorkspace: need M)
2212*
2213                  CALL CGEBRD( M, M, WORK( IR ), LDWRKR, S, RWORK( IE ),
2214     $                         WORK( ITAUQ ), WORK( ITAUP ),
2215     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2216*
2217*                 Generate right vectors bidiagonalizing L
2218*                 (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)
2219*                 (RWorkspace: 0)
2220*
2221                  CALL CUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2222     $                         WORK( ITAUP ), WORK( IWORK ),
2223     $                         LWORK-IWORK+1, IERR )
2224                  IRWORK = IE + M
2225*
2226*                 Perform bidiagonal QR iteration, computing right
2227*                 singular vectors of L in WORK(IR)
2228*                 (CWorkspace: need M*M)
2229*                 (RWorkspace: need BDSPAC)
2230*
2231                  CALL CBDSQR( 'U', M, M, 0, 0, S, RWORK( IE ),
2232     $                         WORK( IR ), LDWRKR, CDUM, 1, CDUM, 1,
2233     $                         RWORK( IRWORK ), INFO )
2234                  IU = ITAUQ
2235*
2236*                 Multiply right singular vectors of L in WORK(IR) by Q
2237*                 in A, storing result in WORK(IU) and copying to A
2238*                 (CWorkspace: need M*M+M, prefer M*M+M*N)
2239*                 (RWorkspace: 0)
2240*
2241                  DO 30 I = 1, N, CHUNK
2242                     BLK = MIN( N-I+1, CHUNK )
2243                     CALL CGEMM( 'N', 'N', M, BLK, M, CONE, WORK( IR ),
2244     $                           LDWRKR, A( 1, I ), LDA, CZERO,
2245     $                           WORK( IU ), LDWRKU )
2246                     CALL CLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
2247     $                            A( 1, I ), LDA )
2248   30             CONTINUE
2249*
2250               ELSE
2251*
2252*                 Insufficient workspace for a fast algorithm
2253*
2254                  IE = 1
2255                  ITAUQ = 1
2256                  ITAUP = ITAUQ + M
2257                  IWORK = ITAUP + M
2258*
2259*                 Bidiagonalize A
2260*                 (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
2261*                 (RWorkspace: need M)
2262*
2263                  CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ),
2264     $                         WORK( ITAUQ ), WORK( ITAUP ),
2265     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2266*
2267*                 Generate right vectors bidiagonalizing A
2268*                 (CWorkspace: need 3*M, prefer 2*M+M*NB)
2269*                 (RWorkspace: 0)
2270*
2271                  CALL CUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
2272     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2273                  IRWORK = IE + M
2274*
2275*                 Perform bidiagonal QR iteration, computing right
2276*                 singular vectors of A in A
2277*                 (CWorkspace: 0)
2278*                 (RWorkspace: need BDSPAC)
2279*
2280                  CALL CBDSQR( 'L', M, N, 0, 0, S, RWORK( IE ), A, LDA,
2281     $                         CDUM, 1, CDUM, 1, RWORK( IRWORK ), INFO )
2282*
2283               END IF
2284*
2285            ELSE IF( WNTVO .AND. WNTUAS ) THEN
2286*
2287*              Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2288*              M right singular vectors to be overwritten on A and
2289*              M left singular vectors to be computed in U
2290*
2291               IF( LWORK.GE.M*M+3*M ) THEN
2292*
2293*                 Sufficient workspace for a fast algorithm
2294*
2295                  IR = 1
2296                  IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*M ) THEN
2297*
2298*                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
2299*
2300                     LDWRKU = LDA
2301                     CHUNK = N
2302                     LDWRKR = LDA
2303                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+M*M ) THEN
2304*
2305*                    WORK(IU) is LDA by N and WORK(IR) is M by M
2306*
2307                     LDWRKU = LDA
2308                     CHUNK = N
2309                     LDWRKR = M
2310                  ELSE
2311*
2312*                    WORK(IU) is M by CHUNK and WORK(IR) is M by M
2313*
2314                     LDWRKU = M
2315                     CHUNK = ( LWORK-M*M ) / M
2316                     LDWRKR = M
2317                  END IF
2318                  ITAU = IR + LDWRKR*M
2319                  IWORK = ITAU + M
2320*
2321*                 Compute A=L*Q
2322*                 (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2323*                 (RWorkspace: 0)
2324*
2325                  CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
2326     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2327*
2328*                 Copy L to U, zeroing about above it
2329*
2330                  CALL CLACPY( 'L', M, M, A, LDA, U, LDU )
2331                  CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, U( 1, 2 ),
2332     $                         LDU )
2333*
2334*                 Generate Q in A
2335*                 (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2336*                 (RWorkspace: 0)
2337*
2338                  CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
2339     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2340                  IE = 1
2341                  ITAUQ = ITAU
2342                  ITAUP = ITAUQ + M
2343                  IWORK = ITAUP + M
2344*
2345*                 Bidiagonalize L in U, copying result to WORK(IR)
2346*                 (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2347*                 (RWorkspace: need M)
2348*
2349                  CALL CGEBRD( M, M, U, LDU, S, RWORK( IE ),
2350     $                         WORK( ITAUQ ), WORK( ITAUP ),
2351     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2352                  CALL CLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR )
2353*
2354*                 Generate right vectors bidiagonalizing L in WORK(IR)
2355*                 (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)
2356*                 (RWorkspace: 0)
2357*
2358                  CALL CUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2359     $                         WORK( ITAUP ), WORK( IWORK ),
2360     $                         LWORK-IWORK+1, IERR )
2361*
2362*                 Generate left vectors bidiagonalizing L in U
2363*                 (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
2364*                 (RWorkspace: 0)
2365*
2366                  CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2367     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2368                  IRWORK = IE + M
2369*
2370*                 Perform bidiagonal QR iteration, computing left
2371*                 singular vectors of L in U, and computing right
2372*                 singular vectors of L in WORK(IR)
2373*                 (CWorkspace: need M*M)
2374*                 (RWorkspace: need BDSPAC)
2375*
2376                  CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ),
2377     $                         WORK( IR ), LDWRKR, U, LDU, CDUM, 1,
2378     $                         RWORK( IRWORK ), INFO )
2379                  IU = ITAUQ
2380*
2381*                 Multiply right singular vectors of L in WORK(IR) by Q
2382*                 in A, storing result in WORK(IU) and copying to A
2383*                 (CWorkspace: need M*M+M, prefer M*M+M*N))
2384*                 (RWorkspace: 0)
2385*
2386                  DO 40 I = 1, N, CHUNK
2387                     BLK = MIN( N-I+1, CHUNK )
2388                     CALL CGEMM( 'N', 'N', M, BLK, M, CONE, WORK( IR ),
2389     $                           LDWRKR, A( 1, I ), LDA, CZERO,
2390     $                           WORK( IU ), LDWRKU )
2391                     CALL CLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
2392     $                            A( 1, I ), LDA )
2393   40             CONTINUE
2394*
2395               ELSE
2396*
2397*                 Insufficient workspace for a fast algorithm
2398*
2399                  ITAU = 1
2400                  IWORK = ITAU + M
2401*
2402*                 Compute A=L*Q
2403*                 (CWorkspace: need 2*M, prefer M+M*NB)
2404*                 (RWorkspace: 0)
2405*
2406                  CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
2407     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2408*
2409*                 Copy L to U, zeroing out above it
2410*
2411                  CALL CLACPY( 'L', M, M, A, LDA, U, LDU )
2412                  CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, U( 1, 2 ),
2413     $                         LDU )
2414*
2415*                 Generate Q in A
2416*                 (CWorkspace: need 2*M, prefer M+M*NB)
2417*                 (RWorkspace: 0)
2418*
2419                  CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
2420     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2421                  IE = 1
2422                  ITAUQ = ITAU
2423                  ITAUP = ITAUQ + M
2424                  IWORK = ITAUP + M
2425*
2426*                 Bidiagonalize L in U
2427*                 (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2428*                 (RWorkspace: need M)
2429*
2430                  CALL CGEBRD( M, M, U, LDU, S, RWORK( IE ),
2431     $                         WORK( ITAUQ ), WORK( ITAUP ),
2432     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2433*
2434*                 Multiply right vectors bidiagonalizing L by Q in A
2435*                 (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2436*                 (RWorkspace: 0)
2437*
2438                  CALL CUNMBR( 'P', 'L', 'C', M, N, M, U, LDU,
2439     $                         WORK( ITAUP ), A, LDA, WORK( IWORK ),
2440     $                         LWORK-IWORK+1, IERR )
2441*
2442*                 Generate left vectors bidiagonalizing L in U
2443*                 (CWorkspace: need 3*M, prefer 2*M+M*NB)
2444*                 (RWorkspace: 0)
2445*
2446                  CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2447     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2448                  IRWORK = IE + M
2449*
2450*                 Perform bidiagonal QR iteration, computing left
2451*                 singular vectors of A in U and computing right
2452*                 singular vectors of A in A
2453*                 (CWorkspace: 0)
2454*                 (RWorkspace: need BDSPAC)
2455*
2456                  CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), A, LDA,
2457     $                         U, LDU, CDUM, 1, RWORK( IRWORK ), INFO )
2458*
2459               END IF
2460*
2461            ELSE IF( WNTVS ) THEN
2462*
2463               IF( WNTUN ) THEN
2464*
2465*                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2466*                 M right singular vectors to be computed in VT and
2467*                 no left singular vectors to be computed
2468*
2469                  IF( LWORK.GE.M*M+3*M ) THEN
2470*
2471*                    Sufficient workspace for a fast algorithm
2472*
2473                     IR = 1
2474                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
2475*
2476*                       WORK(IR) is LDA by M
2477*
2478                        LDWRKR = LDA
2479                     ELSE
2480*
2481*                       WORK(IR) is M by M
2482*
2483                        LDWRKR = M
2484                     END IF
2485                     ITAU = IR + LDWRKR*M
2486                     IWORK = ITAU + M
2487*
2488*                    Compute A=L*Q
2489*                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2490*                    (RWorkspace: 0)
2491*
2492                     CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
2493     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2494*
2495*                    Copy L to WORK(IR), zeroing out above it
2496*
2497                     CALL CLACPY( 'L', M, M, A, LDA, WORK( IR ),
2498     $                            LDWRKR )
2499                     CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
2500     $                            WORK( IR+LDWRKR ), LDWRKR )
2501*
2502*                    Generate Q in A
2503*                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2504*                    (RWorkspace: 0)
2505*
2506                     CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
2507     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2508                     IE = 1
2509                     ITAUQ = ITAU
2510                     ITAUP = ITAUQ + M
2511                     IWORK = ITAUP + M
2512*
2513*                    Bidiagonalize L in WORK(IR)
2514*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2515*                    (RWorkspace: need M)
2516*
2517                     CALL CGEBRD( M, M, WORK( IR ), LDWRKR, S,
2518     $                            RWORK( IE ), WORK( ITAUQ ),
2519     $                            WORK( ITAUP ), WORK( IWORK ),
2520     $                            LWORK-IWORK+1, IERR )
2521*
2522*                    Generate right vectors bidiagonalizing L in
2523*                    WORK(IR)
2524*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB)
2525*                    (RWorkspace: 0)
2526*
2527                     CALL CUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2528     $                            WORK( ITAUP ), WORK( IWORK ),
2529     $                            LWORK-IWORK+1, IERR )
2530                     IRWORK = IE + M
2531*
2532*                    Perform bidiagonal QR iteration, computing right
2533*                    singular vectors of L in WORK(IR)
2534*                    (CWorkspace: need M*M)
2535*                    (RWorkspace: need BDSPAC)
2536*
2537                     CALL CBDSQR( 'U', M, M, 0, 0, S, RWORK( IE ),
2538     $                            WORK( IR ), LDWRKR, CDUM, 1, CDUM, 1,
2539     $                            RWORK( IRWORK ), INFO )
2540*
2541*                    Multiply right singular vectors of L in WORK(IR) by
2542*                    Q in A, storing result in VT
2543*                    (CWorkspace: need M*M)
2544*                    (RWorkspace: 0)
2545*
2546                     CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IR ),
2547     $                           LDWRKR, A, LDA, CZERO, VT, LDVT )
2548*
2549                  ELSE
2550*
2551*                    Insufficient workspace for a fast algorithm
2552*
2553                     ITAU = 1
2554                     IWORK = ITAU + M
2555*
2556*                    Compute A=L*Q
2557*                    (CWorkspace: need 2*M, prefer M+M*NB)
2558*                    (RWorkspace: 0)
2559*
2560                     CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
2561     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2562*
2563*                    Copy result to VT
2564*
2565                     CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
2566*
2567*                    Generate Q in VT
2568*                    (CWorkspace: need 2*M, prefer M+M*NB)
2569*                    (RWorkspace: 0)
2570*
2571                     CALL CUNGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2572     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2573                     IE = 1
2574                     ITAUQ = ITAU
2575                     ITAUP = ITAUQ + M
2576                     IWORK = ITAUP + M
2577*
2578*                    Zero out above L in A
2579*
2580                     CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
2581     $                            A( 1, 2 ), LDA )
2582*
2583*                    Bidiagonalize L in A
2584*                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2585*                    (RWorkspace: need M)
2586*
2587                     CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ),
2588     $                            WORK( ITAUQ ), WORK( ITAUP ),
2589     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2590*
2591*                    Multiply right vectors bidiagonalizing L by Q in VT
2592*                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2593*                    (RWorkspace: 0)
2594*
2595                     CALL CUNMBR( 'P', 'L', 'C', M, N, M, A, LDA,
2596     $                            WORK( ITAUP ), VT, LDVT,
2597     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2598                     IRWORK = IE + M
2599*
2600*                    Perform bidiagonal QR iteration, computing right
2601*                    singular vectors of A in VT
2602*                    (CWorkspace: 0)
2603*                    (RWorkspace: need BDSPAC)
2604*
2605                     CALL CBDSQR( 'U', M, N, 0, 0, S, RWORK( IE ), VT,
2606     $                            LDVT, CDUM, 1, CDUM, 1,
2607     $                            RWORK( IRWORK ), INFO )
2608*
2609                  END IF
2610*
2611               ELSE IF( WNTUO ) THEN
2612*
2613*                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2614*                 M right singular vectors to be computed in VT and
2615*                 M left singular vectors to be overwritten on A
2616*
2617                  IF( LWORK.GE.2*M*M+3*M ) THEN
2618*
2619*                    Sufficient workspace for a fast algorithm
2620*
2621                     IU = 1
2622                     IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
2623*
2624*                       WORK(IU) is LDA by M and WORK(IR) is LDA by M
2625*
2626                        LDWRKU = LDA
2627                        IR = IU + LDWRKU*M
2628                        LDWRKR = LDA
2629                     ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
2630*
2631*                       WORK(IU) is LDA by M and WORK(IR) is M by M
2632*
2633                        LDWRKU = LDA
2634                        IR = IU + LDWRKU*M
2635                        LDWRKR = M
2636                     ELSE
2637*
2638*                       WORK(IU) is M by M and WORK(IR) is M by M
2639*
2640                        LDWRKU = M
2641                        IR = IU + LDWRKU*M
2642                        LDWRKR = M
2643                     END IF
2644                     ITAU = IR + LDWRKR*M
2645                     IWORK = ITAU + M
2646*
2647*                    Compute A=L*Q
2648*                    (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2649*                    (RWorkspace: 0)
2650*
2651                     CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
2652     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2653*
2654*                    Copy L to WORK(IU), zeroing out below it
2655*
2656                     CALL CLACPY( 'L', M, M, A, LDA, WORK( IU ),
2657     $                            LDWRKU )
2658                     CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
2659     $                            WORK( IU+LDWRKU ), LDWRKU )
2660*
2661*                    Generate Q in A
2662*                    (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2663*                    (RWorkspace: 0)
2664*
2665                     CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
2666     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2667                     IE = 1
2668                     ITAUQ = ITAU
2669                     ITAUP = ITAUQ + M
2670                     IWORK = ITAUP + M
2671*
2672*                    Bidiagonalize L in WORK(IU), copying result to
2673*                    WORK(IR)
2674*                    (CWorkspace: need   2*M*M+3*M,
2675*                                 prefer 2*M*M+2*M+2*M*NB)
2676*                    (RWorkspace: need   M)
2677*
2678                     CALL CGEBRD( M, M, WORK( IU ), LDWRKU, S,
2679     $                            RWORK( IE ), WORK( ITAUQ ),
2680     $                            WORK( ITAUP ), WORK( IWORK ),
2681     $                            LWORK-IWORK+1, IERR )
2682                     CALL CLACPY( 'L', M, M, WORK( IU ), LDWRKU,
2683     $                            WORK( IR ), LDWRKR )
2684*
2685*                    Generate right bidiagonalizing vectors in WORK(IU)
2686*                    (CWorkspace: need   2*M*M+3*M-1,
2687*                                 prefer 2*M*M+2*M+(M-1)*NB)
2688*                    (RWorkspace: 0)
2689*
2690                     CALL CUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
2691     $                            WORK( ITAUP ), WORK( IWORK ),
2692     $                            LWORK-IWORK+1, IERR )
2693*
2694*                    Generate left bidiagonalizing vectors in WORK(IR)
2695*                    (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
2696*                    (RWorkspace: 0)
2697*
2698                     CALL CUNGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
2699     $                            WORK( ITAUQ ), WORK( IWORK ),
2700     $                            LWORK-IWORK+1, IERR )
2701                     IRWORK = IE + M
2702*
2703*                    Perform bidiagonal QR iteration, computing left
2704*                    singular vectors of L in WORK(IR) and computing
2705*                    right singular vectors of L in WORK(IU)
2706*                    (CWorkspace: need 2*M*M)
2707*                    (RWorkspace: need BDSPAC)
2708*
2709                     CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ),
2710     $                            WORK( IU ), LDWRKU, WORK( IR ),
2711     $                            LDWRKR, CDUM, 1, RWORK( IRWORK ),
2712     $                            INFO )
2713*
2714*                    Multiply right singular vectors of L in WORK(IU) by
2715*                    Q in A, storing result in VT
2716*                    (CWorkspace: need M*M)
2717*                    (RWorkspace: 0)
2718*
2719                     CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ),
2720     $                           LDWRKU, A, LDA, CZERO, VT, LDVT )
2721*
2722*                    Copy left singular vectors of L to A
2723*                    (CWorkspace: need M*M)
2724*                    (RWorkspace: 0)
2725*
2726                     CALL CLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
2727     $                            LDA )
2728*
2729                  ELSE
2730*
2731*                    Insufficient workspace for a fast algorithm
2732*
2733                     ITAU = 1
2734                     IWORK = ITAU + M
2735*
2736*                    Compute A=L*Q, copying result to VT
2737*                    (CWorkspace: need 2*M, prefer M+M*NB)
2738*                    (RWorkspace: 0)
2739*
2740                     CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
2741     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2742                     CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
2743*
2744*                    Generate Q in VT
2745*                    (CWorkspace: need 2*M, prefer M+M*NB)
2746*                    (RWorkspace: 0)
2747*
2748                     CALL CUNGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2749     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2750                     IE = 1
2751                     ITAUQ = ITAU
2752                     ITAUP = ITAUQ + M
2753                     IWORK = ITAUP + M
2754*
2755*                    Zero out above L in A
2756*
2757                     CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
2758     $                            A( 1, 2 ), LDA )
2759*
2760*                    Bidiagonalize L in A
2761*                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2762*                    (RWorkspace: need M)
2763*
2764                     CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ),
2765     $                            WORK( ITAUQ ), WORK( ITAUP ),
2766     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2767*
2768*                    Multiply right vectors bidiagonalizing L by Q in VT
2769*                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2770*                    (RWorkspace: 0)
2771*
2772                     CALL CUNMBR( 'P', 'L', 'C', M, N, M, A, LDA,
2773     $                            WORK( ITAUP ), VT, LDVT,
2774     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2775*
2776*                    Generate left bidiagonalizing vectors of L in A
2777*                    (CWorkspace: need 3*M, prefer 2*M+M*NB)
2778*                    (RWorkspace: 0)
2779*
2780                     CALL CUNGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
2781     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2782                     IRWORK = IE + M
2783*
2784*                    Perform bidiagonal QR iteration, computing left
2785*                    singular vectors of A in A and computing right
2786*                    singular vectors of A in VT
2787*                    (CWorkspace: 0)
2788*                    (RWorkspace: need BDSPAC)
2789*
2790                     CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT,
2791     $                            LDVT, A, LDA, CDUM, 1,
2792     $                            RWORK( IRWORK ), INFO )
2793*
2794                  END IF
2795*
2796               ELSE IF( WNTUAS ) THEN
2797*
2798*                 Path 6t(N much larger than M, JOBU='S' or 'A',
2799*                         JOBVT='S')
2800*                 M right singular vectors to be computed in VT and
2801*                 M left singular vectors to be computed in U
2802*
2803                  IF( LWORK.GE.M*M+3*M ) THEN
2804*
2805*                    Sufficient workspace for a fast algorithm
2806*
2807                     IU = 1
2808                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
2809*
2810*                       WORK(IU) is LDA by N
2811*
2812                        LDWRKU = LDA
2813                     ELSE
2814*
2815*                       WORK(IU) is LDA by M
2816*
2817                        LDWRKU = M
2818                     END IF
2819                     ITAU = IU + LDWRKU*M
2820                     IWORK = ITAU + M
2821*
2822*                    Compute A=L*Q
2823*                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2824*                    (RWorkspace: 0)
2825*
2826                     CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
2827     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2828*
2829*                    Copy L to WORK(IU), zeroing out above it
2830*
2831                     CALL CLACPY( 'L', M, M, A, LDA, WORK( IU ),
2832     $                            LDWRKU )
2833                     CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
2834     $                            WORK( IU+LDWRKU ), LDWRKU )
2835*
2836*                    Generate Q in A
2837*                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2838*                    (RWorkspace: 0)
2839*
2840                     CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
2841     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2842                     IE = 1
2843                     ITAUQ = ITAU
2844                     ITAUP = ITAUQ + M
2845                     IWORK = ITAUP + M
2846*
2847*                    Bidiagonalize L in WORK(IU), copying result to U
2848*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2849*                    (RWorkspace: need M)
2850*
2851                     CALL CGEBRD( M, M, WORK( IU ), LDWRKU, S,
2852     $                            RWORK( IE ), WORK( ITAUQ ),
2853     $                            WORK( ITAUP ), WORK( IWORK ),
2854     $                            LWORK-IWORK+1, IERR )
2855                     CALL CLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
2856     $                            LDU )
2857*
2858*                    Generate right bidiagonalizing vectors in WORK(IU)
2859*                    (CWorkspace: need   M*M+3*M-1,
2860*                                 prefer M*M+2*M+(M-1)*NB)
2861*                    (RWorkspace: 0)
2862*
2863                     CALL CUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
2864     $                            WORK( ITAUP ), WORK( IWORK ),
2865     $                            LWORK-IWORK+1, IERR )
2866*
2867*                    Generate left bidiagonalizing vectors in U
2868*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
2869*                    (RWorkspace: 0)
2870*
2871                     CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2872     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2873                     IRWORK = IE + M
2874*
2875*                    Perform bidiagonal QR iteration, computing left
2876*                    singular vectors of L in U and computing right
2877*                    singular vectors of L in WORK(IU)
2878*                    (CWorkspace: need M*M)
2879*                    (RWorkspace: need BDSPAC)
2880*
2881                     CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ),
2882     $                            WORK( IU ), LDWRKU, U, LDU, CDUM, 1,
2883     $                            RWORK( IRWORK ), INFO )
2884*
2885*                    Multiply right singular vectors of L in WORK(IU) by
2886*                    Q in A, storing result in VT
2887*                    (CWorkspace: need M*M)
2888*                    (RWorkspace: 0)
2889*
2890                     CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ),
2891     $                           LDWRKU, A, LDA, CZERO, VT, LDVT )
2892*
2893                  ELSE
2894*
2895*                    Insufficient workspace for a fast algorithm
2896*
2897                     ITAU = 1
2898                     IWORK = ITAU + M
2899*
2900*                    Compute A=L*Q, copying result to VT
2901*                    (CWorkspace: need 2*M, prefer M+M*NB)
2902*                    (RWorkspace: 0)
2903*
2904                     CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
2905     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2906                     CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
2907*
2908*                    Generate Q in VT
2909*                    (CWorkspace: need 2*M, prefer M+M*NB)
2910*                    (RWorkspace: 0)
2911*
2912                     CALL CUNGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2913     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2914*
2915*                    Copy L to U, zeroing out above it
2916*
2917                     CALL CLACPY( 'L', M, M, A, LDA, U, LDU )
2918                     CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
2919     $                            U( 1, 2 ), LDU )
2920                     IE = 1
2921                     ITAUQ = ITAU
2922                     ITAUP = ITAUQ + M
2923                     IWORK = ITAUP + M
2924*
2925*                    Bidiagonalize L in U
2926*                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2927*                    (RWorkspace: need M)
2928*
2929                     CALL CGEBRD( M, M, U, LDU, S, RWORK( IE ),
2930     $                            WORK( ITAUQ ), WORK( ITAUP ),
2931     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2932*
2933*                    Multiply right bidiagonalizing vectors in U by Q
2934*                    in VT
2935*                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2936*                    (RWorkspace: 0)
2937*
2938                     CALL CUNMBR( 'P', 'L', 'C', M, N, M, U, LDU,
2939     $                            WORK( ITAUP ), VT, LDVT,
2940     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2941*
2942*                    Generate left bidiagonalizing vectors in U
2943*                    (CWorkspace: need 3*M, prefer 2*M+M*NB)
2944*                    (RWorkspace: 0)
2945*
2946                     CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2947     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2948                     IRWORK = IE + M
2949*
2950*                    Perform bidiagonal QR iteration, computing left
2951*                    singular vectors of A in U and computing right
2952*                    singular vectors of A in VT
2953*                    (CWorkspace: 0)
2954*                    (RWorkspace: need BDSPAC)
2955*
2956                     CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT,
2957     $                            LDVT, U, LDU, CDUM, 1,
2958     $                            RWORK( IRWORK ), INFO )
2959*
2960                  END IF
2961*
2962               END IF
2963*
2964            ELSE IF( WNTVA ) THEN
2965*
2966               IF( WNTUN ) THEN
2967*
2968*                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')
2969*                 N right singular vectors to be computed in VT and
2970*                 no left singular vectors to be computed
2971*
2972                  IF( LWORK.GE.M*M+MAX( N+M, 3*M ) ) THEN
2973*
2974*                    Sufficient workspace for a fast algorithm
2975*
2976                     IR = 1
2977                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
2978*
2979*                       WORK(IR) is LDA by M
2980*
2981                        LDWRKR = LDA
2982                     ELSE
2983*
2984*                       WORK(IR) is M by M
2985*
2986                        LDWRKR = M
2987                     END IF
2988                     ITAU = IR + LDWRKR*M
2989                     IWORK = ITAU + M
2990*
2991*                    Compute A=L*Q, copying result to VT
2992*                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2993*                    (RWorkspace: 0)
2994*
2995                     CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
2996     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2997                     CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
2998*
2999*                    Copy L to WORK(IR), zeroing out above it
3000*
3001                     CALL CLACPY( 'L', M, M, A, LDA, WORK( IR ),
3002     $                            LDWRKR )
3003                     CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
3004     $                            WORK( IR+LDWRKR ), LDWRKR )
3005*
3006*                    Generate Q in VT
3007*                    (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB)
3008*                    (RWorkspace: 0)
3009*
3010                     CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3011     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3012                     IE = 1
3013                     ITAUQ = ITAU
3014                     ITAUP = ITAUQ + M
3015                     IWORK = ITAUP + M
3016*
3017*                    Bidiagonalize L in WORK(IR)
3018*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
3019*                    (RWorkspace: need M)
3020*
3021                     CALL CGEBRD( M, M, WORK( IR ), LDWRKR, S,
3022     $                            RWORK( IE ), WORK( ITAUQ ),
3023     $                            WORK( ITAUP ), WORK( IWORK ),
3024     $                            LWORK-IWORK+1, IERR )
3025*
3026*                    Generate right bidiagonalizing vectors in WORK(IR)
3027*                    (CWorkspace: need   M*M+3*M-1,
3028*                                 prefer M*M+2*M+(M-1)*NB)
3029*                    (RWorkspace: 0)
3030*
3031                     CALL CUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
3032     $                            WORK( ITAUP ), WORK( IWORK ),
3033     $                            LWORK-IWORK+1, IERR )
3034                     IRWORK = IE + M
3035*
3036*                    Perform bidiagonal QR iteration, computing right
3037*                    singular vectors of L in WORK(IR)
3038*                    (CWorkspace: need M*M)
3039*                    (RWorkspace: need BDSPAC)
3040*
3041                     CALL CBDSQR( 'U', M, M, 0, 0, S, RWORK( IE ),
3042     $                            WORK( IR ), LDWRKR, CDUM, 1, CDUM, 1,
3043     $                            RWORK( IRWORK ), INFO )
3044*
3045*                    Multiply right singular vectors of L in WORK(IR) by
3046*                    Q in VT, storing result in A
3047*                    (CWorkspace: need M*M)
3048*                    (RWorkspace: 0)
3049*
3050                     CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IR ),
3051     $                           LDWRKR, VT, LDVT, CZERO, A, LDA )
3052*
3053*                    Copy right singular vectors of A from A to VT
3054*
3055                     CALL CLACPY( 'F', M, N, A, LDA, VT, LDVT )
3056*
3057                  ELSE
3058*
3059*                    Insufficient workspace for a fast algorithm
3060*
3061                     ITAU = 1
3062                     IWORK = ITAU + M
3063*
3064*                    Compute A=L*Q, copying result to VT
3065*                    (CWorkspace: need 2*M, prefer M+M*NB)
3066*                    (RWorkspace: 0)
3067*
3068                     CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
3069     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3070                     CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
3071*
3072*                    Generate Q in VT
3073*                    (CWorkspace: need M+N, prefer M+N*NB)
3074*                    (RWorkspace: 0)
3075*
3076                     CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3077     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3078                     IE = 1
3079                     ITAUQ = ITAU
3080                     ITAUP = ITAUQ + M
3081                     IWORK = ITAUP + M
3082*
3083*                    Zero out above L in A
3084*
3085                     CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
3086     $                            A( 1, 2 ), LDA )
3087*
3088*                    Bidiagonalize L in A
3089*                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3090*                    (RWorkspace: need M)
3091*
3092                     CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ),
3093     $                            WORK( ITAUQ ), WORK( ITAUP ),
3094     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3095*
3096*                    Multiply right bidiagonalizing vectors in A by Q
3097*                    in VT
3098*                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3099*                    (RWorkspace: 0)
3100*
3101                     CALL CUNMBR( 'P', 'L', 'C', M, N, M, A, LDA,
3102     $                            WORK( ITAUP ), VT, LDVT,
3103     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3104                     IRWORK = IE + M
3105*
3106*                    Perform bidiagonal QR iteration, computing right
3107*                    singular vectors of A in VT
3108*                    (CWorkspace: 0)
3109*                    (RWorkspace: need BDSPAC)
3110*
3111                     CALL CBDSQR( 'U', M, N, 0, 0, S, RWORK( IE ), VT,
3112     $                            LDVT, CDUM, 1, CDUM, 1,
3113     $                            RWORK( IRWORK ), INFO )
3114*
3115                  END IF
3116*
3117               ELSE IF( WNTUO ) THEN
3118*
3119*                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')
3120*                 N right singular vectors to be computed in VT and
3121*                 M left singular vectors to be overwritten on A
3122*
3123                  IF( LWORK.GE.2*M*M+MAX( N+M, 3*M ) ) THEN
3124*
3125*                    Sufficient workspace for a fast algorithm
3126*
3127                     IU = 1
3128                     IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
3129*
3130*                       WORK(IU) is LDA by M and WORK(IR) is LDA by M
3131*
3132                        LDWRKU = LDA
3133                        IR = IU + LDWRKU*M
3134                        LDWRKR = LDA
3135                     ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
3136*
3137*                       WORK(IU) is LDA by M and WORK(IR) is M by M
3138*
3139                        LDWRKU = LDA
3140                        IR = IU + LDWRKU*M
3141                        LDWRKR = M
3142                     ELSE
3143*
3144*                       WORK(IU) is M by M and WORK(IR) is M by M
3145*
3146                        LDWRKU = M
3147                        IR = IU + LDWRKU*M
3148                        LDWRKR = M
3149                     END IF
3150                     ITAU = IR + LDWRKR*M
3151                     IWORK = ITAU + M
3152*
3153*                    Compute A=L*Q, copying result to VT
3154*                    (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
3155*                    (RWorkspace: 0)
3156*
3157                     CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
3158     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3159                     CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
3160*
3161*                    Generate Q in VT
3162*                    (CWorkspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
3163*                    (RWorkspace: 0)
3164*
3165                     CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3166     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3167*
3168*                    Copy L to WORK(IU), zeroing out above it
3169*
3170                     CALL CLACPY( 'L', M, M, A, LDA, WORK( IU ),
3171     $                            LDWRKU )
3172                     CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
3173     $                            WORK( IU+LDWRKU ), LDWRKU )
3174                     IE = 1
3175                     ITAUQ = ITAU
3176                     ITAUP = ITAUQ + M
3177                     IWORK = ITAUP + M
3178*
3179*                    Bidiagonalize L in WORK(IU), copying result to
3180*                    WORK(IR)
3181*                    (CWorkspace: need   2*M*M+3*M,
3182*                                 prefer 2*M*M+2*M+2*M*NB)
3183*                    (RWorkspace: need   M)
3184*
3185                     CALL CGEBRD( M, M, WORK( IU ), LDWRKU, S,
3186     $                            RWORK( IE ), WORK( ITAUQ ),
3187     $                            WORK( ITAUP ), WORK( IWORK ),
3188     $                            LWORK-IWORK+1, IERR )
3189                     CALL CLACPY( 'L', M, M, WORK( IU ), LDWRKU,
3190     $                            WORK( IR ), LDWRKR )
3191*
3192*                    Generate right bidiagonalizing vectors in WORK(IU)
3193*                    (CWorkspace: need   2*M*M+3*M-1,
3194*                                 prefer 2*M*M+2*M+(M-1)*NB)
3195*                    (RWorkspace: 0)
3196*
3197                     CALL CUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
3198     $                            WORK( ITAUP ), WORK( IWORK ),
3199     $                            LWORK-IWORK+1, IERR )
3200*
3201*                    Generate left bidiagonalizing vectors in WORK(IR)
3202*                    (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
3203*                    (RWorkspace: 0)
3204*
3205                     CALL CUNGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
3206     $                            WORK( ITAUQ ), WORK( IWORK ),
3207     $                            LWORK-IWORK+1, IERR )
3208                     IRWORK = IE + M
3209*
3210*                    Perform bidiagonal QR iteration, computing left
3211*                    singular vectors of L in WORK(IR) and computing
3212*                    right singular vectors of L in WORK(IU)
3213*                    (CWorkspace: need 2*M*M)
3214*                    (RWorkspace: need BDSPAC)
3215*
3216                     CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ),
3217     $                            WORK( IU ), LDWRKU, WORK( IR ),
3218     $                            LDWRKR, CDUM, 1, RWORK( IRWORK ),
3219     $                            INFO )
3220*
3221*                    Multiply right singular vectors of L in WORK(IU) by
3222*                    Q in VT, storing result in A
3223*                    (CWorkspace: need M*M)
3224*                    (RWorkspace: 0)
3225*
3226                     CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ),
3227     $                           LDWRKU, VT, LDVT, CZERO, A, LDA )
3228*
3229*                    Copy right singular vectors of A from A to VT
3230*
3231                     CALL CLACPY( 'F', M, N, A, LDA, VT, LDVT )
3232*
3233*                    Copy left singular vectors of A from WORK(IR) to A
3234*
3235                     CALL CLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
3236     $                            LDA )
3237*
3238                  ELSE
3239*
3240*                    Insufficient workspace for a fast algorithm
3241*
3242                     ITAU = 1
3243                     IWORK = ITAU + M
3244*
3245*                    Compute A=L*Q, copying result to VT
3246*                    (CWorkspace: need 2*M, prefer M+M*NB)
3247*                    (RWorkspace: 0)
3248*
3249                     CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
3250     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3251                     CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
3252*
3253*                    Generate Q in VT
3254*                    (CWorkspace: need M+N, prefer M+N*NB)
3255*                    (RWorkspace: 0)
3256*
3257                     CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3258     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3259                     IE = 1
3260                     ITAUQ = ITAU
3261                     ITAUP = ITAUQ + M
3262                     IWORK = ITAUP + M
3263*
3264*                    Zero out above L in A
3265*
3266                     CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
3267     $                            A( 1, 2 ), LDA )
3268*
3269*                    Bidiagonalize L in A
3270*                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3271*                    (RWorkspace: need M)
3272*
3273                     CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ),
3274     $                            WORK( ITAUQ ), WORK( ITAUP ),
3275     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3276*
3277*                    Multiply right bidiagonalizing vectors in A by Q
3278*                    in VT
3279*                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3280*                    (RWorkspace: 0)
3281*
3282                     CALL CUNMBR( 'P', 'L', 'C', M, N, M, A, LDA,
3283     $                            WORK( ITAUP ), VT, LDVT,
3284     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3285*
3286*                    Generate left bidiagonalizing vectors in A
3287*                    (CWorkspace: need 3*M, prefer 2*M+M*NB)
3288*                    (RWorkspace: 0)
3289*
3290                     CALL CUNGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
3291     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3292                     IRWORK = IE + M
3293*
3294*                    Perform bidiagonal QR iteration, computing left
3295*                    singular vectors of A in A and computing right
3296*                    singular vectors of A in VT
3297*                    (CWorkspace: 0)
3298*                    (RWorkspace: need BDSPAC)
3299*
3300                     CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT,
3301     $                            LDVT, A, LDA, CDUM, 1,
3302     $                            RWORK( IRWORK ), INFO )
3303*
3304                  END IF
3305*
3306               ELSE IF( WNTUAS ) THEN
3307*
3308*                 Path 9t(N much larger than M, JOBU='S' or 'A',
3309*                         JOBVT='A')
3310*                 N right singular vectors to be computed in VT and
3311*                 M left singular vectors to be computed in U
3312*
3313                  IF( LWORK.GE.M*M+MAX( N+M, 3*M ) ) THEN
3314*
3315*                    Sufficient workspace for a fast algorithm
3316*
3317                     IU = 1
3318                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
3319*
3320*                       WORK(IU) is LDA by M
3321*
3322                        LDWRKU = LDA
3323                     ELSE
3324*
3325*                       WORK(IU) is M by M
3326*
3327                        LDWRKU = M
3328                     END IF
3329                     ITAU = IU + LDWRKU*M
3330                     IWORK = ITAU + M
3331*
3332*                    Compute A=L*Q, copying result to VT
3333*                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
3334*                    (RWorkspace: 0)
3335*
3336                     CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
3337     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3338                     CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
3339*
3340*                    Generate Q in VT
3341*                    (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB)
3342*                    (RWorkspace: 0)
3343*
3344                     CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3345     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3346*
3347*                    Copy L to WORK(IU), zeroing out above it
3348*
3349                     CALL CLACPY( 'L', M, M, A, LDA, WORK( IU ),
3350     $                            LDWRKU )
3351                     CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
3352     $                            WORK( IU+LDWRKU ), LDWRKU )
3353                     IE = 1
3354                     ITAUQ = ITAU
3355                     ITAUP = ITAUQ + M
3356                     IWORK = ITAUP + M
3357*
3358*                    Bidiagonalize L in WORK(IU), copying result to U
3359*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
3360*                    (RWorkspace: need M)
3361*
3362                     CALL CGEBRD( M, M, WORK( IU ), LDWRKU, S,
3363     $                            RWORK( IE ), WORK( ITAUQ ),
3364     $                            WORK( ITAUP ), WORK( IWORK ),
3365     $                            LWORK-IWORK+1, IERR )
3366                     CALL CLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
3367     $                            LDU )
3368*
3369*                    Generate right bidiagonalizing vectors in WORK(IU)
3370*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB)
3371*                    (RWorkspace: 0)
3372*
3373                     CALL CUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
3374     $                            WORK( ITAUP ), WORK( IWORK ),
3375     $                            LWORK-IWORK+1, IERR )
3376*
3377*                    Generate left bidiagonalizing vectors in U
3378*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
3379*                    (RWorkspace: 0)
3380*
3381                     CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3382     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3383                     IRWORK = IE + M
3384*
3385*                    Perform bidiagonal QR iteration, computing left
3386*                    singular vectors of L in U and computing right
3387*                    singular vectors of L in WORK(IU)
3388*                    (CWorkspace: need M*M)
3389*                    (RWorkspace: need BDSPAC)
3390*
3391                     CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ),
3392     $                            WORK( IU ), LDWRKU, U, LDU, CDUM, 1,
3393     $                            RWORK( IRWORK ), INFO )
3394*
3395*                    Multiply right singular vectors of L in WORK(IU) by
3396*                    Q in VT, storing result in A
3397*                    (CWorkspace: need M*M)
3398*                    (RWorkspace: 0)
3399*
3400                     CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ),
3401     $                           LDWRKU, VT, LDVT, CZERO, A, LDA )
3402*
3403*                    Copy right singular vectors of A from A to VT
3404*
3405                     CALL CLACPY( 'F', M, N, A, LDA, VT, LDVT )
3406*
3407                  ELSE
3408*
3409*                    Insufficient workspace for a fast algorithm
3410*
3411                     ITAU = 1
3412                     IWORK = ITAU + M
3413*
3414*                    Compute A=L*Q, copying result to VT
3415*                    (CWorkspace: need 2*M, prefer M+M*NB)
3416*                    (RWorkspace: 0)
3417*
3418                     CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
3419     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3420                     CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
3421*
3422*                    Generate Q in VT
3423*                    (CWorkspace: need M+N, prefer M+N*NB)
3424*                    (RWorkspace: 0)
3425*
3426                     CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3427     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3428*
3429*                    Copy L to U, zeroing out above it
3430*
3431                     CALL CLACPY( 'L', M, M, A, LDA, U, LDU )
3432                     CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
3433     $                            U( 1, 2 ), LDU )
3434                     IE = 1
3435                     ITAUQ = ITAU
3436                     ITAUP = ITAUQ + M
3437                     IWORK = ITAUP + M
3438*
3439*                    Bidiagonalize L in U
3440*                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3441*                    (RWorkspace: need M)
3442*
3443                     CALL CGEBRD( M, M, U, LDU, S, RWORK( IE ),
3444     $                            WORK( ITAUQ ), WORK( ITAUP ),
3445     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3446*
3447*                    Multiply right bidiagonalizing vectors in U by Q
3448*                    in VT
3449*                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3450*                    (RWorkspace: 0)
3451*
3452                     CALL CUNMBR( 'P', 'L', 'C', M, N, M, U, LDU,
3453     $                            WORK( ITAUP ), VT, LDVT,
3454     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3455*
3456*                    Generate left bidiagonalizing vectors in U
3457*                    (CWorkspace: need 3*M, prefer 2*M+M*NB)
3458*                    (RWorkspace: 0)
3459*
3460                     CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3461     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3462                     IRWORK = IE + M
3463*
3464*                    Perform bidiagonal QR iteration, computing left
3465*                    singular vectors of A in U and computing right
3466*                    singular vectors of A in VT
3467*                    (CWorkspace: 0)
3468*                    (RWorkspace: need BDSPAC)
3469*
3470                     CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT,
3471     $                            LDVT, U, LDU, CDUM, 1,
3472     $                            RWORK( IRWORK ), INFO )
3473*
3474                  END IF
3475*
3476               END IF
3477*
3478            END IF
3479*
3480         ELSE
3481*
3482*           N .LT. MNTHR
3483*
3484*           Path 10t(N greater than M, but not much larger)
3485*           Reduce to bidiagonal form without LQ decomposition
3486*
3487            IE = 1
3488            ITAUQ = 1
3489            ITAUP = ITAUQ + M
3490            IWORK = ITAUP + M
3491*
3492*           Bidiagonalize A
3493*           (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
3494*           (RWorkspace: M)
3495*
3496            CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
3497     $                   WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
3498     $                   IERR )
3499            IF( WNTUAS ) THEN
3500*
3501*              If left singular vectors desired in U, copy result to U
3502*              and generate left bidiagonalizing vectors in U
3503*              (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB)
3504*              (RWorkspace: 0)
3505*
3506               CALL CLACPY( 'L', M, M, A, LDA, U, LDU )
3507               CALL CUNGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ),
3508     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3509            END IF
3510            IF( WNTVAS ) THEN
3511*
3512*              If right singular vectors desired in VT, copy result to
3513*              VT and generate right bidiagonalizing vectors in VT
3514*              (CWorkspace: need 2*M+NRVT, prefer 2*M+NRVT*NB)
3515*              (RWorkspace: 0)
3516*
3517               CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
3518               IF( WNTVA )
3519     $            NRVT = N
3520               IF( WNTVS )
3521     $            NRVT = M
3522               CALL CUNGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ),
3523     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3524            END IF
3525            IF( WNTUO ) THEN
3526*
3527*              If left singular vectors desired in A, generate left
3528*              bidiagonalizing vectors in A
3529*              (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB)
3530*              (RWorkspace: 0)
3531*
3532               CALL CUNGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ),
3533     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3534            END IF
3535            IF( WNTVO ) THEN
3536*
3537*              If right singular vectors desired in A, generate right
3538*              bidiagonalizing vectors in A
3539*              (CWorkspace: need 3*M, prefer 2*M+M*NB)
3540*              (RWorkspace: 0)
3541*
3542               CALL CUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
3543     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3544            END IF
3545            IRWORK = IE + M
3546            IF( WNTUAS .OR. WNTUO )
3547     $         NRU = M
3548            IF( WNTUN )
3549     $         NRU = 0
3550            IF( WNTVAS .OR. WNTVO )
3551     $         NCVT = N
3552            IF( WNTVN )
3553     $         NCVT = 0
3554            IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
3555*
3556*              Perform bidiagonal QR iteration, if desired, computing
3557*              left singular vectors in U and computing right singular
3558*              vectors in VT
3559*              (CWorkspace: 0)
3560*              (RWorkspace: need BDSPAC)
3561*
3562               CALL CBDSQR( 'L', M, NCVT, NRU, 0, S, RWORK( IE ), VT,
3563     $                      LDVT, U, LDU, CDUM, 1, RWORK( IRWORK ),
3564     $                      INFO )
3565            ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
3566*
3567*              Perform bidiagonal QR iteration, if desired, computing
3568*              left singular vectors in U and computing right singular
3569*              vectors in A
3570*              (CWorkspace: 0)
3571*              (RWorkspace: need BDSPAC)
3572*
3573               CALL CBDSQR( 'L', M, NCVT, NRU, 0, S, RWORK( IE ), A,
3574     $                      LDA, U, LDU, CDUM, 1, RWORK( IRWORK ),
3575     $                      INFO )
3576            ELSE
3577*
3578*              Perform bidiagonal QR iteration, if desired, computing
3579*              left singular vectors in A and computing right singular
3580*              vectors in VT
3581*              (CWorkspace: 0)
3582*              (RWorkspace: need BDSPAC)
3583*
3584               CALL CBDSQR( 'L', M, NCVT, NRU, 0, S, RWORK( IE ), VT,
3585     $                      LDVT, A, LDA, CDUM, 1, RWORK( IRWORK ),
3586     $                      INFO )
3587            END IF
3588*
3589         END IF
3590*
3591      END IF
3592*
3593*     Undo scaling if necessary
3594*
3595      IF( ISCL.EQ.1 ) THEN
3596         IF( ANRM.GT.BIGNUM )
3597     $      CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
3598     $                   IERR )
3599         IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM )
3600     $      CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1,
3601     $                   RWORK( IE ), MINMN, IERR )
3602         IF( ANRM.LT.SMLNUM )
3603     $      CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
3604     $                   IERR )
3605         IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM )
3606     $      CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1,
3607     $                   RWORK( IE ), MINMN, IERR )
3608      END IF
3609*
3610*     Return optimal workspace in WORK(1)
3611*
3612      WORK( 1 ) = MAXWRK
3613*
3614      RETURN
3615*
3616*     End of CGESVD
3617*
3618      END
3619