1*> \brief <b> DGESVD computes the singular value decomposition (SVD) for GE matrices</b> 2* 3* =========== DOCUMENTATION =========== 4* 5* Online html documentation available at 6* http://www.netlib.org/lapack/explore-html/ 7* 8*> \htmlonly 9*> Download DGESVD + dependencies 10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgesvd.f"> 11*> [TGZ]</a> 12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgesvd.f"> 13*> [ZIP]</a> 14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgesvd.f"> 15*> [TXT]</a> 16*> \endhtmlonly 17* 18* Definition: 19* =========== 20* 21* SUBROUTINE DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, 22* WORK, LWORK, INFO ) 23* 24* .. Scalar Arguments .. 25* CHARACTER JOBU, JOBVT 26* INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N 27* .. 28* .. Array Arguments .. 29* DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ), 30* $ VT( LDVT, * ), WORK( * ) 31* .. 32* 33* 34*> \par Purpose: 35* ============= 36*> 37*> \verbatim 38*> 39*> DGESVD computes the singular value decomposition (SVD) of a real 40*> M-by-N matrix A, optionally computing the left and/or right singular 41*> vectors. The SVD is written 42*> 43*> A = U * SIGMA * transpose(V) 44*> 45*> where SIGMA is an M-by-N matrix which is zero except for its 46*> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and 47*> V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA 48*> are the singular values of A; they are real and non-negative, and 49*> are returned in descending order. The first min(m,n) columns of 50*> U and V are the left and right singular vectors of A. 51*> 52*> Note that the routine returns V**T, not V. 53*> \endverbatim 54* 55* Arguments: 56* ========== 57* 58*> \param[in] JOBU 59*> \verbatim 60*> JOBU is CHARACTER*1 61*> Specifies options for computing all or part of the matrix U: 62*> = 'A': all M columns of U are returned in array U: 63*> = 'S': the first min(m,n) columns of U (the left singular 64*> vectors) are returned in the array U; 65*> = 'O': the first min(m,n) columns of U (the left singular 66*> vectors) are overwritten on the array A; 67*> = 'N': no columns of U (no left singular vectors) are 68*> computed. 69*> \endverbatim 70*> 71*> \param[in] JOBVT 72*> \verbatim 73*> JOBVT is CHARACTER*1 74*> Specifies options for computing all or part of the matrix 75*> V**T: 76*> = 'A': all N rows of V**T are returned in the array VT; 77*> = 'S': the first min(m,n) rows of V**T (the right singular 78*> vectors) are returned in the array VT; 79*> = 'O': the first min(m,n) rows of V**T (the right singular 80*> vectors) are overwritten on the array A; 81*> = 'N': no rows of V**T (no right singular vectors) are 82*> computed. 83*> 84*> JOBVT and JOBU cannot both be 'O'. 85*> \endverbatim 86*> 87*> \param[in] M 88*> \verbatim 89*> M is INTEGER 90*> The number of rows of the input matrix A. M >= 0. 91*> \endverbatim 92*> 93*> \param[in] N 94*> \verbatim 95*> N is INTEGER 96*> The number of columns of the input matrix A. N >= 0. 97*> \endverbatim 98*> 99*> \param[in,out] A 100*> \verbatim 101*> A is DOUBLE PRECISION array, dimension (LDA,N) 102*> On entry, the M-by-N matrix A. 103*> On exit, 104*> if JOBU = 'O', A is overwritten with the first min(m,n) 105*> columns of U (the left singular vectors, 106*> stored columnwise); 107*> if JOBVT = 'O', A is overwritten with the first min(m,n) 108*> rows of V**T (the right singular vectors, 109*> stored rowwise); 110*> if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A 111*> are destroyed. 112*> \endverbatim 113*> 114*> \param[in] LDA 115*> \verbatim 116*> LDA is INTEGER 117*> The leading dimension of the array A. LDA >= max(1,M). 118*> \endverbatim 119*> 120*> \param[out] S 121*> \verbatim 122*> S is DOUBLE PRECISION array, dimension (min(M,N)) 123*> The singular values of A, sorted so that S(i) >= S(i+1). 124*> \endverbatim 125*> 126*> \param[out] U 127*> \verbatim 128*> U is DOUBLE PRECISION array, dimension (LDU,UCOL) 129*> (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'. 130*> If JOBU = 'A', U contains the M-by-M orthogonal matrix U; 131*> if JOBU = 'S', U contains the first min(m,n) columns of U 132*> (the left singular vectors, stored columnwise); 133*> if JOBU = 'N' or 'O', U is not referenced. 134*> \endverbatim 135*> 136*> \param[in] LDU 137*> \verbatim 138*> LDU is INTEGER 139*> The leading dimension of the array U. LDU >= 1; if 140*> JOBU = 'S' or 'A', LDU >= M. 141*> \endverbatim 142*> 143*> \param[out] VT 144*> \verbatim 145*> VT is DOUBLE PRECISION array, dimension (LDVT,N) 146*> If JOBVT = 'A', VT contains the N-by-N orthogonal matrix 147*> V**T; 148*> if JOBVT = 'S', VT contains the first min(m,n) rows of 149*> V**T (the right singular vectors, stored rowwise); 150*> if JOBVT = 'N' or 'O', VT is not referenced. 151*> \endverbatim 152*> 153*> \param[in] LDVT 154*> \verbatim 155*> LDVT is INTEGER 156*> The leading dimension of the array VT. LDVT >= 1; if 157*> JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N). 158*> \endverbatim 159*> 160*> \param[out] WORK 161*> \verbatim 162*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) 163*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK; 164*> if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged 165*> superdiagonal elements of an upper bidiagonal matrix B 166*> whose diagonal is in S (not necessarily sorted). B 167*> satisfies A = U * B * VT, so it has the same singular values 168*> as A, and singular vectors related by U and VT. 169*> \endverbatim 170*> 171*> \param[in] LWORK 172*> \verbatim 173*> LWORK is INTEGER 174*> The dimension of the array WORK. 175*> LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code): 176*> - PATH 1 (M much larger than N, JOBU='N') 177*> - PATH 1t (N much larger than M, JOBVT='N') 178*> LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) for the other paths 179*> For good performance, LWORK should generally be larger. 180*> 181*> If LWORK = -1, then a workspace query is assumed; the routine 182*> only calculates the optimal size of the WORK array, returns 183*> this value as the first entry of the WORK array, and no error 184*> message related to LWORK is issued by XERBLA. 185*> \endverbatim 186*> 187*> \param[out] INFO 188*> \verbatim 189*> INFO is INTEGER 190*> = 0: successful exit. 191*> < 0: if INFO = -i, the i-th argument had an illegal value. 192*> > 0: if DBDSQR did not converge, INFO specifies how many 193*> superdiagonals of an intermediate bidiagonal form B 194*> did not converge to zero. See the description of WORK 195*> above for details. 196*> \endverbatim 197* 198* Authors: 199* ======== 200* 201*> \author Univ. of Tennessee 202*> \author Univ. of California Berkeley 203*> \author Univ. of Colorado Denver 204*> \author NAG Ltd. 205* 206*> \date April 2012 207* 208*> \ingroup doubleGEsing 209* 210* ===================================================================== 211 SUBROUTINE DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, 212 $ VT, LDVT, WORK, LWORK, INFO ) 213* 214* -- LAPACK driver routine (version 3.4.1) -- 215* -- LAPACK is a software package provided by Univ. of Tennessee, -- 216* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- 217* April 2012 218* 219* .. Scalar Arguments .. 220 CHARACTER JOBU, JOBVT 221 INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N 222* .. 223* .. Array Arguments .. 224 DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ), 225 $ VT( LDVT, * ), WORK( * ) 226* .. 227* 228* ===================================================================== 229* 230* .. Parameters .. 231 DOUBLE PRECISION ZERO, ONE 232 PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) 233* .. 234* .. Local Scalars .. 235 LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS, 236 $ WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS 237 INTEGER BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL, 238 $ ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU, 239 $ MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU, 240 $ NRVT, WRKBL 241 INTEGER LWORK_DGEQRF, LWORK_DORGQR_N, LWORK_DORGQR_M, 242 $ LWORK_DGEBRD, LWORK_DORGBR_P, LWORK_DORGBR_Q, 243 $ LWORK_DGELQF, LWORK_DORGLQ_N, LWORK_DORGLQ_M 244 DOUBLE PRECISION ANRM, BIGNUM, EPS, SMLNUM 245* .. 246* .. Local Arrays .. 247 DOUBLE PRECISION DUM( 1 ) 248* .. 249* .. External Subroutines .. 250 EXTERNAL DBDSQR, DGEBRD, DGELQF, DGEMM, DGEQRF, DLACPY, 251 $ DLASCL, DLASET, DORGBR, DORGLQ, DORGQR, DORMBR, 252 $ XERBLA 253* .. 254* .. External Functions .. 255 LOGICAL LSAME 256 INTEGER ILAENV 257 DOUBLE PRECISION DLAMCH, DLANGE 258 EXTERNAL LSAME, ILAENV, DLAMCH, DLANGE 259* .. 260* .. Intrinsic Functions .. 261 INTRINSIC MAX, MIN, SQRT 262* .. 263* .. Executable Statements .. 264* 265* Test the input arguments 266* 267 INFO = 0 268 MINMN = MIN( M, N ) 269 WNTUA = LSAME( JOBU, 'A' ) 270 WNTUS = LSAME( JOBU, 'S' ) 271 WNTUAS = WNTUA .OR. WNTUS 272 WNTUO = LSAME( JOBU, 'O' ) 273 WNTUN = LSAME( JOBU, 'N' ) 274 WNTVA = LSAME( JOBVT, 'A' ) 275 WNTVS = LSAME( JOBVT, 'S' ) 276 WNTVAS = WNTVA .OR. WNTVS 277 WNTVO = LSAME( JOBVT, 'O' ) 278 WNTVN = LSAME( JOBVT, 'N' ) 279 LQUERY = ( LWORK.EQ.-1 ) 280* 281 IF( .NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN 282 INFO = -1 283 ELSE IF( .NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR. 284 $ ( WNTVO .AND. WNTUO ) ) THEN 285 INFO = -2 286 ELSE IF( M.LT.0 ) THEN 287 INFO = -3 288 ELSE IF( N.LT.0 ) THEN 289 INFO = -4 290 ELSE IF( LDA.LT.MAX( 1, M ) ) THEN 291 INFO = -6 292 ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN 293 INFO = -9 294 ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR. 295 $ ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN 296 INFO = -11 297 END IF 298* 299* Compute workspace 300* (Note: Comments in the code beginning "Workspace:" describe the 301* minimal amount of workspace needed at that point in the code, 302* as well as the preferred amount for good performance. 303* NB refers to the optimal block size for the immediately 304* following subroutine, as returned by ILAENV.) 305* 306 IF( INFO.EQ.0 ) THEN 307 MINWRK = 1 308 MAXWRK = 1 309 IF( M.GE.N .AND. MINMN.GT.0 ) THEN 310* 311* Compute space needed for DBDSQR 312* 313 MNTHR = ILAENV( 6, 'DGESVD', JOBU // JOBVT, M, N, 0, 0 ) 314 BDSPAC = 5*N 315* Compute space needed for DGEQRF 316 CALL DGEQRF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR ) 317 LWORK_DGEQRF=DUM(1) 318* Compute space needed for DORGQR 319 CALL DORGQR( M, N, N, A, LDA, DUM(1), DUM(1), -1, IERR ) 320 LWORK_DORGQR_N=DUM(1) 321 CALL DORGQR( M, M, N, A, LDA, DUM(1), DUM(1), -1, IERR ) 322 LWORK_DORGQR_M=DUM(1) 323* Compute space needed for DGEBRD 324 CALL DGEBRD( N, N, A, LDA, S, DUM(1), DUM(1), 325 $ DUM(1), DUM(1), -1, IERR ) 326 LWORK_DGEBRD=DUM(1) 327* Compute space needed for DORGBR P 328 CALL DORGBR( 'P', N, N, N, A, LDA, DUM(1), 329 $ DUM(1), -1, IERR ) 330 LWORK_DORGBR_P=DUM(1) 331* Compute space needed for DORGBR Q 332 CALL DORGBR( 'Q', N, N, N, A, LDA, DUM(1), 333 $ DUM(1), -1, IERR ) 334 LWORK_DORGBR_Q=DUM(1) 335* 336 IF( M.GE.MNTHR ) THEN 337 IF( WNTUN ) THEN 338* 339* Path 1 (M much larger than N, JOBU='N') 340* 341 MAXWRK = N + LWORK_DGEQRF 342 MAXWRK = MAX( MAXWRK, 3*N+LWORK_DGEBRD ) 343 IF( WNTVO .OR. WNTVAS ) 344 $ MAXWRK = MAX( MAXWRK, 3*N+LWORK_DORGBR_P ) 345 MAXWRK = MAX( MAXWRK, BDSPAC ) 346 MINWRK = MAX( 4*N, BDSPAC ) 347 ELSE IF( WNTUO .AND. WNTVN ) THEN 348* 349* Path 2 (M much larger than N, JOBU='O', JOBVT='N') 350* 351 WRKBL = N + LWORK_DGEQRF 352 WRKBL = MAX( WRKBL, N+LWORK_DORGQR_N ) 353 WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD ) 354 WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q ) 355 WRKBL = MAX( WRKBL, BDSPAC ) 356 MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N ) 357 MINWRK = MAX( 3*N+M, BDSPAC ) 358 ELSE IF( WNTUO .AND. WNTVAS ) THEN 359* 360* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 361* 'A') 362* 363 WRKBL = N + LWORK_DGEQRF 364 WRKBL = MAX( WRKBL, N+LWORK_DORGQR_N ) 365 WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD ) 366 WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q ) 367 WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_P ) 368 WRKBL = MAX( WRKBL, BDSPAC ) 369 MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N ) 370 MINWRK = MAX( 3*N+M, BDSPAC ) 371 ELSE IF( WNTUS .AND. WNTVN ) THEN 372* 373* Path 4 (M much larger than N, JOBU='S', JOBVT='N') 374* 375 WRKBL = N + LWORK_DGEQRF 376 WRKBL = MAX( WRKBL, N+LWORK_DORGQR_N ) 377 WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD ) 378 WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q ) 379 WRKBL = MAX( WRKBL, BDSPAC ) 380 MAXWRK = N*N + WRKBL 381 MINWRK = MAX( 3*N+M, BDSPAC ) 382 ELSE IF( WNTUS .AND. WNTVO ) THEN 383* 384* Path 5 (M much larger than N, JOBU='S', JOBVT='O') 385* 386 WRKBL = N + LWORK_DGEQRF 387 WRKBL = MAX( WRKBL, N+LWORK_DORGQR_N ) 388 WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD ) 389 WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q ) 390 WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_P ) 391 WRKBL = MAX( WRKBL, BDSPAC ) 392 MAXWRK = 2*N*N + WRKBL 393 MINWRK = MAX( 3*N+M, BDSPAC ) 394 ELSE IF( WNTUS .AND. WNTVAS ) THEN 395* 396* Path 6 (M much larger than N, JOBU='S', JOBVT='S' or 397* 'A') 398* 399 WRKBL = N + LWORK_DGEQRF 400 WRKBL = MAX( WRKBL, N+LWORK_DORGQR_N ) 401 WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD ) 402 WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q ) 403 WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_P ) 404 WRKBL = MAX( WRKBL, BDSPAC ) 405 MAXWRK = N*N + WRKBL 406 MINWRK = MAX( 3*N+M, BDSPAC ) 407 ELSE IF( WNTUA .AND. WNTVN ) THEN 408* 409* Path 7 (M much larger than N, JOBU='A', JOBVT='N') 410* 411 WRKBL = N + LWORK_DGEQRF 412 WRKBL = MAX( WRKBL, N+LWORK_DORGQR_M ) 413 WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD ) 414 WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q ) 415 WRKBL = MAX( WRKBL, BDSPAC ) 416 MAXWRK = N*N + WRKBL 417 MINWRK = MAX( 3*N+M, BDSPAC ) 418 ELSE IF( WNTUA .AND. WNTVO ) THEN 419* 420* Path 8 (M much larger than N, JOBU='A', JOBVT='O') 421* 422 WRKBL = N + LWORK_DGEQRF 423 WRKBL = MAX( WRKBL, N+LWORK_DORGQR_M ) 424 WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD ) 425 WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q ) 426 WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_P ) 427 WRKBL = MAX( WRKBL, BDSPAC ) 428 MAXWRK = 2*N*N + WRKBL 429 MINWRK = MAX( 3*N+M, BDSPAC ) 430 ELSE IF( WNTUA .AND. WNTVAS ) THEN 431* 432* Path 9 (M much larger than N, JOBU='A', JOBVT='S' or 433* 'A') 434* 435 WRKBL = N + LWORK_DGEQRF 436 WRKBL = MAX( WRKBL, N+LWORK_DORGQR_M ) 437 WRKBL = MAX( WRKBL, 3*N+LWORK_DGEBRD ) 438 WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_Q ) 439 WRKBL = MAX( WRKBL, 3*N+LWORK_DORGBR_P ) 440 WRKBL = MAX( WRKBL, BDSPAC ) 441 MAXWRK = N*N + WRKBL 442 MINWRK = MAX( 3*N+M, BDSPAC ) 443 END IF 444 ELSE 445* 446* Path 10 (M at least N, but not much larger) 447* 448 CALL DGEBRD( M, N, A, LDA, S, DUM(1), DUM(1), 449 $ DUM(1), DUM(1), -1, IERR ) 450 LWORK_DGEBRD=DUM(1) 451 MAXWRK = 3*N + LWORK_DGEBRD 452 IF( WNTUS .OR. WNTUO ) THEN 453 CALL DORGBR( 'Q', M, N, N, A, LDA, DUM(1), 454 $ DUM(1), -1, IERR ) 455 LWORK_DORGBR_Q=DUM(1) 456 MAXWRK = MAX( MAXWRK, 3*N+LWORK_DORGBR_Q ) 457 END IF 458 IF( WNTUA ) THEN 459 CALL DORGBR( 'Q', M, M, N, A, LDA, DUM(1), 460 $ DUM(1), -1, IERR ) 461 LWORK_DORGBR_Q=DUM(1) 462 MAXWRK = MAX( MAXWRK, 3*N+LWORK_DORGBR_Q ) 463 END IF 464 IF( .NOT.WNTVN ) THEN 465 MAXWRK = MAX( MAXWRK, 3*N+LWORK_DORGBR_P ) 466 END IF 467 MAXWRK = MAX( MAXWRK, BDSPAC ) 468 MINWRK = MAX( 3*N+M, BDSPAC ) 469 END IF 470 ELSE IF( MINMN.GT.0 ) THEN 471* 472* Compute space needed for DBDSQR 473* 474 MNTHR = ILAENV( 6, 'DGESVD', JOBU // JOBVT, M, N, 0, 0 ) 475 BDSPAC = 5*M 476* Compute space needed for DGELQF 477 CALL DGELQF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR ) 478 LWORK_DGELQF=DUM(1) 479* Compute space needed for DORGLQ 480 CALL DORGLQ( N, N, M, DUM(1), N, DUM(1), DUM(1), -1, IERR ) 481 LWORK_DORGLQ_N=DUM(1) 482 CALL DORGLQ( M, N, M, A, LDA, DUM(1), DUM(1), -1, IERR ) 483 LWORK_DORGLQ_M=DUM(1) 484* Compute space needed for DGEBRD 485 CALL DGEBRD( M, M, A, LDA, S, DUM(1), DUM(1), 486 $ DUM(1), DUM(1), -1, IERR ) 487 LWORK_DGEBRD=DUM(1) 488* Compute space needed for DORGBR P 489 CALL DORGBR( 'P', M, M, M, A, N, DUM(1), 490 $ DUM(1), -1, IERR ) 491 LWORK_DORGBR_P=DUM(1) 492* Compute space needed for DORGBR Q 493 CALL DORGBR( 'Q', M, M, M, A, N, DUM(1), 494 $ DUM(1), -1, IERR ) 495 LWORK_DORGBR_Q=DUM(1) 496 IF( N.GE.MNTHR ) THEN 497 IF( WNTVN ) THEN 498* 499* Path 1t(N much larger than M, JOBVT='N') 500* 501 MAXWRK = M + LWORK_DGELQF 502 MAXWRK = MAX( MAXWRK, 3*M+LWORK_DGEBRD ) 503 IF( WNTUO .OR. WNTUAS ) 504 $ MAXWRK = MAX( MAXWRK, 3*M+LWORK_DORGBR_Q ) 505 MAXWRK = MAX( MAXWRK, BDSPAC ) 506 MINWRK = MAX( 4*M, BDSPAC ) 507 ELSE IF( WNTVO .AND. WNTUN ) THEN 508* 509* Path 2t(N much larger than M, JOBU='N', JOBVT='O') 510* 511 WRKBL = M + LWORK_DGELQF 512 WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_M ) 513 WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD ) 514 WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P ) 515 WRKBL = MAX( WRKBL, BDSPAC ) 516 MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M ) 517 MINWRK = MAX( 3*M+N, BDSPAC ) 518 ELSE IF( WNTVO .AND. WNTUAS ) THEN 519* 520* Path 3t(N much larger than M, JOBU='S' or 'A', 521* JOBVT='O') 522* 523 WRKBL = M + LWORK_DGELQF 524 WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_M ) 525 WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD ) 526 WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P ) 527 WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_Q ) 528 WRKBL = MAX( WRKBL, BDSPAC ) 529 MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M ) 530 MINWRK = MAX( 3*M+N, BDSPAC ) 531 ELSE IF( WNTVS .AND. WNTUN ) THEN 532* 533* Path 4t(N much larger than M, JOBU='N', JOBVT='S') 534* 535 WRKBL = M + LWORK_DGELQF 536 WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_M ) 537 WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD ) 538 WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P ) 539 WRKBL = MAX( WRKBL, BDSPAC ) 540 MAXWRK = M*M + WRKBL 541 MINWRK = MAX( 3*M+N, BDSPAC ) 542 ELSE IF( WNTVS .AND. WNTUO ) THEN 543* 544* Path 5t(N much larger than M, JOBU='O', JOBVT='S') 545* 546 WRKBL = M + LWORK_DGELQF 547 WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_M ) 548 WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD ) 549 WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P ) 550 WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_Q ) 551 WRKBL = MAX( WRKBL, BDSPAC ) 552 MAXWRK = 2*M*M + WRKBL 553 MINWRK = MAX( 3*M+N, BDSPAC ) 554 ELSE IF( WNTVS .AND. WNTUAS ) THEN 555* 556* Path 6t(N much larger than M, JOBU='S' or 'A', 557* JOBVT='S') 558* 559 WRKBL = M + LWORK_DGELQF 560 WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_M ) 561 WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD ) 562 WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P ) 563 WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_Q ) 564 WRKBL = MAX( WRKBL, BDSPAC ) 565 MAXWRK = M*M + WRKBL 566 MINWRK = MAX( 3*M+N, BDSPAC ) 567 ELSE IF( WNTVA .AND. WNTUN ) THEN 568* 569* Path 7t(N much larger than M, JOBU='N', JOBVT='A') 570* 571 WRKBL = M + LWORK_DGELQF 572 WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_N ) 573 WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD ) 574 WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P ) 575 WRKBL = MAX( WRKBL, BDSPAC ) 576 MAXWRK = M*M + WRKBL 577 MINWRK = MAX( 3*M+N, BDSPAC ) 578 ELSE IF( WNTVA .AND. WNTUO ) THEN 579* 580* Path 8t(N much larger than M, JOBU='O', JOBVT='A') 581* 582 WRKBL = M + LWORK_DGELQF 583 WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_N ) 584 WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD ) 585 WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P ) 586 WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_Q ) 587 WRKBL = MAX( WRKBL, BDSPAC ) 588 MAXWRK = 2*M*M + WRKBL 589 MINWRK = MAX( 3*M+N, BDSPAC ) 590 ELSE IF( WNTVA .AND. WNTUAS ) THEN 591* 592* Path 9t(N much larger than M, JOBU='S' or 'A', 593* JOBVT='A') 594* 595 WRKBL = M + LWORK_DGELQF 596 WRKBL = MAX( WRKBL, M+LWORK_DORGLQ_N ) 597 WRKBL = MAX( WRKBL, 3*M+LWORK_DGEBRD ) 598 WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_P ) 599 WRKBL = MAX( WRKBL, 3*M+LWORK_DORGBR_Q ) 600 WRKBL = MAX( WRKBL, BDSPAC ) 601 MAXWRK = M*M + WRKBL 602 MINWRK = MAX( 3*M+N, BDSPAC ) 603 END IF 604 ELSE 605* 606* Path 10t(N greater than M, but not much larger) 607* 608 CALL DGEBRD( M, N, A, LDA, S, DUM(1), DUM(1), 609 $ DUM(1), DUM(1), -1, IERR ) 610 LWORK_DGEBRD=DUM(1) 611 MAXWRK = 3*M + LWORK_DGEBRD 612 IF( WNTVS .OR. WNTVO ) THEN 613* Compute space needed for DORGBR P 614 CALL DORGBR( 'P', M, N, M, A, N, DUM(1), 615 $ DUM(1), -1, IERR ) 616 LWORK_DORGBR_P=DUM(1) 617 MAXWRK = MAX( MAXWRK, 3*M+LWORK_DORGBR_P ) 618 END IF 619 IF( WNTVA ) THEN 620 CALL DORGBR( 'P', N, N, M, A, N, DUM(1), 621 $ DUM(1), -1, IERR ) 622 LWORK_DORGBR_P=DUM(1) 623 MAXWRK = MAX( MAXWRK, 3*M+LWORK_DORGBR_P ) 624 END IF 625 IF( .NOT.WNTUN ) THEN 626 MAXWRK = MAX( MAXWRK, 3*M+LWORK_DORGBR_Q ) 627 END IF 628 MAXWRK = MAX( MAXWRK, BDSPAC ) 629 MINWRK = MAX( 3*M+N, BDSPAC ) 630 END IF 631 END IF 632 MAXWRK = MAX( MAXWRK, MINWRK ) 633 WORK( 1 ) = MAXWRK 634* 635 IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN 636 INFO = -13 637 END IF 638 END IF 639* 640 IF( INFO.NE.0 ) THEN 641 CALL XERBLA( 'DGESVD', -INFO ) 642 RETURN 643 ELSE IF( LQUERY ) THEN 644 RETURN 645 END IF 646* 647* Quick return if possible 648* 649 IF( M.EQ.0 .OR. N.EQ.0 ) THEN 650 RETURN 651 END IF 652* 653* Get machine constants 654* 655 EPS = DLAMCH( 'P' ) 656 SMLNUM = SQRT( DLAMCH( 'S' ) ) / EPS 657 BIGNUM = ONE / SMLNUM 658* 659* Scale A if max element outside range [SMLNUM,BIGNUM] 660* 661 ANRM = DLANGE( 'M', M, N, A, LDA, DUM ) 662 ISCL = 0 663 IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN 664 ISCL = 1 665 CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR ) 666 ELSE IF( ANRM.GT.BIGNUM ) THEN 667 ISCL = 1 668 CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR ) 669 END IF 670* 671 IF( M.GE.N ) THEN 672* 673* A has at least as many rows as columns. If A has sufficiently 674* more rows than columns, first reduce using the QR 675* decomposition (if sufficient workspace available) 676* 677 IF( M.GE.MNTHR ) THEN 678* 679 IF( WNTUN ) THEN 680* 681* Path 1 (M much larger than N, JOBU='N') 682* No left singular vectors to be computed 683* 684 ITAU = 1 685 IWORK = ITAU + N 686* 687* Compute A=Q*R 688* (Workspace: need 2*N, prefer N+N*NB) 689* 690 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), 691 $ LWORK-IWORK+1, IERR ) 692* 693* Zero out below R 694* 695 CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA ) 696 IE = 1 697 ITAUQ = IE + N 698 ITAUP = ITAUQ + N 699 IWORK = ITAUP + N 700* 701* Bidiagonalize R in A 702* (Workspace: need 4*N, prefer 3*N+2*N*NB) 703* 704 CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), 705 $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, 706 $ IERR ) 707 NCVT = 0 708 IF( WNTVO .OR. WNTVAS ) THEN 709* 710* If right singular vectors desired, generate P'. 711* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) 712* 713 CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), 714 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 715 NCVT = N 716 END IF 717 IWORK = IE + N 718* 719* Perform bidiagonal QR iteration, computing right 720* singular vectors of A in A if desired 721* (Workspace: need BDSPAC) 722* 723 CALL DBDSQR( 'U', N, NCVT, 0, 0, S, WORK( IE ), A, LDA, 724 $ DUM, 1, DUM, 1, WORK( IWORK ), INFO ) 725* 726* If right singular vectors desired in VT, copy them there 727* 728 IF( WNTVAS ) 729 $ CALL DLACPY( 'F', N, N, A, LDA, VT, LDVT ) 730* 731 ELSE IF( WNTUO .AND. WNTVN ) THEN 732* 733* Path 2 (M much larger than N, JOBU='O', JOBVT='N') 734* N left singular vectors to be overwritten on A and 735* no right singular vectors to be computed 736* 737 IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN 738* 739* Sufficient workspace for a fast algorithm 740* 741 IR = 1 742 IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN 743* 744* WORK(IU) is LDA by N, WORK(IR) is LDA by N 745* 746 LDWRKU = LDA 747 LDWRKR = LDA 748 ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN 749* 750* WORK(IU) is LDA by N, WORK(IR) is N by N 751* 752 LDWRKU = LDA 753 LDWRKR = N 754 ELSE 755* 756* WORK(IU) is LDWRKU by N, WORK(IR) is N by N 757* 758 LDWRKU = ( LWORK-N*N-N ) / N 759 LDWRKR = N 760 END IF 761 ITAU = IR + LDWRKR*N 762 IWORK = ITAU + N 763* 764* Compute A=Q*R 765* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) 766* 767 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), 768 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 769* 770* Copy R to WORK(IR) and zero out below it 771* 772 CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR ) 773 CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IR+1 ), 774 $ LDWRKR ) 775* 776* Generate Q in A 777* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) 778* 779 CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ), 780 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 781 IE = ITAU 782 ITAUQ = IE + N 783 ITAUP = ITAUQ + N 784 IWORK = ITAUP + N 785* 786* Bidiagonalize R in WORK(IR) 787* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) 788* 789 CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ), 790 $ WORK( ITAUQ ), WORK( ITAUP ), 791 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 792* 793* Generate left vectors bidiagonalizing R 794* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) 795* 796 CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, 797 $ WORK( ITAUQ ), WORK( IWORK ), 798 $ LWORK-IWORK+1, IERR ) 799 IWORK = IE + N 800* 801* Perform bidiagonal QR iteration, computing left 802* singular vectors of R in WORK(IR) 803* (Workspace: need N*N+BDSPAC) 804* 805 CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, 1, 806 $ WORK( IR ), LDWRKR, DUM, 1, 807 $ WORK( IWORK ), INFO ) 808 IU = IE + N 809* 810* Multiply Q in A by left singular vectors of R in 811* WORK(IR), storing result in WORK(IU) and copying to A 812* (Workspace: need N*N+2*N, prefer N*N+M*N+N) 813* 814 DO 10 I = 1, M, LDWRKU 815 CHUNK = MIN( M-I+1, LDWRKU ) 816 CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ), 817 $ LDA, WORK( IR ), LDWRKR, ZERO, 818 $ WORK( IU ), LDWRKU ) 819 CALL DLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU, 820 $ A( I, 1 ), LDA ) 821 10 CONTINUE 822* 823 ELSE 824* 825* Insufficient workspace for a fast algorithm 826* 827 IE = 1 828 ITAUQ = IE + N 829 ITAUP = ITAUQ + N 830 IWORK = ITAUP + N 831* 832* Bidiagonalize A 833* (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) 834* 835 CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), 836 $ WORK( ITAUQ ), WORK( ITAUP ), 837 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 838* 839* Generate left vectors bidiagonalizing A 840* (Workspace: need 4*N, prefer 3*N+N*NB) 841* 842 CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ), 843 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 844 IWORK = IE + N 845* 846* Perform bidiagonal QR iteration, computing left 847* singular vectors of A in A 848* (Workspace: need BDSPAC) 849* 850 CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, 1, 851 $ A, LDA, DUM, 1, WORK( IWORK ), INFO ) 852* 853 END IF 854* 855 ELSE IF( WNTUO .AND. WNTVAS ) THEN 856* 857* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A') 858* N left singular vectors to be overwritten on A and 859* N right singular vectors to be computed in VT 860* 861 IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN 862* 863* Sufficient workspace for a fast algorithm 864* 865 IR = 1 866 IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN 867* 868* WORK(IU) is LDA by N and WORK(IR) is LDA by N 869* 870 LDWRKU = LDA 871 LDWRKR = LDA 872 ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN 873* 874* WORK(IU) is LDA by N and WORK(IR) is N by N 875* 876 LDWRKU = LDA 877 LDWRKR = N 878 ELSE 879* 880* WORK(IU) is LDWRKU by N and WORK(IR) is N by N 881* 882 LDWRKU = ( LWORK-N*N-N ) / N 883 LDWRKR = N 884 END IF 885 ITAU = IR + LDWRKR*N 886 IWORK = ITAU + N 887* 888* Compute A=Q*R 889* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) 890* 891 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), 892 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 893* 894* Copy R to VT, zeroing out below it 895* 896 CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT ) 897 IF( N.GT.1 ) 898 $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, 899 $ VT( 2, 1 ), LDVT ) 900* 901* Generate Q in A 902* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) 903* 904 CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ), 905 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 906 IE = ITAU 907 ITAUQ = IE + N 908 ITAUP = ITAUQ + N 909 IWORK = ITAUP + N 910* 911* Bidiagonalize R in VT, copying result to WORK(IR) 912* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) 913* 914 CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ), 915 $ WORK( ITAUQ ), WORK( ITAUP ), 916 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 917 CALL DLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR ) 918* 919* Generate left vectors bidiagonalizing R in WORK(IR) 920* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) 921* 922 CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, 923 $ WORK( ITAUQ ), WORK( IWORK ), 924 $ LWORK-IWORK+1, IERR ) 925* 926* Generate right vectors bidiagonalizing R in VT 927* (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB) 928* 929 CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), 930 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 931 IWORK = IE + N 932* 933* Perform bidiagonal QR iteration, computing left 934* singular vectors of R in WORK(IR) and computing right 935* singular vectors of R in VT 936* (Workspace: need N*N+BDSPAC) 937* 938 CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, LDVT, 939 $ WORK( IR ), LDWRKR, DUM, 1, 940 $ WORK( IWORK ), INFO ) 941 IU = IE + N 942* 943* Multiply Q in A by left singular vectors of R in 944* WORK(IR), storing result in WORK(IU) and copying to A 945* (Workspace: need N*N+2*N, prefer N*N+M*N+N) 946* 947 DO 20 I = 1, M, LDWRKU 948 CHUNK = MIN( M-I+1, LDWRKU ) 949 CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ), 950 $ LDA, WORK( IR ), LDWRKR, ZERO, 951 $ WORK( IU ), LDWRKU ) 952 CALL DLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU, 953 $ A( I, 1 ), LDA ) 954 20 CONTINUE 955* 956 ELSE 957* 958* Insufficient workspace for a fast algorithm 959* 960 ITAU = 1 961 IWORK = ITAU + N 962* 963* Compute A=Q*R 964* (Workspace: need 2*N, prefer N+N*NB) 965* 966 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), 967 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 968* 969* Copy R to VT, zeroing out below it 970* 971 CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT ) 972 IF( N.GT.1 ) 973 $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, 974 $ VT( 2, 1 ), LDVT ) 975* 976* Generate Q in A 977* (Workspace: need 2*N, prefer N+N*NB) 978* 979 CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ), 980 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 981 IE = ITAU 982 ITAUQ = IE + N 983 ITAUP = ITAUQ + N 984 IWORK = ITAUP + N 985* 986* Bidiagonalize R in VT 987* (Workspace: need 4*N, prefer 3*N+2*N*NB) 988* 989 CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ), 990 $ WORK( ITAUQ ), WORK( ITAUP ), 991 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 992* 993* Multiply Q in A by left vectors bidiagonalizing R 994* (Workspace: need 3*N+M, prefer 3*N+M*NB) 995* 996 CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT, 997 $ WORK( ITAUQ ), A, LDA, WORK( IWORK ), 998 $ LWORK-IWORK+1, IERR ) 999* 1000* Generate right vectors bidiagonalizing R in VT 1001* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) 1002* 1003 CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), 1004 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 1005 IWORK = IE + N 1006* 1007* Perform bidiagonal QR iteration, computing left 1008* singular vectors of A in A and computing right 1009* singular vectors of A in VT 1010* (Workspace: need BDSPAC) 1011* 1012 CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, LDVT, 1013 $ A, LDA, DUM, 1, WORK( IWORK ), INFO ) 1014* 1015 END IF 1016* 1017 ELSE IF( WNTUS ) THEN 1018* 1019 IF( WNTVN ) THEN 1020* 1021* Path 4 (M much larger than N, JOBU='S', JOBVT='N') 1022* N left singular vectors to be computed in U and 1023* no right singular vectors to be computed 1024* 1025 IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN 1026* 1027* Sufficient workspace for a fast algorithm 1028* 1029 IR = 1 1030 IF( LWORK.GE.WRKBL+LDA*N ) THEN 1031* 1032* WORK(IR) is LDA by N 1033* 1034 LDWRKR = LDA 1035 ELSE 1036* 1037* WORK(IR) is N by N 1038* 1039 LDWRKR = N 1040 END IF 1041 ITAU = IR + LDWRKR*N 1042 IWORK = ITAU + N 1043* 1044* Compute A=Q*R 1045* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) 1046* 1047 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), 1048 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 1049* 1050* Copy R to WORK(IR), zeroing out below it 1051* 1052 CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ), 1053 $ LDWRKR ) 1054 CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, 1055 $ WORK( IR+1 ), LDWRKR ) 1056* 1057* Generate Q in A 1058* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) 1059* 1060 CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ), 1061 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 1062 IE = ITAU 1063 ITAUQ = IE + N 1064 ITAUP = ITAUQ + N 1065 IWORK = ITAUP + N 1066* 1067* Bidiagonalize R in WORK(IR) 1068* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) 1069* 1070 CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S, 1071 $ WORK( IE ), WORK( ITAUQ ), 1072 $ WORK( ITAUP ), WORK( IWORK ), 1073 $ LWORK-IWORK+1, IERR ) 1074* 1075* Generate left vectors bidiagonalizing R in WORK(IR) 1076* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) 1077* 1078 CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, 1079 $ WORK( ITAUQ ), WORK( IWORK ), 1080 $ LWORK-IWORK+1, IERR ) 1081 IWORK = IE + N 1082* 1083* Perform bidiagonal QR iteration, computing left 1084* singular vectors of R in WORK(IR) 1085* (Workspace: need N*N+BDSPAC) 1086* 1087 CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, 1088 $ 1, WORK( IR ), LDWRKR, DUM, 1, 1089 $ WORK( IWORK ), INFO ) 1090* 1091* Multiply Q in A by left singular vectors of R in 1092* WORK(IR), storing result in U 1093* (Workspace: need N*N) 1094* 1095 CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA, 1096 $ WORK( IR ), LDWRKR, ZERO, U, LDU ) 1097* 1098 ELSE 1099* 1100* Insufficient workspace for a fast algorithm 1101* 1102 ITAU = 1 1103 IWORK = ITAU + N 1104* 1105* Compute A=Q*R, copying result to U 1106* (Workspace: need 2*N, prefer N+N*NB) 1107* 1108 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), 1109 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 1110 CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) 1111* 1112* Generate Q in U 1113* (Workspace: need 2*N, prefer N+N*NB) 1114* 1115 CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ), 1116 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 1117 IE = ITAU 1118 ITAUQ = IE + N 1119 ITAUP = ITAUQ + N 1120 IWORK = ITAUP + N 1121* 1122* Zero out below R in A 1123* 1124 CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), 1125 $ LDA ) 1126* 1127* Bidiagonalize R in A 1128* (Workspace: need 4*N, prefer 3*N+2*N*NB) 1129* 1130 CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), 1131 $ WORK( ITAUQ ), WORK( ITAUP ), 1132 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 1133* 1134* Multiply Q in U by left vectors bidiagonalizing R 1135* (Workspace: need 3*N+M, prefer 3*N+M*NB) 1136* 1137 CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA, 1138 $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), 1139 $ LWORK-IWORK+1, IERR ) 1140 IWORK = IE + N 1141* 1142* Perform bidiagonal QR iteration, computing left 1143* singular vectors of A in U 1144* (Workspace: need BDSPAC) 1145* 1146 CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, 1147 $ 1, U, LDU, DUM, 1, WORK( IWORK ), 1148 $ INFO ) 1149* 1150 END IF 1151* 1152 ELSE IF( WNTVO ) THEN 1153* 1154* Path 5 (M much larger than N, JOBU='S', JOBVT='O') 1155* N left singular vectors to be computed in U and 1156* N right singular vectors to be overwritten on A 1157* 1158 IF( LWORK.GE.2*N*N+MAX( 4*N, BDSPAC ) ) THEN 1159* 1160* Sufficient workspace for a fast algorithm 1161* 1162 IU = 1 1163 IF( LWORK.GE.WRKBL+2*LDA*N ) THEN 1164* 1165* WORK(IU) is LDA by N and WORK(IR) is LDA by N 1166* 1167 LDWRKU = LDA 1168 IR = IU + LDWRKU*N 1169 LDWRKR = LDA 1170 ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN 1171* 1172* WORK(IU) is LDA by N and WORK(IR) is N by N 1173* 1174 LDWRKU = LDA 1175 IR = IU + LDWRKU*N 1176 LDWRKR = N 1177 ELSE 1178* 1179* WORK(IU) is N by N and WORK(IR) is N by N 1180* 1181 LDWRKU = N 1182 IR = IU + LDWRKU*N 1183 LDWRKR = N 1184 END IF 1185 ITAU = IR + LDWRKR*N 1186 IWORK = ITAU + N 1187* 1188* Compute A=Q*R 1189* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) 1190* 1191 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), 1192 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 1193* 1194* Copy R to WORK(IU), zeroing out below it 1195* 1196 CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ), 1197 $ LDWRKU ) 1198 CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, 1199 $ WORK( IU+1 ), LDWRKU ) 1200* 1201* Generate Q in A 1202* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) 1203* 1204 CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ), 1205 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 1206 IE = ITAU 1207 ITAUQ = IE + N 1208 ITAUP = ITAUQ + N 1209 IWORK = ITAUP + N 1210* 1211* Bidiagonalize R in WORK(IU), copying result to 1212* WORK(IR) 1213* (Workspace: need 2*N*N+4*N, 1214* prefer 2*N*N+3*N+2*N*NB) 1215* 1216 CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S, 1217 $ WORK( IE ), WORK( ITAUQ ), 1218 $ WORK( ITAUP ), WORK( IWORK ), 1219 $ LWORK-IWORK+1, IERR ) 1220 CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, 1221 $ WORK( IR ), LDWRKR ) 1222* 1223* Generate left bidiagonalizing vectors in WORK(IU) 1224* (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB) 1225* 1226 CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, 1227 $ WORK( ITAUQ ), WORK( IWORK ), 1228 $ LWORK-IWORK+1, IERR ) 1229* 1230* Generate right bidiagonalizing vectors in WORK(IR) 1231* (Workspace: need 2*N*N+4*N-1, 1232* prefer 2*N*N+3*N+(N-1)*NB) 1233* 1234 CALL DORGBR( 'P', N, N, N, WORK( IR ), LDWRKR, 1235 $ WORK( ITAUP ), WORK( IWORK ), 1236 $ LWORK-IWORK+1, IERR ) 1237 IWORK = IE + N 1238* 1239* Perform bidiagonal QR iteration, computing left 1240* singular vectors of R in WORK(IU) and computing 1241* right singular vectors of R in WORK(IR) 1242* (Workspace: need 2*N*N+BDSPAC) 1243* 1244 CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), 1245 $ WORK( IR ), LDWRKR, WORK( IU ), 1246 $ LDWRKU, DUM, 1, WORK( IWORK ), INFO ) 1247* 1248* Multiply Q in A by left singular vectors of R in 1249* WORK(IU), storing result in U 1250* (Workspace: need N*N) 1251* 1252 CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA, 1253 $ WORK( IU ), LDWRKU, ZERO, U, LDU ) 1254* 1255* Copy right singular vectors of R to A 1256* (Workspace: need N*N) 1257* 1258 CALL DLACPY( 'F', N, N, WORK( IR ), LDWRKR, A, 1259 $ LDA ) 1260* 1261 ELSE 1262* 1263* Insufficient workspace for a fast algorithm 1264* 1265 ITAU = 1 1266 IWORK = ITAU + N 1267* 1268* Compute A=Q*R, copying result to U 1269* (Workspace: need 2*N, prefer N+N*NB) 1270* 1271 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), 1272 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 1273 CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) 1274* 1275* Generate Q in U 1276* (Workspace: need 2*N, prefer N+N*NB) 1277* 1278 CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ), 1279 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 1280 IE = ITAU 1281 ITAUQ = IE + N 1282 ITAUP = ITAUQ + N 1283 IWORK = ITAUP + N 1284* 1285* Zero out below R in A 1286* 1287 CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), 1288 $ LDA ) 1289* 1290* Bidiagonalize R in A 1291* (Workspace: need 4*N, prefer 3*N+2*N*NB) 1292* 1293 CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), 1294 $ WORK( ITAUQ ), WORK( ITAUP ), 1295 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 1296* 1297* Multiply Q in U by left vectors bidiagonalizing R 1298* (Workspace: need 3*N+M, prefer 3*N+M*NB) 1299* 1300 CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA, 1301 $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), 1302 $ LWORK-IWORK+1, IERR ) 1303* 1304* Generate right vectors bidiagonalizing R in A 1305* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) 1306* 1307 CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), 1308 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 1309 IWORK = IE + N 1310* 1311* Perform bidiagonal QR iteration, computing left 1312* singular vectors of A in U and computing right 1313* singular vectors of A in A 1314* (Workspace: need BDSPAC) 1315* 1316 CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A, 1317 $ LDA, U, LDU, DUM, 1, WORK( IWORK ), 1318 $ INFO ) 1319* 1320 END IF 1321* 1322 ELSE IF( WNTVAS ) THEN 1323* 1324* Path 6 (M much larger than N, JOBU='S', JOBVT='S' 1325* or 'A') 1326* N left singular vectors to be computed in U and 1327* N right singular vectors to be computed in VT 1328* 1329 IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN 1330* 1331* Sufficient workspace for a fast algorithm 1332* 1333 IU = 1 1334 IF( LWORK.GE.WRKBL+LDA*N ) THEN 1335* 1336* WORK(IU) is LDA by N 1337* 1338 LDWRKU = LDA 1339 ELSE 1340* 1341* WORK(IU) is N by N 1342* 1343 LDWRKU = N 1344 END IF 1345 ITAU = IU + LDWRKU*N 1346 IWORK = ITAU + N 1347* 1348* Compute A=Q*R 1349* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) 1350* 1351 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), 1352 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 1353* 1354* Copy R to WORK(IU), zeroing out below it 1355* 1356 CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ), 1357 $ LDWRKU ) 1358 CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, 1359 $ WORK( IU+1 ), LDWRKU ) 1360* 1361* Generate Q in A 1362* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) 1363* 1364 CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ), 1365 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 1366 IE = ITAU 1367 ITAUQ = IE + N 1368 ITAUP = ITAUQ + N 1369 IWORK = ITAUP + N 1370* 1371* Bidiagonalize R in WORK(IU), copying result to VT 1372* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) 1373* 1374 CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S, 1375 $ WORK( IE ), WORK( ITAUQ ), 1376 $ WORK( ITAUP ), WORK( IWORK ), 1377 $ LWORK-IWORK+1, IERR ) 1378 CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT, 1379 $ LDVT ) 1380* 1381* Generate left bidiagonalizing vectors in WORK(IU) 1382* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) 1383* 1384 CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, 1385 $ WORK( ITAUQ ), WORK( IWORK ), 1386 $ LWORK-IWORK+1, IERR ) 1387* 1388* Generate right bidiagonalizing vectors in VT 1389* (Workspace: need N*N+4*N-1, 1390* prefer N*N+3*N+(N-1)*NB) 1391* 1392 CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), 1393 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 1394 IWORK = IE + N 1395* 1396* Perform bidiagonal QR iteration, computing left 1397* singular vectors of R in WORK(IU) and computing 1398* right singular vectors of R in VT 1399* (Workspace: need N*N+BDSPAC) 1400* 1401 CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, 1402 $ LDVT, WORK( IU ), LDWRKU, DUM, 1, 1403 $ WORK( IWORK ), INFO ) 1404* 1405* Multiply Q in A by left singular vectors of R in 1406* WORK(IU), storing result in U 1407* (Workspace: need N*N) 1408* 1409 CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA, 1410 $ WORK( IU ), LDWRKU, ZERO, U, LDU ) 1411* 1412 ELSE 1413* 1414* Insufficient workspace for a fast algorithm 1415* 1416 ITAU = 1 1417 IWORK = ITAU + N 1418* 1419* Compute A=Q*R, copying result to U 1420* (Workspace: need 2*N, prefer N+N*NB) 1421* 1422 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), 1423 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 1424 CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) 1425* 1426* Generate Q in U 1427* (Workspace: need 2*N, prefer N+N*NB) 1428* 1429 CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ), 1430 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 1431* 1432* Copy R to VT, zeroing out below it 1433* 1434 CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT ) 1435 IF( N.GT.1 ) 1436 $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, 1437 $ VT( 2, 1 ), LDVT ) 1438 IE = ITAU 1439 ITAUQ = IE + N 1440 ITAUP = ITAUQ + N 1441 IWORK = ITAUP + N 1442* 1443* Bidiagonalize R in VT 1444* (Workspace: need 4*N, prefer 3*N+2*N*NB) 1445* 1446 CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ), 1447 $ WORK( ITAUQ ), WORK( ITAUP ), 1448 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 1449* 1450* Multiply Q in U by left bidiagonalizing vectors 1451* in VT 1452* (Workspace: need 3*N+M, prefer 3*N+M*NB) 1453* 1454 CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT, 1455 $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), 1456 $ LWORK-IWORK+1, IERR ) 1457* 1458* Generate right bidiagonalizing vectors in VT 1459* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) 1460* 1461 CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), 1462 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 1463 IWORK = IE + N 1464* 1465* Perform bidiagonal QR iteration, computing left 1466* singular vectors of A in U and computing right 1467* singular vectors of A in VT 1468* (Workspace: need BDSPAC) 1469* 1470 CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, 1471 $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), 1472 $ INFO ) 1473* 1474 END IF 1475* 1476 END IF 1477* 1478 ELSE IF( WNTUA ) THEN 1479* 1480 IF( WNTVN ) THEN 1481* 1482* Path 7 (M much larger than N, JOBU='A', JOBVT='N') 1483* M left singular vectors to be computed in U and 1484* no right singular vectors to be computed 1485* 1486 IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN 1487* 1488* Sufficient workspace for a fast algorithm 1489* 1490 IR = 1 1491 IF( LWORK.GE.WRKBL+LDA*N ) THEN 1492* 1493* WORK(IR) is LDA by N 1494* 1495 LDWRKR = LDA 1496 ELSE 1497* 1498* WORK(IR) is N by N 1499* 1500 LDWRKR = N 1501 END IF 1502 ITAU = IR + LDWRKR*N 1503 IWORK = ITAU + N 1504* 1505* Compute A=Q*R, copying result to U 1506* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) 1507* 1508 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), 1509 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 1510 CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) 1511* 1512* Copy R to WORK(IR), zeroing out below it 1513* 1514 CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ), 1515 $ LDWRKR ) 1516 CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, 1517 $ WORK( IR+1 ), LDWRKR ) 1518* 1519* Generate Q in U 1520* (Workspace: need N*N+N+M, prefer N*N+N+M*NB) 1521* 1522 CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ), 1523 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 1524 IE = ITAU 1525 ITAUQ = IE + N 1526 ITAUP = ITAUQ + N 1527 IWORK = ITAUP + N 1528* 1529* Bidiagonalize R in WORK(IR) 1530* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) 1531* 1532 CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S, 1533 $ WORK( IE ), WORK( ITAUQ ), 1534 $ WORK( ITAUP ), WORK( IWORK ), 1535 $ LWORK-IWORK+1, IERR ) 1536* 1537* Generate left bidiagonalizing vectors in WORK(IR) 1538* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) 1539* 1540 CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, 1541 $ WORK( ITAUQ ), WORK( IWORK ), 1542 $ LWORK-IWORK+1, IERR ) 1543 IWORK = IE + N 1544* 1545* Perform bidiagonal QR iteration, computing left 1546* singular vectors of R in WORK(IR) 1547* (Workspace: need N*N+BDSPAC) 1548* 1549 CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, 1550 $ 1, WORK( IR ), LDWRKR, DUM, 1, 1551 $ WORK( IWORK ), INFO ) 1552* 1553* Multiply Q in U by left singular vectors of R in 1554* WORK(IR), storing result in A 1555* (Workspace: need N*N) 1556* 1557 CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU, 1558 $ WORK( IR ), LDWRKR, ZERO, A, LDA ) 1559* 1560* Copy left singular vectors of A from A to U 1561* 1562 CALL DLACPY( 'F', M, N, A, LDA, U, LDU ) 1563* 1564 ELSE 1565* 1566* Insufficient workspace for a fast algorithm 1567* 1568 ITAU = 1 1569 IWORK = ITAU + N 1570* 1571* Compute A=Q*R, copying result to U 1572* (Workspace: need 2*N, prefer N+N*NB) 1573* 1574 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), 1575 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 1576 CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) 1577* 1578* Generate Q in U 1579* (Workspace: need N+M, prefer N+M*NB) 1580* 1581 CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ), 1582 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 1583 IE = ITAU 1584 ITAUQ = IE + N 1585 ITAUP = ITAUQ + N 1586 IWORK = ITAUP + N 1587* 1588* Zero out below R in A 1589* 1590 CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), 1591 $ LDA ) 1592* 1593* Bidiagonalize R in A 1594* (Workspace: need 4*N, prefer 3*N+2*N*NB) 1595* 1596 CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), 1597 $ WORK( ITAUQ ), WORK( ITAUP ), 1598 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 1599* 1600* Multiply Q in U by left bidiagonalizing vectors 1601* in A 1602* (Workspace: need 3*N+M, prefer 3*N+M*NB) 1603* 1604 CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA, 1605 $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), 1606 $ LWORK-IWORK+1, IERR ) 1607 IWORK = IE + N 1608* 1609* Perform bidiagonal QR iteration, computing left 1610* singular vectors of A in U 1611* (Workspace: need BDSPAC) 1612* 1613 CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, 1614 $ 1, U, LDU, DUM, 1, WORK( IWORK ), 1615 $ INFO ) 1616* 1617 END IF 1618* 1619 ELSE IF( WNTVO ) THEN 1620* 1621* Path 8 (M much larger than N, JOBU='A', JOBVT='O') 1622* M left singular vectors to be computed in U and 1623* N right singular vectors to be overwritten on A 1624* 1625 IF( LWORK.GE.2*N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN 1626* 1627* Sufficient workspace for a fast algorithm 1628* 1629 IU = 1 1630 IF( LWORK.GE.WRKBL+2*LDA*N ) THEN 1631* 1632* WORK(IU) is LDA by N and WORK(IR) is LDA by N 1633* 1634 LDWRKU = LDA 1635 IR = IU + LDWRKU*N 1636 LDWRKR = LDA 1637 ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN 1638* 1639* WORK(IU) is LDA by N and WORK(IR) is N by N 1640* 1641 LDWRKU = LDA 1642 IR = IU + LDWRKU*N 1643 LDWRKR = N 1644 ELSE 1645* 1646* WORK(IU) is N by N and WORK(IR) is N by N 1647* 1648 LDWRKU = N 1649 IR = IU + LDWRKU*N 1650 LDWRKR = N 1651 END IF 1652 ITAU = IR + LDWRKR*N 1653 IWORK = ITAU + N 1654* 1655* Compute A=Q*R, copying result to U 1656* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) 1657* 1658 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), 1659 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 1660 CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) 1661* 1662* Generate Q in U 1663* (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB) 1664* 1665 CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ), 1666 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 1667* 1668* Copy R to WORK(IU), zeroing out below it 1669* 1670 CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ), 1671 $ LDWRKU ) 1672 CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, 1673 $ WORK( IU+1 ), LDWRKU ) 1674 IE = ITAU 1675 ITAUQ = IE + N 1676 ITAUP = ITAUQ + N 1677 IWORK = ITAUP + N 1678* 1679* Bidiagonalize R in WORK(IU), copying result to 1680* WORK(IR) 1681* (Workspace: need 2*N*N+4*N, 1682* prefer 2*N*N+3*N+2*N*NB) 1683* 1684 CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S, 1685 $ WORK( IE ), WORK( ITAUQ ), 1686 $ WORK( ITAUP ), WORK( IWORK ), 1687 $ LWORK-IWORK+1, IERR ) 1688 CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, 1689 $ WORK( IR ), LDWRKR ) 1690* 1691* Generate left bidiagonalizing vectors in WORK(IU) 1692* (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB) 1693* 1694 CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, 1695 $ WORK( ITAUQ ), WORK( IWORK ), 1696 $ LWORK-IWORK+1, IERR ) 1697* 1698* Generate right bidiagonalizing vectors in WORK(IR) 1699* (Workspace: need 2*N*N+4*N-1, 1700* prefer 2*N*N+3*N+(N-1)*NB) 1701* 1702 CALL DORGBR( 'P', N, N, N, WORK( IR ), LDWRKR, 1703 $ WORK( ITAUP ), WORK( IWORK ), 1704 $ LWORK-IWORK+1, IERR ) 1705 IWORK = IE + N 1706* 1707* Perform bidiagonal QR iteration, computing left 1708* singular vectors of R in WORK(IU) and computing 1709* right singular vectors of R in WORK(IR) 1710* (Workspace: need 2*N*N+BDSPAC) 1711* 1712 CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), 1713 $ WORK( IR ), LDWRKR, WORK( IU ), 1714 $ LDWRKU, DUM, 1, WORK( IWORK ), INFO ) 1715* 1716* Multiply Q in U by left singular vectors of R in 1717* WORK(IU), storing result in A 1718* (Workspace: need N*N) 1719* 1720 CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU, 1721 $ WORK( IU ), LDWRKU, ZERO, A, LDA ) 1722* 1723* Copy left singular vectors of A from A to U 1724* 1725 CALL DLACPY( 'F', M, N, A, LDA, U, LDU ) 1726* 1727* Copy right singular vectors of R from WORK(IR) to A 1728* 1729 CALL DLACPY( 'F', N, N, WORK( IR ), LDWRKR, A, 1730 $ LDA ) 1731* 1732 ELSE 1733* 1734* Insufficient workspace for a fast algorithm 1735* 1736 ITAU = 1 1737 IWORK = ITAU + N 1738* 1739* Compute A=Q*R, copying result to U 1740* (Workspace: need 2*N, prefer N+N*NB) 1741* 1742 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), 1743 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 1744 CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) 1745* 1746* Generate Q in U 1747* (Workspace: need N+M, prefer N+M*NB) 1748* 1749 CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ), 1750 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 1751 IE = ITAU 1752 ITAUQ = IE + N 1753 ITAUP = ITAUQ + N 1754 IWORK = ITAUP + N 1755* 1756* Zero out below R in A 1757* 1758 CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), 1759 $ LDA ) 1760* 1761* Bidiagonalize R in A 1762* (Workspace: need 4*N, prefer 3*N+2*N*NB) 1763* 1764 CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), 1765 $ WORK( ITAUQ ), WORK( ITAUP ), 1766 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 1767* 1768* Multiply Q in U by left bidiagonalizing vectors 1769* in A 1770* (Workspace: need 3*N+M, prefer 3*N+M*NB) 1771* 1772 CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA, 1773 $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), 1774 $ LWORK-IWORK+1, IERR ) 1775* 1776* Generate right bidiagonalizing vectors in A 1777* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) 1778* 1779 CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), 1780 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 1781 IWORK = IE + N 1782* 1783* Perform bidiagonal QR iteration, computing left 1784* singular vectors of A in U and computing right 1785* singular vectors of A in A 1786* (Workspace: need BDSPAC) 1787* 1788 CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A, 1789 $ LDA, U, LDU, DUM, 1, WORK( IWORK ), 1790 $ INFO ) 1791* 1792 END IF 1793* 1794 ELSE IF( WNTVAS ) THEN 1795* 1796* Path 9 (M much larger than N, JOBU='A', JOBVT='S' 1797* or 'A') 1798* M left singular vectors to be computed in U and 1799* N right singular vectors to be computed in VT 1800* 1801 IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN 1802* 1803* Sufficient workspace for a fast algorithm 1804* 1805 IU = 1 1806 IF( LWORK.GE.WRKBL+LDA*N ) THEN 1807* 1808* WORK(IU) is LDA by N 1809* 1810 LDWRKU = LDA 1811 ELSE 1812* 1813* WORK(IU) is N by N 1814* 1815 LDWRKU = N 1816 END IF 1817 ITAU = IU + LDWRKU*N 1818 IWORK = ITAU + N 1819* 1820* Compute A=Q*R, copying result to U 1821* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) 1822* 1823 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), 1824 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 1825 CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) 1826* 1827* Generate Q in U 1828* (Workspace: need N*N+N+M, prefer N*N+N+M*NB) 1829* 1830 CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ), 1831 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 1832* 1833* Copy R to WORK(IU), zeroing out below it 1834* 1835 CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ), 1836 $ LDWRKU ) 1837 CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, 1838 $ WORK( IU+1 ), LDWRKU ) 1839 IE = ITAU 1840 ITAUQ = IE + N 1841 ITAUP = ITAUQ + N 1842 IWORK = ITAUP + N 1843* 1844* Bidiagonalize R in WORK(IU), copying result to VT 1845* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) 1846* 1847 CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S, 1848 $ WORK( IE ), WORK( ITAUQ ), 1849 $ WORK( ITAUP ), WORK( IWORK ), 1850 $ LWORK-IWORK+1, IERR ) 1851 CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT, 1852 $ LDVT ) 1853* 1854* Generate left bidiagonalizing vectors in WORK(IU) 1855* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) 1856* 1857 CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, 1858 $ WORK( ITAUQ ), WORK( IWORK ), 1859 $ LWORK-IWORK+1, IERR ) 1860* 1861* Generate right bidiagonalizing vectors in VT 1862* (Workspace: need N*N+4*N-1, 1863* prefer N*N+3*N+(N-1)*NB) 1864* 1865 CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), 1866 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 1867 IWORK = IE + N 1868* 1869* Perform bidiagonal QR iteration, computing left 1870* singular vectors of R in WORK(IU) and computing 1871* right singular vectors of R in VT 1872* (Workspace: need N*N+BDSPAC) 1873* 1874 CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, 1875 $ LDVT, WORK( IU ), LDWRKU, DUM, 1, 1876 $ WORK( IWORK ), INFO ) 1877* 1878* Multiply Q in U by left singular vectors of R in 1879* WORK(IU), storing result in A 1880* (Workspace: need N*N) 1881* 1882 CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU, 1883 $ WORK( IU ), LDWRKU, ZERO, A, LDA ) 1884* 1885* Copy left singular vectors of A from A to U 1886* 1887 CALL DLACPY( 'F', M, N, A, LDA, U, LDU ) 1888* 1889 ELSE 1890* 1891* Insufficient workspace for a fast algorithm 1892* 1893 ITAU = 1 1894 IWORK = ITAU + N 1895* 1896* Compute A=Q*R, copying result to U 1897* (Workspace: need 2*N, prefer N+N*NB) 1898* 1899 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), 1900 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 1901 CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) 1902* 1903* Generate Q in U 1904* (Workspace: need N+M, prefer N+M*NB) 1905* 1906 CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ), 1907 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 1908* 1909* Copy R from A to VT, zeroing out below it 1910* 1911 CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT ) 1912 IF( N.GT.1 ) 1913 $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, 1914 $ VT( 2, 1 ), LDVT ) 1915 IE = ITAU 1916 ITAUQ = IE + N 1917 ITAUP = ITAUQ + N 1918 IWORK = ITAUP + N 1919* 1920* Bidiagonalize R in VT 1921* (Workspace: need 4*N, prefer 3*N+2*N*NB) 1922* 1923 CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ), 1924 $ WORK( ITAUQ ), WORK( ITAUP ), 1925 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 1926* 1927* Multiply Q in U by left bidiagonalizing vectors 1928* in VT 1929* (Workspace: need 3*N+M, prefer 3*N+M*NB) 1930* 1931 CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT, 1932 $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), 1933 $ LWORK-IWORK+1, IERR ) 1934* 1935* Generate right bidiagonalizing vectors in VT 1936* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) 1937* 1938 CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), 1939 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 1940 IWORK = IE + N 1941* 1942* Perform bidiagonal QR iteration, computing left 1943* singular vectors of A in U and computing right 1944* singular vectors of A in VT 1945* (Workspace: need BDSPAC) 1946* 1947 CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, 1948 $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), 1949 $ INFO ) 1950* 1951 END IF 1952* 1953 END IF 1954* 1955 END IF 1956* 1957 ELSE 1958* 1959* M .LT. MNTHR 1960* 1961* Path 10 (M at least N, but not much larger) 1962* Reduce to bidiagonal form without QR decomposition 1963* 1964 IE = 1 1965 ITAUQ = IE + N 1966 ITAUP = ITAUQ + N 1967 IWORK = ITAUP + N 1968* 1969* Bidiagonalize A 1970* (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) 1971* 1972 CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), 1973 $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, 1974 $ IERR ) 1975 IF( WNTUAS ) THEN 1976* 1977* If left singular vectors desired in U, copy result to U 1978* and generate left bidiagonalizing vectors in U 1979* (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB) 1980* 1981 CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) 1982 IF( WNTUS ) 1983 $ NCU = N 1984 IF( WNTUA ) 1985 $ NCU = M 1986 CALL DORGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ), 1987 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 1988 END IF 1989 IF( WNTVAS ) THEN 1990* 1991* If right singular vectors desired in VT, copy result to 1992* VT and generate right bidiagonalizing vectors in VT 1993* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) 1994* 1995 CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT ) 1996 CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), 1997 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 1998 END IF 1999 IF( WNTUO ) THEN 2000* 2001* If left singular vectors desired in A, generate left 2002* bidiagonalizing vectors in A 2003* (Workspace: need 4*N, prefer 3*N+N*NB) 2004* 2005 CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ), 2006 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2007 END IF 2008 IF( WNTVO ) THEN 2009* 2010* If right singular vectors desired in A, generate right 2011* bidiagonalizing vectors in A 2012* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) 2013* 2014 CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), 2015 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2016 END IF 2017 IWORK = IE + N 2018 IF( WNTUAS .OR. WNTUO ) 2019 $ NRU = M 2020 IF( WNTUN ) 2021 $ NRU = 0 2022 IF( WNTVAS .OR. WNTVO ) 2023 $ NCVT = N 2024 IF( WNTVN ) 2025 $ NCVT = 0 2026 IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN 2027* 2028* Perform bidiagonal QR iteration, if desired, computing 2029* left singular vectors in U and computing right singular 2030* vectors in VT 2031* (Workspace: need BDSPAC) 2032* 2033 CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT, 2034 $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO ) 2035 ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN 2036* 2037* Perform bidiagonal QR iteration, if desired, computing 2038* left singular vectors in U and computing right singular 2039* vectors in A 2040* (Workspace: need BDSPAC) 2041* 2042 CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), A, LDA, 2043 $ U, LDU, DUM, 1, WORK( IWORK ), INFO ) 2044 ELSE 2045* 2046* Perform bidiagonal QR iteration, if desired, computing 2047* left singular vectors in A and computing right singular 2048* vectors in VT 2049* (Workspace: need BDSPAC) 2050* 2051 CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT, 2052 $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO ) 2053 END IF 2054* 2055 END IF 2056* 2057 ELSE 2058* 2059* A has more columns than rows. If A has sufficiently more 2060* columns than rows, first reduce using the LQ decomposition (if 2061* sufficient workspace available) 2062* 2063 IF( N.GE.MNTHR ) THEN 2064* 2065 IF( WNTVN ) THEN 2066* 2067* Path 1t(N much larger than M, JOBVT='N') 2068* No right singular vectors to be computed 2069* 2070 ITAU = 1 2071 IWORK = ITAU + M 2072* 2073* Compute A=L*Q 2074* (Workspace: need 2*M, prefer M+M*NB) 2075* 2076 CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), 2077 $ LWORK-IWORK+1, IERR ) 2078* 2079* Zero out above L 2080* 2081 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA ) 2082 IE = 1 2083 ITAUQ = IE + M 2084 ITAUP = ITAUQ + M 2085 IWORK = ITAUP + M 2086* 2087* Bidiagonalize L in A 2088* (Workspace: need 4*M, prefer 3*M+2*M*NB) 2089* 2090 CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ), 2091 $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, 2092 $ IERR ) 2093 IF( WNTUO .OR. WNTUAS ) THEN 2094* 2095* If left singular vectors desired, generate Q 2096* (Workspace: need 4*M, prefer 3*M+M*NB) 2097* 2098 CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ), 2099 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2100 END IF 2101 IWORK = IE + M 2102 NRU = 0 2103 IF( WNTUO .OR. WNTUAS ) 2104 $ NRU = M 2105* 2106* Perform bidiagonal QR iteration, computing left singular 2107* vectors of A in A if desired 2108* (Workspace: need BDSPAC) 2109* 2110 CALL DBDSQR( 'U', M, 0, NRU, 0, S, WORK( IE ), DUM, 1, A, 2111 $ LDA, DUM, 1, WORK( IWORK ), INFO ) 2112* 2113* If left singular vectors desired in U, copy them there 2114* 2115 IF( WNTUAS ) 2116 $ CALL DLACPY( 'F', M, M, A, LDA, U, LDU ) 2117* 2118 ELSE IF( WNTVO .AND. WNTUN ) THEN 2119* 2120* Path 2t(N much larger than M, JOBU='N', JOBVT='O') 2121* M right singular vectors to be overwritten on A and 2122* no left singular vectors to be computed 2123* 2124 IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN 2125* 2126* Sufficient workspace for a fast algorithm 2127* 2128 IR = 1 2129 IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN 2130* 2131* WORK(IU) is LDA by N and WORK(IR) is LDA by M 2132* 2133 LDWRKU = LDA 2134 CHUNK = N 2135 LDWRKR = LDA 2136 ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN 2137* 2138* WORK(IU) is LDA by N and WORK(IR) is M by M 2139* 2140 LDWRKU = LDA 2141 CHUNK = N 2142 LDWRKR = M 2143 ELSE 2144* 2145* WORK(IU) is M by CHUNK and WORK(IR) is M by M 2146* 2147 LDWRKU = M 2148 CHUNK = ( LWORK-M*M-M ) / M 2149 LDWRKR = M 2150 END IF 2151 ITAU = IR + LDWRKR*M 2152 IWORK = ITAU + M 2153* 2154* Compute A=L*Q 2155* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) 2156* 2157 CALL DGELQF( M, N, A, LDA, WORK( ITAU ), 2158 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2159* 2160* Copy L to WORK(IR) and zero out above it 2161* 2162 CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR ) 2163 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, 2164 $ WORK( IR+LDWRKR ), LDWRKR ) 2165* 2166* Generate Q in A 2167* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) 2168* 2169 CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ), 2170 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2171 IE = ITAU 2172 ITAUQ = IE + M 2173 ITAUP = ITAUQ + M 2174 IWORK = ITAUP + M 2175* 2176* Bidiagonalize L in WORK(IR) 2177* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) 2178* 2179 CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S, WORK( IE ), 2180 $ WORK( ITAUQ ), WORK( ITAUP ), 2181 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2182* 2183* Generate right vectors bidiagonalizing L 2184* (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB) 2185* 2186 CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR, 2187 $ WORK( ITAUP ), WORK( IWORK ), 2188 $ LWORK-IWORK+1, IERR ) 2189 IWORK = IE + M 2190* 2191* Perform bidiagonal QR iteration, computing right 2192* singular vectors of L in WORK(IR) 2193* (Workspace: need M*M+BDSPAC) 2194* 2195 CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ), 2196 $ WORK( IR ), LDWRKR, DUM, 1, DUM, 1, 2197 $ WORK( IWORK ), INFO ) 2198 IU = IE + M 2199* 2200* Multiply right singular vectors of L in WORK(IR) by Q 2201* in A, storing result in WORK(IU) and copying to A 2202* (Workspace: need M*M+2*M, prefer M*M+M*N+M) 2203* 2204 DO 30 I = 1, N, CHUNK 2205 BLK = MIN( N-I+1, CHUNK ) 2206 CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ), 2207 $ LDWRKR, A( 1, I ), LDA, ZERO, 2208 $ WORK( IU ), LDWRKU ) 2209 CALL DLACPY( 'F', M, BLK, WORK( IU ), LDWRKU, 2210 $ A( 1, I ), LDA ) 2211 30 CONTINUE 2212* 2213 ELSE 2214* 2215* Insufficient workspace for a fast algorithm 2216* 2217 IE = 1 2218 ITAUQ = IE + M 2219 ITAUP = ITAUQ + M 2220 IWORK = ITAUP + M 2221* 2222* Bidiagonalize A 2223* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) 2224* 2225 CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), 2226 $ WORK( ITAUQ ), WORK( ITAUP ), 2227 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2228* 2229* Generate right vectors bidiagonalizing A 2230* (Workspace: need 4*M, prefer 3*M+M*NB) 2231* 2232 CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), 2233 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2234 IWORK = IE + M 2235* 2236* Perform bidiagonal QR iteration, computing right 2237* singular vectors of A in A 2238* (Workspace: need BDSPAC) 2239* 2240 CALL DBDSQR( 'L', M, N, 0, 0, S, WORK( IE ), A, LDA, 2241 $ DUM, 1, DUM, 1, WORK( IWORK ), INFO ) 2242* 2243 END IF 2244* 2245 ELSE IF( WNTVO .AND. WNTUAS ) THEN 2246* 2247* Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O') 2248* M right singular vectors to be overwritten on A and 2249* M left singular vectors to be computed in U 2250* 2251 IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN 2252* 2253* Sufficient workspace for a fast algorithm 2254* 2255 IR = 1 2256 IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN 2257* 2258* WORK(IU) is LDA by N and WORK(IR) is LDA by M 2259* 2260 LDWRKU = LDA 2261 CHUNK = N 2262 LDWRKR = LDA 2263 ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN 2264* 2265* WORK(IU) is LDA by N and WORK(IR) is M by M 2266* 2267 LDWRKU = LDA 2268 CHUNK = N 2269 LDWRKR = M 2270 ELSE 2271* 2272* WORK(IU) is M by CHUNK and WORK(IR) is M by M 2273* 2274 LDWRKU = M 2275 CHUNK = ( LWORK-M*M-M ) / M 2276 LDWRKR = M 2277 END IF 2278 ITAU = IR + LDWRKR*M 2279 IWORK = ITAU + M 2280* 2281* Compute A=L*Q 2282* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) 2283* 2284 CALL DGELQF( M, N, A, LDA, WORK( ITAU ), 2285 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2286* 2287* Copy L to U, zeroing about above it 2288* 2289 CALL DLACPY( 'L', M, M, A, LDA, U, LDU ) 2290 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ), 2291 $ LDU ) 2292* 2293* Generate Q in A 2294* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) 2295* 2296 CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ), 2297 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2298 IE = ITAU 2299 ITAUQ = IE + M 2300 ITAUP = ITAUQ + M 2301 IWORK = ITAUP + M 2302* 2303* Bidiagonalize L in U, copying result to WORK(IR) 2304* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) 2305* 2306 CALL DGEBRD( M, M, U, LDU, S, WORK( IE ), 2307 $ WORK( ITAUQ ), WORK( ITAUP ), 2308 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2309 CALL DLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR ) 2310* 2311* Generate right vectors bidiagonalizing L in WORK(IR) 2312* (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB) 2313* 2314 CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR, 2315 $ WORK( ITAUP ), WORK( IWORK ), 2316 $ LWORK-IWORK+1, IERR ) 2317* 2318* Generate left vectors bidiagonalizing L in U 2319* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) 2320* 2321 CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), 2322 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2323 IWORK = IE + M 2324* 2325* Perform bidiagonal QR iteration, computing left 2326* singular vectors of L in U, and computing right 2327* singular vectors of L in WORK(IR) 2328* (Workspace: need M*M+BDSPAC) 2329* 2330 CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ), 2331 $ WORK( IR ), LDWRKR, U, LDU, DUM, 1, 2332 $ WORK( IWORK ), INFO ) 2333 IU = IE + M 2334* 2335* Multiply right singular vectors of L in WORK(IR) by Q 2336* in A, storing result in WORK(IU) and copying to A 2337* (Workspace: need M*M+2*M, prefer M*M+M*N+M)) 2338* 2339 DO 40 I = 1, N, CHUNK 2340 BLK = MIN( N-I+1, CHUNK ) 2341 CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ), 2342 $ LDWRKR, A( 1, I ), LDA, ZERO, 2343 $ WORK( IU ), LDWRKU ) 2344 CALL DLACPY( 'F', M, BLK, WORK( IU ), LDWRKU, 2345 $ A( 1, I ), LDA ) 2346 40 CONTINUE 2347* 2348 ELSE 2349* 2350* Insufficient workspace for a fast algorithm 2351* 2352 ITAU = 1 2353 IWORK = ITAU + M 2354* 2355* Compute A=L*Q 2356* (Workspace: need 2*M, prefer M+M*NB) 2357* 2358 CALL DGELQF( M, N, A, LDA, WORK( ITAU ), 2359 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2360* 2361* Copy L to U, zeroing out above it 2362* 2363 CALL DLACPY( 'L', M, M, A, LDA, U, LDU ) 2364 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ), 2365 $ LDU ) 2366* 2367* Generate Q in A 2368* (Workspace: need 2*M, prefer M+M*NB) 2369* 2370 CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ), 2371 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2372 IE = ITAU 2373 ITAUQ = IE + M 2374 ITAUP = ITAUQ + M 2375 IWORK = ITAUP + M 2376* 2377* Bidiagonalize L in U 2378* (Workspace: need 4*M, prefer 3*M+2*M*NB) 2379* 2380 CALL DGEBRD( M, M, U, LDU, S, WORK( IE ), 2381 $ WORK( ITAUQ ), WORK( ITAUP ), 2382 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2383* 2384* Multiply right vectors bidiagonalizing L by Q in A 2385* (Workspace: need 3*M+N, prefer 3*M+N*NB) 2386* 2387 CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU, 2388 $ WORK( ITAUP ), A, LDA, WORK( IWORK ), 2389 $ LWORK-IWORK+1, IERR ) 2390* 2391* Generate left vectors bidiagonalizing L in U 2392* (Workspace: need 4*M, prefer 3*M+M*NB) 2393* 2394 CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), 2395 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2396 IWORK = IE + M 2397* 2398* Perform bidiagonal QR iteration, computing left 2399* singular vectors of A in U and computing right 2400* singular vectors of A in A 2401* (Workspace: need BDSPAC) 2402* 2403 CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), A, LDA, 2404 $ U, LDU, DUM, 1, WORK( IWORK ), INFO ) 2405* 2406 END IF 2407* 2408 ELSE IF( WNTVS ) THEN 2409* 2410 IF( WNTUN ) THEN 2411* 2412* Path 4t(N much larger than M, JOBU='N', JOBVT='S') 2413* M right singular vectors to be computed in VT and 2414* no left singular vectors to be computed 2415* 2416 IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN 2417* 2418* Sufficient workspace for a fast algorithm 2419* 2420 IR = 1 2421 IF( LWORK.GE.WRKBL+LDA*M ) THEN 2422* 2423* WORK(IR) is LDA by M 2424* 2425 LDWRKR = LDA 2426 ELSE 2427* 2428* WORK(IR) is M by M 2429* 2430 LDWRKR = M 2431 END IF 2432 ITAU = IR + LDWRKR*M 2433 IWORK = ITAU + M 2434* 2435* Compute A=L*Q 2436* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) 2437* 2438 CALL DGELQF( M, N, A, LDA, WORK( ITAU ), 2439 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2440* 2441* Copy L to WORK(IR), zeroing out above it 2442* 2443 CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ), 2444 $ LDWRKR ) 2445 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, 2446 $ WORK( IR+LDWRKR ), LDWRKR ) 2447* 2448* Generate Q in A 2449* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) 2450* 2451 CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ), 2452 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2453 IE = ITAU 2454 ITAUQ = IE + M 2455 ITAUP = ITAUQ + M 2456 IWORK = ITAUP + M 2457* 2458* Bidiagonalize L in WORK(IR) 2459* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) 2460* 2461 CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S, 2462 $ WORK( IE ), WORK( ITAUQ ), 2463 $ WORK( ITAUP ), WORK( IWORK ), 2464 $ LWORK-IWORK+1, IERR ) 2465* 2466* Generate right vectors bidiagonalizing L in 2467* WORK(IR) 2468* (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB) 2469* 2470 CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR, 2471 $ WORK( ITAUP ), WORK( IWORK ), 2472 $ LWORK-IWORK+1, IERR ) 2473 IWORK = IE + M 2474* 2475* Perform bidiagonal QR iteration, computing right 2476* singular vectors of L in WORK(IR) 2477* (Workspace: need M*M+BDSPAC) 2478* 2479 CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ), 2480 $ WORK( IR ), LDWRKR, DUM, 1, DUM, 1, 2481 $ WORK( IWORK ), INFO ) 2482* 2483* Multiply right singular vectors of L in WORK(IR) by 2484* Q in A, storing result in VT 2485* (Workspace: need M*M) 2486* 2487 CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ), 2488 $ LDWRKR, A, LDA, ZERO, VT, LDVT ) 2489* 2490 ELSE 2491* 2492* Insufficient workspace for a fast algorithm 2493* 2494 ITAU = 1 2495 IWORK = ITAU + M 2496* 2497* Compute A=L*Q 2498* (Workspace: need 2*M, prefer M+M*NB) 2499* 2500 CALL DGELQF( M, N, A, LDA, WORK( ITAU ), 2501 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2502* 2503* Copy result to VT 2504* 2505 CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) 2506* 2507* Generate Q in VT 2508* (Workspace: need 2*M, prefer M+M*NB) 2509* 2510 CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ), 2511 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2512 IE = ITAU 2513 ITAUQ = IE + M 2514 ITAUP = ITAUQ + M 2515 IWORK = ITAUP + M 2516* 2517* Zero out above L in A 2518* 2519 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), 2520 $ LDA ) 2521* 2522* Bidiagonalize L in A 2523* (Workspace: need 4*M, prefer 3*M+2*M*NB) 2524* 2525 CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), 2526 $ WORK( ITAUQ ), WORK( ITAUP ), 2527 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2528* 2529* Multiply right vectors bidiagonalizing L by Q in VT 2530* (Workspace: need 3*M+N, prefer 3*M+N*NB) 2531* 2532 CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA, 2533 $ WORK( ITAUP ), VT, LDVT, 2534 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2535 IWORK = IE + M 2536* 2537* Perform bidiagonal QR iteration, computing right 2538* singular vectors of A in VT 2539* (Workspace: need BDSPAC) 2540* 2541 CALL DBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT, 2542 $ LDVT, DUM, 1, DUM, 1, WORK( IWORK ), 2543 $ INFO ) 2544* 2545 END IF 2546* 2547 ELSE IF( WNTUO ) THEN 2548* 2549* Path 5t(N much larger than M, JOBU='O', JOBVT='S') 2550* M right singular vectors to be computed in VT and 2551* M left singular vectors to be overwritten on A 2552* 2553 IF( LWORK.GE.2*M*M+MAX( 4*M, BDSPAC ) ) THEN 2554* 2555* Sufficient workspace for a fast algorithm 2556* 2557 IU = 1 2558 IF( LWORK.GE.WRKBL+2*LDA*M ) THEN 2559* 2560* WORK(IU) is LDA by M and WORK(IR) is LDA by M 2561* 2562 LDWRKU = LDA 2563 IR = IU + LDWRKU*M 2564 LDWRKR = LDA 2565 ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN 2566* 2567* WORK(IU) is LDA by M and WORK(IR) is M by M 2568* 2569 LDWRKU = LDA 2570 IR = IU + LDWRKU*M 2571 LDWRKR = M 2572 ELSE 2573* 2574* WORK(IU) is M by M and WORK(IR) is M by M 2575* 2576 LDWRKU = M 2577 IR = IU + LDWRKU*M 2578 LDWRKR = M 2579 END IF 2580 ITAU = IR + LDWRKR*M 2581 IWORK = ITAU + M 2582* 2583* Compute A=L*Q 2584* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) 2585* 2586 CALL DGELQF( M, N, A, LDA, WORK( ITAU ), 2587 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2588* 2589* Copy L to WORK(IU), zeroing out below it 2590* 2591 CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ), 2592 $ LDWRKU ) 2593 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, 2594 $ WORK( IU+LDWRKU ), LDWRKU ) 2595* 2596* Generate Q in A 2597* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) 2598* 2599 CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ), 2600 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2601 IE = ITAU 2602 ITAUQ = IE + M 2603 ITAUP = ITAUQ + M 2604 IWORK = ITAUP + M 2605* 2606* Bidiagonalize L in WORK(IU), copying result to 2607* WORK(IR) 2608* (Workspace: need 2*M*M+4*M, 2609* prefer 2*M*M+3*M+2*M*NB) 2610* 2611 CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S, 2612 $ WORK( IE ), WORK( ITAUQ ), 2613 $ WORK( ITAUP ), WORK( IWORK ), 2614 $ LWORK-IWORK+1, IERR ) 2615 CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, 2616 $ WORK( IR ), LDWRKR ) 2617* 2618* Generate right bidiagonalizing vectors in WORK(IU) 2619* (Workspace: need 2*M*M+4*M-1, 2620* prefer 2*M*M+3*M+(M-1)*NB) 2621* 2622 CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU, 2623 $ WORK( ITAUP ), WORK( IWORK ), 2624 $ LWORK-IWORK+1, IERR ) 2625* 2626* Generate left bidiagonalizing vectors in WORK(IR) 2627* (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB) 2628* 2629 CALL DORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR, 2630 $ WORK( ITAUQ ), WORK( IWORK ), 2631 $ LWORK-IWORK+1, IERR ) 2632 IWORK = IE + M 2633* 2634* Perform bidiagonal QR iteration, computing left 2635* singular vectors of L in WORK(IR) and computing 2636* right singular vectors of L in WORK(IU) 2637* (Workspace: need 2*M*M+BDSPAC) 2638* 2639 CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ), 2640 $ WORK( IU ), LDWRKU, WORK( IR ), 2641 $ LDWRKR, DUM, 1, WORK( IWORK ), INFO ) 2642* 2643* Multiply right singular vectors of L in WORK(IU) by 2644* Q in A, storing result in VT 2645* (Workspace: need M*M) 2646* 2647 CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ), 2648 $ LDWRKU, A, LDA, ZERO, VT, LDVT ) 2649* 2650* Copy left singular vectors of L to A 2651* (Workspace: need M*M) 2652* 2653 CALL DLACPY( 'F', M, M, WORK( IR ), LDWRKR, A, 2654 $ LDA ) 2655* 2656 ELSE 2657* 2658* Insufficient workspace for a fast algorithm 2659* 2660 ITAU = 1 2661 IWORK = ITAU + M 2662* 2663* Compute A=L*Q, copying result to VT 2664* (Workspace: need 2*M, prefer M+M*NB) 2665* 2666 CALL DGELQF( M, N, A, LDA, WORK( ITAU ), 2667 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2668 CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) 2669* 2670* Generate Q in VT 2671* (Workspace: need 2*M, prefer M+M*NB) 2672* 2673 CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ), 2674 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2675 IE = ITAU 2676 ITAUQ = IE + M 2677 ITAUP = ITAUQ + M 2678 IWORK = ITAUP + M 2679* 2680* Zero out above L in A 2681* 2682 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), 2683 $ LDA ) 2684* 2685* Bidiagonalize L in A 2686* (Workspace: need 4*M, prefer 3*M+2*M*NB) 2687* 2688 CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), 2689 $ WORK( ITAUQ ), WORK( ITAUP ), 2690 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2691* 2692* Multiply right vectors bidiagonalizing L by Q in VT 2693* (Workspace: need 3*M+N, prefer 3*M+N*NB) 2694* 2695 CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA, 2696 $ WORK( ITAUP ), VT, LDVT, 2697 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2698* 2699* Generate left bidiagonalizing vectors of L in A 2700* (Workspace: need 4*M, prefer 3*M+M*NB) 2701* 2702 CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ), 2703 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2704 IWORK = IE + M 2705* 2706* Perform bidiagonal QR iteration, compute left 2707* singular vectors of A in A and compute right 2708* singular vectors of A in VT 2709* (Workspace: need BDSPAC) 2710* 2711 CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT, 2712 $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), 2713 $ INFO ) 2714* 2715 END IF 2716* 2717 ELSE IF( WNTUAS ) THEN 2718* 2719* Path 6t(N much larger than M, JOBU='S' or 'A', 2720* JOBVT='S') 2721* M right singular vectors to be computed in VT and 2722* M left singular vectors to be computed in U 2723* 2724 IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN 2725* 2726* Sufficient workspace for a fast algorithm 2727* 2728 IU = 1 2729 IF( LWORK.GE.WRKBL+LDA*M ) THEN 2730* 2731* WORK(IU) is LDA by N 2732* 2733 LDWRKU = LDA 2734 ELSE 2735* 2736* WORK(IU) is LDA by M 2737* 2738 LDWRKU = M 2739 END IF 2740 ITAU = IU + LDWRKU*M 2741 IWORK = ITAU + M 2742* 2743* Compute A=L*Q 2744* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) 2745* 2746 CALL DGELQF( M, N, A, LDA, WORK( ITAU ), 2747 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2748* 2749* Copy L to WORK(IU), zeroing out above it 2750* 2751 CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ), 2752 $ LDWRKU ) 2753 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, 2754 $ WORK( IU+LDWRKU ), LDWRKU ) 2755* 2756* Generate Q in A 2757* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) 2758* 2759 CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ), 2760 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2761 IE = ITAU 2762 ITAUQ = IE + M 2763 ITAUP = ITAUQ + M 2764 IWORK = ITAUP + M 2765* 2766* Bidiagonalize L in WORK(IU), copying result to U 2767* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) 2768* 2769 CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S, 2770 $ WORK( IE ), WORK( ITAUQ ), 2771 $ WORK( ITAUP ), WORK( IWORK ), 2772 $ LWORK-IWORK+1, IERR ) 2773 CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, U, 2774 $ LDU ) 2775* 2776* Generate right bidiagonalizing vectors in WORK(IU) 2777* (Workspace: need M*M+4*M-1, 2778* prefer M*M+3*M+(M-1)*NB) 2779* 2780 CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU, 2781 $ WORK( ITAUP ), WORK( IWORK ), 2782 $ LWORK-IWORK+1, IERR ) 2783* 2784* Generate left bidiagonalizing vectors in U 2785* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) 2786* 2787 CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), 2788 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2789 IWORK = IE + M 2790* 2791* Perform bidiagonal QR iteration, computing left 2792* singular vectors of L in U and computing right 2793* singular vectors of L in WORK(IU) 2794* (Workspace: need M*M+BDSPAC) 2795* 2796 CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ), 2797 $ WORK( IU ), LDWRKU, U, LDU, DUM, 1, 2798 $ WORK( IWORK ), INFO ) 2799* 2800* Multiply right singular vectors of L in WORK(IU) by 2801* Q in A, storing result in VT 2802* (Workspace: need M*M) 2803* 2804 CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ), 2805 $ LDWRKU, A, LDA, ZERO, VT, LDVT ) 2806* 2807 ELSE 2808* 2809* Insufficient workspace for a fast algorithm 2810* 2811 ITAU = 1 2812 IWORK = ITAU + M 2813* 2814* Compute A=L*Q, copying result to VT 2815* (Workspace: need 2*M, prefer M+M*NB) 2816* 2817 CALL DGELQF( M, N, A, LDA, WORK( ITAU ), 2818 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2819 CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) 2820* 2821* Generate Q in VT 2822* (Workspace: need 2*M, prefer M+M*NB) 2823* 2824 CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ), 2825 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2826* 2827* Copy L to U, zeroing out above it 2828* 2829 CALL DLACPY( 'L', M, M, A, LDA, U, LDU ) 2830 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ), 2831 $ LDU ) 2832 IE = ITAU 2833 ITAUQ = IE + M 2834 ITAUP = ITAUQ + M 2835 IWORK = ITAUP + M 2836* 2837* Bidiagonalize L in U 2838* (Workspace: need 4*M, prefer 3*M+2*M*NB) 2839* 2840 CALL DGEBRD( M, M, U, LDU, S, WORK( IE ), 2841 $ WORK( ITAUQ ), WORK( ITAUP ), 2842 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2843* 2844* Multiply right bidiagonalizing vectors in U by Q 2845* in VT 2846* (Workspace: need 3*M+N, prefer 3*M+N*NB) 2847* 2848 CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU, 2849 $ WORK( ITAUP ), VT, LDVT, 2850 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2851* 2852* Generate left bidiagonalizing vectors in U 2853* (Workspace: need 4*M, prefer 3*M+M*NB) 2854* 2855 CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), 2856 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2857 IWORK = IE + M 2858* 2859* Perform bidiagonal QR iteration, computing left 2860* singular vectors of A in U and computing right 2861* singular vectors of A in VT 2862* (Workspace: need BDSPAC) 2863* 2864 CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT, 2865 $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), 2866 $ INFO ) 2867* 2868 END IF 2869* 2870 END IF 2871* 2872 ELSE IF( WNTVA ) THEN 2873* 2874 IF( WNTUN ) THEN 2875* 2876* Path 7t(N much larger than M, JOBU='N', JOBVT='A') 2877* N right singular vectors to be computed in VT and 2878* no left singular vectors to be computed 2879* 2880 IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN 2881* 2882* Sufficient workspace for a fast algorithm 2883* 2884 IR = 1 2885 IF( LWORK.GE.WRKBL+LDA*M ) THEN 2886* 2887* WORK(IR) is LDA by M 2888* 2889 LDWRKR = LDA 2890 ELSE 2891* 2892* WORK(IR) is M by M 2893* 2894 LDWRKR = M 2895 END IF 2896 ITAU = IR + LDWRKR*M 2897 IWORK = ITAU + M 2898* 2899* Compute A=L*Q, copying result to VT 2900* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) 2901* 2902 CALL DGELQF( M, N, A, LDA, WORK( ITAU ), 2903 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2904 CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) 2905* 2906* Copy L to WORK(IR), zeroing out above it 2907* 2908 CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ), 2909 $ LDWRKR ) 2910 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, 2911 $ WORK( IR+LDWRKR ), LDWRKR ) 2912* 2913* Generate Q in VT 2914* (Workspace: need M*M+M+N, prefer M*M+M+N*NB) 2915* 2916 CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), 2917 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2918 IE = ITAU 2919 ITAUQ = IE + M 2920 ITAUP = ITAUQ + M 2921 IWORK = ITAUP + M 2922* 2923* Bidiagonalize L in WORK(IR) 2924* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) 2925* 2926 CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S, 2927 $ WORK( IE ), WORK( ITAUQ ), 2928 $ WORK( ITAUP ), WORK( IWORK ), 2929 $ LWORK-IWORK+1, IERR ) 2930* 2931* Generate right bidiagonalizing vectors in WORK(IR) 2932* (Workspace: need M*M+4*M-1, 2933* prefer M*M+3*M+(M-1)*NB) 2934* 2935 CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR, 2936 $ WORK( ITAUP ), WORK( IWORK ), 2937 $ LWORK-IWORK+1, IERR ) 2938 IWORK = IE + M 2939* 2940* Perform bidiagonal QR iteration, computing right 2941* singular vectors of L in WORK(IR) 2942* (Workspace: need M*M+BDSPAC) 2943* 2944 CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ), 2945 $ WORK( IR ), LDWRKR, DUM, 1, DUM, 1, 2946 $ WORK( IWORK ), INFO ) 2947* 2948* Multiply right singular vectors of L in WORK(IR) by 2949* Q in VT, storing result in A 2950* (Workspace: need M*M) 2951* 2952 CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ), 2953 $ LDWRKR, VT, LDVT, ZERO, A, LDA ) 2954* 2955* Copy right singular vectors of A from A to VT 2956* 2957 CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT ) 2958* 2959 ELSE 2960* 2961* Insufficient workspace for a fast algorithm 2962* 2963 ITAU = 1 2964 IWORK = ITAU + M 2965* 2966* Compute A=L*Q, copying result to VT 2967* (Workspace: need 2*M, prefer M+M*NB) 2968* 2969 CALL DGELQF( M, N, A, LDA, WORK( ITAU ), 2970 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2971 CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) 2972* 2973* Generate Q in VT 2974* (Workspace: need M+N, prefer M+N*NB) 2975* 2976 CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), 2977 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2978 IE = ITAU 2979 ITAUQ = IE + M 2980 ITAUP = ITAUQ + M 2981 IWORK = ITAUP + M 2982* 2983* Zero out above L in A 2984* 2985 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), 2986 $ LDA ) 2987* 2988* Bidiagonalize L in A 2989* (Workspace: need 4*M, prefer 3*M+2*M*NB) 2990* 2991 CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), 2992 $ WORK( ITAUQ ), WORK( ITAUP ), 2993 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 2994* 2995* Multiply right bidiagonalizing vectors in A by Q 2996* in VT 2997* (Workspace: need 3*M+N, prefer 3*M+N*NB) 2998* 2999 CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA, 3000 $ WORK( ITAUP ), VT, LDVT, 3001 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 3002 IWORK = IE + M 3003* 3004* Perform bidiagonal QR iteration, computing right 3005* singular vectors of A in VT 3006* (Workspace: need BDSPAC) 3007* 3008 CALL DBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT, 3009 $ LDVT, DUM, 1, DUM, 1, WORK( IWORK ), 3010 $ INFO ) 3011* 3012 END IF 3013* 3014 ELSE IF( WNTUO ) THEN 3015* 3016* Path 8t(N much larger than M, JOBU='O', JOBVT='A') 3017* N right singular vectors to be computed in VT and 3018* M left singular vectors to be overwritten on A 3019* 3020 IF( LWORK.GE.2*M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN 3021* 3022* Sufficient workspace for a fast algorithm 3023* 3024 IU = 1 3025 IF( LWORK.GE.WRKBL+2*LDA*M ) THEN 3026* 3027* WORK(IU) is LDA by M and WORK(IR) is LDA by M 3028* 3029 LDWRKU = LDA 3030 IR = IU + LDWRKU*M 3031 LDWRKR = LDA 3032 ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN 3033* 3034* WORK(IU) is LDA by M and WORK(IR) is M by M 3035* 3036 LDWRKU = LDA 3037 IR = IU + LDWRKU*M 3038 LDWRKR = M 3039 ELSE 3040* 3041* WORK(IU) is M by M and WORK(IR) is M by M 3042* 3043 LDWRKU = M 3044 IR = IU + LDWRKU*M 3045 LDWRKR = M 3046 END IF 3047 ITAU = IR + LDWRKR*M 3048 IWORK = ITAU + M 3049* 3050* Compute A=L*Q, copying result to VT 3051* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) 3052* 3053 CALL DGELQF( M, N, A, LDA, WORK( ITAU ), 3054 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 3055 CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) 3056* 3057* Generate Q in VT 3058* (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB) 3059* 3060 CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), 3061 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 3062* 3063* Copy L to WORK(IU), zeroing out above it 3064* 3065 CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ), 3066 $ LDWRKU ) 3067 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, 3068 $ WORK( IU+LDWRKU ), LDWRKU ) 3069 IE = ITAU 3070 ITAUQ = IE + M 3071 ITAUP = ITAUQ + M 3072 IWORK = ITAUP + M 3073* 3074* Bidiagonalize L in WORK(IU), copying result to 3075* WORK(IR) 3076* (Workspace: need 2*M*M+4*M, 3077* prefer 2*M*M+3*M+2*M*NB) 3078* 3079 CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S, 3080 $ WORK( IE ), WORK( ITAUQ ), 3081 $ WORK( ITAUP ), WORK( IWORK ), 3082 $ LWORK-IWORK+1, IERR ) 3083 CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, 3084 $ WORK( IR ), LDWRKR ) 3085* 3086* Generate right bidiagonalizing vectors in WORK(IU) 3087* (Workspace: need 2*M*M+4*M-1, 3088* prefer 2*M*M+3*M+(M-1)*NB) 3089* 3090 CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU, 3091 $ WORK( ITAUP ), WORK( IWORK ), 3092 $ LWORK-IWORK+1, IERR ) 3093* 3094* Generate left bidiagonalizing vectors in WORK(IR) 3095* (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB) 3096* 3097 CALL DORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR, 3098 $ WORK( ITAUQ ), WORK( IWORK ), 3099 $ LWORK-IWORK+1, IERR ) 3100 IWORK = IE + M 3101* 3102* Perform bidiagonal QR iteration, computing left 3103* singular vectors of L in WORK(IR) and computing 3104* right singular vectors of L in WORK(IU) 3105* (Workspace: need 2*M*M+BDSPAC) 3106* 3107 CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ), 3108 $ WORK( IU ), LDWRKU, WORK( IR ), 3109 $ LDWRKR, DUM, 1, WORK( IWORK ), INFO ) 3110* 3111* Multiply right singular vectors of L in WORK(IU) by 3112* Q in VT, storing result in A 3113* (Workspace: need M*M) 3114* 3115 CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ), 3116 $ LDWRKU, VT, LDVT, ZERO, A, LDA ) 3117* 3118* Copy right singular vectors of A from A to VT 3119* 3120 CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT ) 3121* 3122* Copy left singular vectors of A from WORK(IR) to A 3123* 3124 CALL DLACPY( 'F', M, M, WORK( IR ), LDWRKR, A, 3125 $ LDA ) 3126* 3127 ELSE 3128* 3129* Insufficient workspace for a fast algorithm 3130* 3131 ITAU = 1 3132 IWORK = ITAU + M 3133* 3134* Compute A=L*Q, copying result to VT 3135* (Workspace: need 2*M, prefer M+M*NB) 3136* 3137 CALL DGELQF( M, N, A, LDA, WORK( ITAU ), 3138 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 3139 CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) 3140* 3141* Generate Q in VT 3142* (Workspace: need M+N, prefer M+N*NB) 3143* 3144 CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), 3145 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 3146 IE = ITAU 3147 ITAUQ = IE + M 3148 ITAUP = ITAUQ + M 3149 IWORK = ITAUP + M 3150* 3151* Zero out above L in A 3152* 3153 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), 3154 $ LDA ) 3155* 3156* Bidiagonalize L in A 3157* (Workspace: need 4*M, prefer 3*M+2*M*NB) 3158* 3159 CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), 3160 $ WORK( ITAUQ ), WORK( ITAUP ), 3161 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 3162* 3163* Multiply right bidiagonalizing vectors in A by Q 3164* in VT 3165* (Workspace: need 3*M+N, prefer 3*M+N*NB) 3166* 3167 CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA, 3168 $ WORK( ITAUP ), VT, LDVT, 3169 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 3170* 3171* Generate left bidiagonalizing vectors in A 3172* (Workspace: need 4*M, prefer 3*M+M*NB) 3173* 3174 CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ), 3175 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 3176 IWORK = IE + M 3177* 3178* Perform bidiagonal QR iteration, computing left 3179* singular vectors of A in A and computing right 3180* singular vectors of A in VT 3181* (Workspace: need BDSPAC) 3182* 3183 CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT, 3184 $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), 3185 $ INFO ) 3186* 3187 END IF 3188* 3189 ELSE IF( WNTUAS ) THEN 3190* 3191* Path 9t(N much larger than M, JOBU='S' or 'A', 3192* JOBVT='A') 3193* N right singular vectors to be computed in VT and 3194* M left singular vectors to be computed in U 3195* 3196 IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN 3197* 3198* Sufficient workspace for a fast algorithm 3199* 3200 IU = 1 3201 IF( LWORK.GE.WRKBL+LDA*M ) THEN 3202* 3203* WORK(IU) is LDA by M 3204* 3205 LDWRKU = LDA 3206 ELSE 3207* 3208* WORK(IU) is M by M 3209* 3210 LDWRKU = M 3211 END IF 3212 ITAU = IU + LDWRKU*M 3213 IWORK = ITAU + M 3214* 3215* Compute A=L*Q, copying result to VT 3216* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) 3217* 3218 CALL DGELQF( M, N, A, LDA, WORK( ITAU ), 3219 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 3220 CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) 3221* 3222* Generate Q in VT 3223* (Workspace: need M*M+M+N, prefer M*M+M+N*NB) 3224* 3225 CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), 3226 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 3227* 3228* Copy L to WORK(IU), zeroing out above it 3229* 3230 CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ), 3231 $ LDWRKU ) 3232 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, 3233 $ WORK( IU+LDWRKU ), LDWRKU ) 3234 IE = ITAU 3235 ITAUQ = IE + M 3236 ITAUP = ITAUQ + M 3237 IWORK = ITAUP + M 3238* 3239* Bidiagonalize L in WORK(IU), copying result to U 3240* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) 3241* 3242 CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S, 3243 $ WORK( IE ), WORK( ITAUQ ), 3244 $ WORK( ITAUP ), WORK( IWORK ), 3245 $ LWORK-IWORK+1, IERR ) 3246 CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, U, 3247 $ LDU ) 3248* 3249* Generate right bidiagonalizing vectors in WORK(IU) 3250* (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB) 3251* 3252 CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU, 3253 $ WORK( ITAUP ), WORK( IWORK ), 3254 $ LWORK-IWORK+1, IERR ) 3255* 3256* Generate left bidiagonalizing vectors in U 3257* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) 3258* 3259 CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), 3260 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 3261 IWORK = IE + M 3262* 3263* Perform bidiagonal QR iteration, computing left 3264* singular vectors of L in U and computing right 3265* singular vectors of L in WORK(IU) 3266* (Workspace: need M*M+BDSPAC) 3267* 3268 CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ), 3269 $ WORK( IU ), LDWRKU, U, LDU, DUM, 1, 3270 $ WORK( IWORK ), INFO ) 3271* 3272* Multiply right singular vectors of L in WORK(IU) by 3273* Q in VT, storing result in A 3274* (Workspace: need M*M) 3275* 3276 CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ), 3277 $ LDWRKU, VT, LDVT, ZERO, A, LDA ) 3278* 3279* Copy right singular vectors of A from A to VT 3280* 3281 CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT ) 3282* 3283 ELSE 3284* 3285* Insufficient workspace for a fast algorithm 3286* 3287 ITAU = 1 3288 IWORK = ITAU + M 3289* 3290* Compute A=L*Q, copying result to VT 3291* (Workspace: need 2*M, prefer M+M*NB) 3292* 3293 CALL DGELQF( M, N, A, LDA, WORK( ITAU ), 3294 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 3295 CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) 3296* 3297* Generate Q in VT 3298* (Workspace: need M+N, prefer M+N*NB) 3299* 3300 CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), 3301 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 3302* 3303* Copy L to U, zeroing out above it 3304* 3305 CALL DLACPY( 'L', M, M, A, LDA, U, LDU ) 3306 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ), 3307 $ LDU ) 3308 IE = ITAU 3309 ITAUQ = IE + M 3310 ITAUP = ITAUQ + M 3311 IWORK = ITAUP + M 3312* 3313* Bidiagonalize L in U 3314* (Workspace: need 4*M, prefer 3*M+2*M*NB) 3315* 3316 CALL DGEBRD( M, M, U, LDU, S, WORK( IE ), 3317 $ WORK( ITAUQ ), WORK( ITAUP ), 3318 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 3319* 3320* Multiply right bidiagonalizing vectors in U by Q 3321* in VT 3322* (Workspace: need 3*M+N, prefer 3*M+N*NB) 3323* 3324 CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU, 3325 $ WORK( ITAUP ), VT, LDVT, 3326 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 3327* 3328* Generate left bidiagonalizing vectors in U 3329* (Workspace: need 4*M, prefer 3*M+M*NB) 3330* 3331 CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), 3332 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 3333 IWORK = IE + M 3334* 3335* Perform bidiagonal QR iteration, computing left 3336* singular vectors of A in U and computing right 3337* singular vectors of A in VT 3338* (Workspace: need BDSPAC) 3339* 3340 CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT, 3341 $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), 3342 $ INFO ) 3343* 3344 END IF 3345* 3346 END IF 3347* 3348 END IF 3349* 3350 ELSE 3351* 3352* N .LT. MNTHR 3353* 3354* Path 10t(N greater than M, but not much larger) 3355* Reduce to bidiagonal form without LQ decomposition 3356* 3357 IE = 1 3358 ITAUQ = IE + M 3359 ITAUP = ITAUQ + M 3360 IWORK = ITAUP + M 3361* 3362* Bidiagonalize A 3363* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) 3364* 3365 CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), 3366 $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, 3367 $ IERR ) 3368 IF( WNTUAS ) THEN 3369* 3370* If left singular vectors desired in U, copy result to U 3371* and generate left bidiagonalizing vectors in U 3372* (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB) 3373* 3374 CALL DLACPY( 'L', M, M, A, LDA, U, LDU ) 3375 CALL DORGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ), 3376 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 3377 END IF 3378 IF( WNTVAS ) THEN 3379* 3380* If right singular vectors desired in VT, copy result to 3381* VT and generate right bidiagonalizing vectors in VT 3382* (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB) 3383* 3384 CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) 3385 IF( WNTVA ) 3386 $ NRVT = N 3387 IF( WNTVS ) 3388 $ NRVT = M 3389 CALL DORGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ), 3390 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 3391 END IF 3392 IF( WNTUO ) THEN 3393* 3394* If left singular vectors desired in A, generate left 3395* bidiagonalizing vectors in A 3396* (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB) 3397* 3398 CALL DORGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ), 3399 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 3400 END IF 3401 IF( WNTVO ) THEN 3402* 3403* If right singular vectors desired in A, generate right 3404* bidiagonalizing vectors in A 3405* (Workspace: need 4*M, prefer 3*M+M*NB) 3406* 3407 CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), 3408 $ WORK( IWORK ), LWORK-IWORK+1, IERR ) 3409 END IF 3410 IWORK = IE + M 3411 IF( WNTUAS .OR. WNTUO ) 3412 $ NRU = M 3413 IF( WNTUN ) 3414 $ NRU = 0 3415 IF( WNTVAS .OR. WNTVO ) 3416 $ NCVT = N 3417 IF( WNTVN ) 3418 $ NCVT = 0 3419 IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN 3420* 3421* Perform bidiagonal QR iteration, if desired, computing 3422* left singular vectors in U and computing right singular 3423* vectors in VT 3424* (Workspace: need BDSPAC) 3425* 3426 CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT, 3427 $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO ) 3428 ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN 3429* 3430* Perform bidiagonal QR iteration, if desired, computing 3431* left singular vectors in U and computing right singular 3432* vectors in A 3433* (Workspace: need BDSPAC) 3434* 3435 CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), A, LDA, 3436 $ U, LDU, DUM, 1, WORK( IWORK ), INFO ) 3437 ELSE 3438* 3439* Perform bidiagonal QR iteration, if desired, computing 3440* left singular vectors in A and computing right singular 3441* vectors in VT 3442* (Workspace: need BDSPAC) 3443* 3444 CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT, 3445 $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO ) 3446 END IF 3447* 3448 END IF 3449* 3450 END IF 3451* 3452* If DBDSQR failed to converge, copy unconverged superdiagonals 3453* to WORK( 2:MINMN ) 3454* 3455 IF( INFO.NE.0 ) THEN 3456 IF( IE.GT.2 ) THEN 3457 DO 50 I = 1, MINMN - 1 3458 WORK( I+1 ) = WORK( I+IE-1 ) 3459 50 CONTINUE 3460 END IF 3461 IF( IE.LT.2 ) THEN 3462 DO 60 I = MINMN - 1, 1, -1 3463 WORK( I+1 ) = WORK( I+IE-1 ) 3464 60 CONTINUE 3465 END IF 3466 END IF 3467* 3468* Undo scaling if necessary 3469* 3470 IF( ISCL.EQ.1 ) THEN 3471 IF( ANRM.GT.BIGNUM ) 3472 $ CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, 3473 $ IERR ) 3474 IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM ) 3475 $ CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1, WORK( 2 ), 3476 $ MINMN, IERR ) 3477 IF( ANRM.LT.SMLNUM ) 3478 $ CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, 3479 $ IERR ) 3480 IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM ) 3481 $ CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1, WORK( 2 ), 3482 $ MINMN, IERR ) 3483 END IF 3484* 3485* Return optimal workspace in WORK(1) 3486* 3487 WORK( 1 ) = MAXWRK 3488* 3489 RETURN 3490* 3491* End of DGESVD 3492* 3493 END 3494c $Id$ 3495