1*> \brief \b DLASD2 merges the two sets of singular values together into a single sorted set. Used by sbdsdc. 2* 3* =========== DOCUMENTATION =========== 4* 5* Online html documentation available at 6* http://www.netlib.org/lapack/explore-html/ 7* 8*> \htmlonly 9*> Download DLASD2 + dependencies 10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasd2.f"> 11*> [TGZ]</a> 12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasd2.f"> 13*> [ZIP]</a> 14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasd2.f"> 15*> [TXT]</a> 16*> \endhtmlonly 17* 18* Definition: 19* =========== 20* 21* SUBROUTINE DLASD2( NL, NR, SQRE, K, D, Z, ALPHA, BETA, U, LDU, VT, 22* LDVT, DSIGMA, U2, LDU2, VT2, LDVT2, IDXP, IDX, 23* IDXC, IDXQ, COLTYP, INFO ) 24* 25* .. Scalar Arguments .. 26* INTEGER INFO, K, LDU, LDU2, LDVT, LDVT2, NL, NR, SQRE 27* DOUBLE PRECISION ALPHA, BETA 28* .. 29* .. Array Arguments .. 30* INTEGER COLTYP( * ), IDX( * ), IDXC( * ), IDXP( * ), 31* $ IDXQ( * ) 32* DOUBLE PRECISION D( * ), DSIGMA( * ), U( LDU, * ), 33* $ U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ), 34* $ Z( * ) 35* .. 36* 37* 38*> \par Purpose: 39* ============= 40*> 41*> \verbatim 42*> 43*> DLASD2 merges the two sets of singular values together into a single 44*> sorted set. Then it tries to deflate the size of the problem. 45*> There are two ways in which deflation can occur: when two or more 46*> singular values are close together or if there is a tiny entry in the 47*> Z vector. For each such occurrence the order of the related secular 48*> equation problem is reduced by one. 49*> 50*> DLASD2 is called from DLASD1. 51*> \endverbatim 52* 53* Arguments: 54* ========== 55* 56*> \param[in] NL 57*> \verbatim 58*> NL is INTEGER 59*> The row dimension of the upper block. NL >= 1. 60*> \endverbatim 61*> 62*> \param[in] NR 63*> \verbatim 64*> NR is INTEGER 65*> The row dimension of the lower block. NR >= 1. 66*> \endverbatim 67*> 68*> \param[in] SQRE 69*> \verbatim 70*> SQRE is INTEGER 71*> = 0: the lower block is an NR-by-NR square matrix. 72*> = 1: the lower block is an NR-by-(NR+1) rectangular matrix. 73*> 74*> The bidiagonal matrix has N = NL + NR + 1 rows and 75*> M = N + SQRE >= N columns. 76*> \endverbatim 77*> 78*> \param[out] K 79*> \verbatim 80*> K is INTEGER 81*> Contains the dimension of the non-deflated matrix, 82*> This is the order of the related secular equation. 1 <= K <=N. 83*> \endverbatim 84*> 85*> \param[in,out] D 86*> \verbatim 87*> D is DOUBLE PRECISION array, dimension(N) 88*> On entry D contains the singular values of the two submatrices 89*> to be combined. On exit D contains the trailing (N-K) updated 90*> singular values (those which were deflated) sorted into 91*> increasing order. 92*> \endverbatim 93*> 94*> \param[out] Z 95*> \verbatim 96*> Z is DOUBLE PRECISION array, dimension(N) 97*> On exit Z contains the updating row vector in the secular 98*> equation. 99*> \endverbatim 100*> 101*> \param[in] ALPHA 102*> \verbatim 103*> ALPHA is DOUBLE PRECISION 104*> Contains the diagonal element associated with the added row. 105*> \endverbatim 106*> 107*> \param[in] BETA 108*> \verbatim 109*> BETA is DOUBLE PRECISION 110*> Contains the off-diagonal element associated with the added 111*> row. 112*> \endverbatim 113*> 114*> \param[in,out] U 115*> \verbatim 116*> U is DOUBLE PRECISION array, dimension(LDU,N) 117*> On entry U contains the left singular vectors of two 118*> submatrices in the two square blocks with corners at (1,1), 119*> (NL, NL), and (NL+2, NL+2), (N,N). 120*> On exit U contains the trailing (N-K) updated left singular 121*> vectors (those which were deflated) in its last N-K columns. 122*> \endverbatim 123*> 124*> \param[in] LDU 125*> \verbatim 126*> LDU is INTEGER 127*> The leading dimension of the array U. LDU >= N. 128*> \endverbatim 129*> 130*> \param[in,out] VT 131*> \verbatim 132*> VT is DOUBLE PRECISION array, dimension(LDVT,M) 133*> On entry VT**T contains the right singular vectors of two 134*> submatrices in the two square blocks with corners at (1,1), 135*> (NL+1, NL+1), and (NL+2, NL+2), (M,M). 136*> On exit VT**T contains the trailing (N-K) updated right singular 137*> vectors (those which were deflated) in its last N-K columns. 138*> In case SQRE =1, the last row of VT spans the right null 139*> space. 140*> \endverbatim 141*> 142*> \param[in] LDVT 143*> \verbatim 144*> LDVT is INTEGER 145*> The leading dimension of the array VT. LDVT >= M. 146*> \endverbatim 147*> 148*> \param[out] DSIGMA 149*> \verbatim 150*> DSIGMA is DOUBLE PRECISION array, dimension (N) 151*> Contains a copy of the diagonal elements (K-1 singular values 152*> and one zero) in the secular equation. 153*> \endverbatim 154*> 155*> \param[out] U2 156*> \verbatim 157*> U2 is DOUBLE PRECISION array, dimension(LDU2,N) 158*> Contains a copy of the first K-1 left singular vectors which 159*> will be used by DLASD3 in a matrix multiply (DGEMM) to solve 160*> for the new left singular vectors. U2 is arranged into four 161*> blocks. The first block contains a column with 1 at NL+1 and 162*> zero everywhere else; the second block contains non-zero 163*> entries only at and above NL; the third contains non-zero 164*> entries only below NL+1; and the fourth is dense. 165*> \endverbatim 166*> 167*> \param[in] LDU2 168*> \verbatim 169*> LDU2 is INTEGER 170*> The leading dimension of the array U2. LDU2 >= N. 171*> \endverbatim 172*> 173*> \param[out] VT2 174*> \verbatim 175*> VT2 is DOUBLE PRECISION array, dimension(LDVT2,N) 176*> VT2**T contains a copy of the first K right singular vectors 177*> which will be used by DLASD3 in a matrix multiply (DGEMM) to 178*> solve for the new right singular vectors. VT2 is arranged into 179*> three blocks. The first block contains a row that corresponds 180*> to the special 0 diagonal element in SIGMA; the second block 181*> contains non-zeros only at and before NL +1; the third block 182*> contains non-zeros only at and after NL +2. 183*> \endverbatim 184*> 185*> \param[in] LDVT2 186*> \verbatim 187*> LDVT2 is INTEGER 188*> The leading dimension of the array VT2. LDVT2 >= M. 189*> \endverbatim 190*> 191*> \param[out] IDXP 192*> \verbatim 193*> IDXP is INTEGER array, dimension(N) 194*> This will contain the permutation used to place deflated 195*> values of D at the end of the array. On output IDXP(2:K) 196*> points to the nondeflated D-values and IDXP(K+1:N) 197*> points to the deflated singular values. 198*> \endverbatim 199*> 200*> \param[out] IDX 201*> \verbatim 202*> IDX is INTEGER array, dimension(N) 203*> This will contain the permutation used to sort the contents of 204*> D into ascending order. 205*> \endverbatim 206*> 207*> \param[out] IDXC 208*> \verbatim 209*> IDXC is INTEGER array, dimension(N) 210*> This will contain the permutation used to arrange the columns 211*> of the deflated U matrix into three groups: the first group 212*> contains non-zero entries only at and above NL, the second 213*> contains non-zero entries only below NL+2, and the third is 214*> dense. 215*> \endverbatim 216*> 217*> \param[in,out] IDXQ 218*> \verbatim 219*> IDXQ is INTEGER array, dimension(N) 220*> This contains the permutation which separately sorts the two 221*> sub-problems in D into ascending order. Note that entries in 222*> the first hlaf of this permutation must first be moved one 223*> position backward; and entries in the second half 224*> must first have NL+1 added to their values. 225*> \endverbatim 226*> 227*> \param[out] COLTYP 228*> \verbatim 229*> COLTYP is INTEGER array, dimension(N) 230*> As workspace, this will contain a label which will indicate 231*> which of the following types a column in the U2 matrix or a 232*> row in the VT2 matrix is: 233*> 1 : non-zero in the upper half only 234*> 2 : non-zero in the lower half only 235*> 3 : dense 236*> 4 : deflated 237*> 238*> On exit, it is an array of dimension 4, with COLTYP(I) being 239*> the dimension of the I-th type columns. 240*> \endverbatim 241*> 242*> \param[out] INFO 243*> \verbatim 244*> INFO is INTEGER 245*> = 0: successful exit. 246*> < 0: if INFO = -i, the i-th argument had an illegal value. 247*> \endverbatim 248* 249* Authors: 250* ======== 251* 252*> \author Univ. of Tennessee 253*> \author Univ. of California Berkeley 254*> \author Univ. of Colorado Denver 255*> \author NAG Ltd. 256* 257*> \ingroup OTHERauxiliary 258* 259*> \par Contributors: 260* ================== 261*> 262*> Ming Gu and Huan Ren, Computer Science Division, University of 263*> California at Berkeley, USA 264*> 265* ===================================================================== 266 SUBROUTINE DLASD2( NL, NR, SQRE, K, D, Z, ALPHA, BETA, U, LDU, VT, 267 $ LDVT, DSIGMA, U2, LDU2, VT2, LDVT2, IDXP, IDX, 268 $ IDXC, IDXQ, COLTYP, INFO ) 269* 270* -- LAPACK auxiliary routine -- 271* -- LAPACK is a software package provided by Univ. of Tennessee, -- 272* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- 273* 274* .. Scalar Arguments .. 275 INTEGER INFO, K, LDU, LDU2, LDVT, LDVT2, NL, NR, SQRE 276 DOUBLE PRECISION ALPHA, BETA 277* .. 278* .. Array Arguments .. 279 INTEGER COLTYP( * ), IDX( * ), IDXC( * ), IDXP( * ), 280 $ IDXQ( * ) 281 DOUBLE PRECISION D( * ), DSIGMA( * ), U( LDU, * ), 282 $ U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ), 283 $ Z( * ) 284* .. 285* 286* ===================================================================== 287* 288* .. Parameters .. 289 DOUBLE PRECISION ZERO, ONE, TWO, EIGHT 290 PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0, 291 $ EIGHT = 8.0D+0 ) 292* .. 293* .. Local Arrays .. 294 INTEGER CTOT( 4 ), PSM( 4 ) 295* .. 296* .. Local Scalars .. 297 INTEGER CT, I, IDXI, IDXJ, IDXJP, J, JP, JPREV, K2, M, 298 $ N, NLP1, NLP2 299 DOUBLE PRECISION C, EPS, HLFTOL, S, TAU, TOL, Z1 300* .. 301* .. External Functions .. 302 DOUBLE PRECISION DLAMCH, DLAPY2 303 EXTERNAL DLAMCH, DLAPY2 304* .. 305* .. External Subroutines .. 306 EXTERNAL DCOPY, DLACPY, DLAMRG, DLASET, DROT, XERBLA 307* .. 308* .. Intrinsic Functions .. 309 INTRINSIC ABS, MAX 310* .. 311* .. Executable Statements .. 312* 313* Test the input parameters. 314* 315 INFO = 0 316* 317 IF( NL.LT.1 ) THEN 318 INFO = -1 319 ELSE IF( NR.LT.1 ) THEN 320 INFO = -2 321 ELSE IF( ( SQRE.NE.1 ) .AND. ( SQRE.NE.0 ) ) THEN 322 INFO = -3 323 END IF 324* 325 N = NL + NR + 1 326 M = N + SQRE 327* 328 IF( LDU.LT.N ) THEN 329 INFO = -10 330 ELSE IF( LDVT.LT.M ) THEN 331 INFO = -12 332 ELSE IF( LDU2.LT.N ) THEN 333 INFO = -15 334 ELSE IF( LDVT2.LT.M ) THEN 335 INFO = -17 336 END IF 337 IF( INFO.NE.0 ) THEN 338 CALL XERBLA( 'DLASD2', -INFO ) 339 RETURN 340 END IF 341* 342 NLP1 = NL + 1 343 NLP2 = NL + 2 344* 345* Generate the first part of the vector Z; and move the singular 346* values in the first part of D one position backward. 347* 348 Z1 = ALPHA*VT( NLP1, NLP1 ) 349 Z( 1 ) = Z1 350 DO 10 I = NL, 1, -1 351 Z( I+1 ) = ALPHA*VT( I, NLP1 ) 352 D( I+1 ) = D( I ) 353 IDXQ( I+1 ) = IDXQ( I ) + 1 354 10 CONTINUE 355* 356* Generate the second part of the vector Z. 357* 358 DO 20 I = NLP2, M 359 Z( I ) = BETA*VT( I, NLP2 ) 360 20 CONTINUE 361* 362* Initialize some reference arrays. 363* 364 DO 30 I = 2, NLP1 365 COLTYP( I ) = 1 366 30 CONTINUE 367 DO 40 I = NLP2, N 368 COLTYP( I ) = 2 369 40 CONTINUE 370* 371* Sort the singular values into increasing order 372* 373 DO 50 I = NLP2, N 374 IDXQ( I ) = IDXQ( I ) + NLP1 375 50 CONTINUE 376* 377* DSIGMA, IDXC, IDXC, and the first column of U2 378* are used as storage space. 379* 380 DO 60 I = 2, N 381 DSIGMA( I ) = D( IDXQ( I ) ) 382 U2( I, 1 ) = Z( IDXQ( I ) ) 383 IDXC( I ) = COLTYP( IDXQ( I ) ) 384 60 CONTINUE 385* 386 CALL DLAMRG( NL, NR, DSIGMA( 2 ), 1, 1, IDX( 2 ) ) 387* 388 DO 70 I = 2, N 389 IDXI = 1 + IDX( I ) 390 D( I ) = DSIGMA( IDXI ) 391 Z( I ) = U2( IDXI, 1 ) 392 COLTYP( I ) = IDXC( IDXI ) 393 70 CONTINUE 394* 395* Calculate the allowable deflation tolerance 396* 397 EPS = DLAMCH( 'Epsilon' ) 398 TOL = MAX( ABS( ALPHA ), ABS( BETA ) ) 399 TOL = EIGHT*EPS*MAX( ABS( D( N ) ), TOL ) 400* 401* There are 2 kinds of deflation -- first a value in the z-vector 402* is small, second two (or more) singular values are very close 403* together (their difference is small). 404* 405* If the value in the z-vector is small, we simply permute the 406* array so that the corresponding singular value is moved to the 407* end. 408* 409* If two values in the D-vector are close, we perform a two-sided 410* rotation designed to make one of the corresponding z-vector 411* entries zero, and then permute the array so that the deflated 412* singular value is moved to the end. 413* 414* If there are multiple singular values then the problem deflates. 415* Here the number of equal singular values are found. As each equal 416* singular value is found, an elementary reflector is computed to 417* rotate the corresponding singular subspace so that the 418* corresponding components of Z are zero in this new basis. 419* 420 K = 1 421 K2 = N + 1 422 DO 80 J = 2, N 423 IF( ABS( Z( J ) ).LE.TOL ) THEN 424* 425* Deflate due to small z component. 426* 427 K2 = K2 - 1 428 IDXP( K2 ) = J 429 COLTYP( J ) = 4 430 IF( J.EQ.N ) 431 $ GO TO 120 432 ELSE 433 JPREV = J 434 GO TO 90 435 END IF 436 80 CONTINUE 437 90 CONTINUE 438 J = JPREV 439 100 CONTINUE 440 J = J + 1 441 IF( J.GT.N ) 442 $ GO TO 110 443 IF( ABS( Z( J ) ).LE.TOL ) THEN 444* 445* Deflate due to small z component. 446* 447 K2 = K2 - 1 448 IDXP( K2 ) = J 449 COLTYP( J ) = 4 450 ELSE 451* 452* Check if singular values are close enough to allow deflation. 453* 454 IF( ABS( D( J )-D( JPREV ) ).LE.TOL ) THEN 455* 456* Deflation is possible. 457* 458 S = Z( JPREV ) 459 C = Z( J ) 460* 461* Find sqrt(a**2+b**2) without overflow or 462* destructive underflow. 463* 464 TAU = DLAPY2( C, S ) 465 C = C / TAU 466 S = -S / TAU 467 Z( J ) = TAU 468 Z( JPREV ) = ZERO 469* 470* Apply back the Givens rotation to the left and right 471* singular vector matrices. 472* 473 IDXJP = IDXQ( IDX( JPREV )+1 ) 474 IDXJ = IDXQ( IDX( J )+1 ) 475 IF( IDXJP.LE.NLP1 ) THEN 476 IDXJP = IDXJP - 1 477 END IF 478 IF( IDXJ.LE.NLP1 ) THEN 479 IDXJ = IDXJ - 1 480 END IF 481 CALL DROT( N, U( 1, IDXJP ), 1, U( 1, IDXJ ), 1, C, S ) 482 CALL DROT( M, VT( IDXJP, 1 ), LDVT, VT( IDXJ, 1 ), LDVT, C, 483 $ S ) 484 IF( COLTYP( J ).NE.COLTYP( JPREV ) ) THEN 485 COLTYP( J ) = 3 486 END IF 487 COLTYP( JPREV ) = 4 488 K2 = K2 - 1 489 IDXP( K2 ) = JPREV 490 JPREV = J 491 ELSE 492 K = K + 1 493 U2( K, 1 ) = Z( JPREV ) 494 DSIGMA( K ) = D( JPREV ) 495 IDXP( K ) = JPREV 496 JPREV = J 497 END IF 498 END IF 499 GO TO 100 500 110 CONTINUE 501* 502* Record the last singular value. 503* 504 K = K + 1 505 U2( K, 1 ) = Z( JPREV ) 506 DSIGMA( K ) = D( JPREV ) 507 IDXP( K ) = JPREV 508* 509 120 CONTINUE 510* 511* Count up the total number of the various types of columns, then 512* form a permutation which positions the four column types into 513* four groups of uniform structure (although one or more of these 514* groups may be empty). 515* 516 DO 130 J = 1, 4 517 CTOT( J ) = 0 518 130 CONTINUE 519 DO 140 J = 2, N 520 CT = COLTYP( J ) 521 CTOT( CT ) = CTOT( CT ) + 1 522 140 CONTINUE 523* 524* PSM(*) = Position in SubMatrix (of types 1 through 4) 525* 526 PSM( 1 ) = 2 527 PSM( 2 ) = 2 + CTOT( 1 ) 528 PSM( 3 ) = PSM( 2 ) + CTOT( 2 ) 529 PSM( 4 ) = PSM( 3 ) + CTOT( 3 ) 530* 531* Fill out the IDXC array so that the permutation which it induces 532* will place all type-1 columns first, all type-2 columns next, 533* then all type-3's, and finally all type-4's, starting from the 534* second column. This applies similarly to the rows of VT. 535* 536 DO 150 J = 2, N 537 JP = IDXP( J ) 538 CT = COLTYP( JP ) 539 IDXC( PSM( CT ) ) = J 540 PSM( CT ) = PSM( CT ) + 1 541 150 CONTINUE 542* 543* Sort the singular values and corresponding singular vectors into 544* DSIGMA, U2, and VT2 respectively. The singular values/vectors 545* which were not deflated go into the first K slots of DSIGMA, U2, 546* and VT2 respectively, while those which were deflated go into the 547* last N - K slots, except that the first column/row will be treated 548* separately. 549* 550 DO 160 J = 2, N 551 JP = IDXP( J ) 552 DSIGMA( J ) = D( JP ) 553 IDXJ = IDXQ( IDX( IDXP( IDXC( J ) ) )+1 ) 554 IF( IDXJ.LE.NLP1 ) THEN 555 IDXJ = IDXJ - 1 556 END IF 557 CALL DCOPY( N, U( 1, IDXJ ), 1, U2( 1, J ), 1 ) 558 CALL DCOPY( M, VT( IDXJ, 1 ), LDVT, VT2( J, 1 ), LDVT2 ) 559 160 CONTINUE 560* 561* Determine DSIGMA(1), DSIGMA(2) and Z(1) 562* 563 DSIGMA( 1 ) = ZERO 564 HLFTOL = TOL / TWO 565 IF( ABS( DSIGMA( 2 ) ).LE.HLFTOL ) 566 $ DSIGMA( 2 ) = HLFTOL 567 IF( M.GT.N ) THEN 568 Z( 1 ) = DLAPY2( Z1, Z( M ) ) 569 IF( Z( 1 ).LE.TOL ) THEN 570 C = ONE 571 S = ZERO 572 Z( 1 ) = TOL 573 ELSE 574 C = Z1 / Z( 1 ) 575 S = Z( M ) / Z( 1 ) 576 END IF 577 ELSE 578 IF( ABS( Z1 ).LE.TOL ) THEN 579 Z( 1 ) = TOL 580 ELSE 581 Z( 1 ) = Z1 582 END IF 583 END IF 584* 585* Move the rest of the updating row to Z. 586* 587 CALL DCOPY( K-1, U2( 2, 1 ), 1, Z( 2 ), 1 ) 588* 589* Determine the first column of U2, the first row of VT2 and the 590* last row of VT. 591* 592 CALL DLASET( 'A', N, 1, ZERO, ZERO, U2, LDU2 ) 593 U2( NLP1, 1 ) = ONE 594 IF( M.GT.N ) THEN 595 DO 170 I = 1, NLP1 596 VT( M, I ) = -S*VT( NLP1, I ) 597 VT2( 1, I ) = C*VT( NLP1, I ) 598 170 CONTINUE 599 DO 180 I = NLP2, M 600 VT2( 1, I ) = S*VT( M, I ) 601 VT( M, I ) = C*VT( M, I ) 602 180 CONTINUE 603 ELSE 604 CALL DCOPY( M, VT( NLP1, 1 ), LDVT, VT2( 1, 1 ), LDVT2 ) 605 END IF 606 IF( M.GT.N ) THEN 607 CALL DCOPY( M, VT( M, 1 ), LDVT, VT2( M, 1 ), LDVT2 ) 608 END IF 609* 610* The deflated singular values and their corresponding vectors go 611* into the back of D, U, and V respectively. 612* 613 IF( N.GT.K ) THEN 614 CALL DCOPY( N-K, DSIGMA( K+1 ), 1, D( K+1 ), 1 ) 615 CALL DLACPY( 'A', N, N-K, U2( 1, K+1 ), LDU2, U( 1, K+1 ), 616 $ LDU ) 617 CALL DLACPY( 'A', N-K, M, VT2( K+1, 1 ), LDVT2, VT( K+1, 1 ), 618 $ LDVT ) 619 END IF 620* 621* Copy CTOT into COLTYP for referencing in DLASD3. 622* 623 DO 190 J = 1, 4 624 COLTYP( J ) = CTOT( J ) 625 190 CONTINUE 626* 627 RETURN 628* 629* End of DLASD2 630* 631 END 632