1*> \brief \b DSYTRI_3X 2* 3* =========== DOCUMENTATION =========== 4* 5* Online html documentation available at 6* http://www.netlib.org/lapack/explore-html/ 7* 8*> \htmlonly 9*> Download DSYTRI_3X + dependencies 10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsytri_3x.f"> 11*> [TGZ]</a> 12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsytri_3x.f"> 13*> [ZIP]</a> 14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsytri_3x.f"> 15*> [TXT]</a> 16*> \endhtmlonly 17* 18* Definition: 19* =========== 20* 21* SUBROUTINE DSYTRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO ) 22* 23* .. Scalar Arguments .. 24* CHARACTER UPLO 25* INTEGER INFO, LDA, N, NB 26* .. 27* .. Array Arguments .. 28* INTEGER IPIV( * ) 29* DOUBLE PRECISION A( LDA, * ), E( * ), WORK( N+NB+1, * ) 30* .. 31* 32* 33*> \par Purpose: 34* ============= 35*> 36*> \verbatim 37*> DSYTRI_3X computes the inverse of a real symmetric indefinite 38*> matrix A using the factorization computed by DSYTRF_RK or DSYTRF_BK: 39*> 40*> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), 41*> 42*> where U (or L) is unit upper (or lower) triangular matrix, 43*> U**T (or L**T) is the transpose of U (or L), P is a permutation 44*> matrix, P**T is the transpose of P, and D is symmetric and block 45*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. 46*> 47*> This is the blocked version of the algorithm, calling Level 3 BLAS. 48*> \endverbatim 49* 50* Arguments: 51* ========== 52* 53*> \param[in] UPLO 54*> \verbatim 55*> UPLO is CHARACTER*1 56*> Specifies whether the details of the factorization are 57*> stored as an upper or lower triangular matrix. 58*> = 'U': Upper triangle of A is stored; 59*> = 'L': Lower triangle of A is stored. 60*> \endverbatim 61*> 62*> \param[in] N 63*> \verbatim 64*> N is INTEGER 65*> The order of the matrix A. N >= 0. 66*> \endverbatim 67*> 68*> \param[in,out] A 69*> \verbatim 70*> A is DOUBLE PRECISION array, dimension (LDA,N) 71*> On entry, diagonal of the block diagonal matrix D and 72*> factors U or L as computed by DSYTRF_RK and DSYTRF_BK: 73*> a) ONLY diagonal elements of the symmetric block diagonal 74*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); 75*> (superdiagonal (or subdiagonal) elements of D 76*> should be provided on entry in array E), and 77*> b) If UPLO = 'U': factor U in the superdiagonal part of A. 78*> If UPLO = 'L': factor L in the subdiagonal part of A. 79*> 80*> On exit, if INFO = 0, the symmetric inverse of the original 81*> matrix. 82*> If UPLO = 'U': the upper triangular part of the inverse 83*> is formed and the part of A below the diagonal is not 84*> referenced; 85*> If UPLO = 'L': the lower triangular part of the inverse 86*> is formed and the part of A above the diagonal is not 87*> referenced. 88*> \endverbatim 89*> 90*> \param[in] LDA 91*> \verbatim 92*> LDA is INTEGER 93*> The leading dimension of the array A. LDA >= max(1,N). 94*> \endverbatim 95*> 96*> \param[in] E 97*> \verbatim 98*> E is DOUBLE PRECISION array, dimension (N) 99*> On entry, contains the superdiagonal (or subdiagonal) 100*> elements of the symmetric block diagonal matrix D 101*> with 1-by-1 or 2-by-2 diagonal blocks, where 102*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) not referenced; 103*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) not referenced. 104*> 105*> NOTE: For 1-by-1 diagonal block D(k), where 106*> 1 <= k <= N, the element E(k) is not referenced in both 107*> UPLO = 'U' or UPLO = 'L' cases. 108*> \endverbatim 109*> 110*> \param[in] IPIV 111*> \verbatim 112*> IPIV is INTEGER array, dimension (N) 113*> Details of the interchanges and the block structure of D 114*> as determined by DSYTRF_RK or DSYTRF_BK. 115*> \endverbatim 116*> 117*> \param[out] WORK 118*> \verbatim 119*> WORK is DOUBLE PRECISION array, dimension (N+NB+1,NB+3). 120*> \endverbatim 121*> 122*> \param[in] NB 123*> \verbatim 124*> NB is INTEGER 125*> Block size. 126*> \endverbatim 127*> 128*> \param[out] INFO 129*> \verbatim 130*> INFO is INTEGER 131*> = 0: successful exit 132*> < 0: if INFO = -i, the i-th argument had an illegal value 133*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its 134*> inverse could not be computed. 135*> \endverbatim 136* 137* Authors: 138* ======== 139* 140*> \author Univ. of Tennessee 141*> \author Univ. of California Berkeley 142*> \author Univ. of Colorado Denver 143*> \author NAG Ltd. 144* 145*> \date June 2017 146* 147*> \ingroup doubleSYcomputational 148* 149*> \par Contributors: 150* ================== 151*> \verbatim 152*> 153*> June 2017, Igor Kozachenko, 154*> Computer Science Division, 155*> University of California, Berkeley 156*> 157*> \endverbatim 158* 159* ===================================================================== 160 SUBROUTINE DSYTRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO ) 161* 162* -- LAPACK computational routine (version 3.7.1) -- 163* -- LAPACK is a software package provided by Univ. of Tennessee, -- 164* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- 165* June 2017 166* 167* .. Scalar Arguments .. 168 CHARACTER UPLO 169 INTEGER INFO, LDA, N, NB 170* .. 171* .. Array Arguments .. 172 INTEGER IPIV( * ) 173 DOUBLE PRECISION A( LDA, * ), E( * ), WORK( N+NB+1, * ) 174* .. 175* 176* ===================================================================== 177* 178* .. Parameters .. 179 DOUBLE PRECISION ONE, ZERO 180 PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) 181* .. 182* .. Local Scalars .. 183 LOGICAL UPPER 184 INTEGER CUT, I, ICOUNT, INVD, IP, K, NNB, J, U11 185 DOUBLE PRECISION AK, AKKP1, AKP1, D, T, U01_I_J, U01_IP1_J, 186 $ U11_I_J, U11_IP1_J 187* .. 188* .. External Functions .. 189 LOGICAL LSAME 190 EXTERNAL LSAME 191* .. 192* .. External Subroutines .. 193 EXTERNAL DGEMM, DSYSWAPR, DTRTRI, DTRMM, XERBLA 194* .. 195* .. Intrinsic Functions .. 196 INTRINSIC ABS, MAX, MOD 197* .. 198* .. Executable Statements .. 199* 200* Test the input parameters. 201* 202 INFO = 0 203 UPPER = LSAME( UPLO, 'U' ) 204 IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN 205 INFO = -1 206 ELSE IF( N.LT.0 ) THEN 207 INFO = -2 208 ELSE IF( LDA.LT.MAX( 1, N ) ) THEN 209 INFO = -4 210 END IF 211* 212* Quick return if possible 213* 214 IF( INFO.NE.0 ) THEN 215 CALL XERBLA( 'DSYTRI_3X', -INFO ) 216 RETURN 217 END IF 218 IF( N.EQ.0 ) 219 $ RETURN 220* 221* Workspace got Non-diag elements of D 222* 223 DO K = 1, N 224 WORK( K, 1 ) = E( K ) 225 END DO 226* 227* Check that the diagonal matrix D is nonsingular. 228* 229 IF( UPPER ) THEN 230* 231* Upper triangular storage: examine D from bottom to top 232* 233 DO INFO = N, 1, -1 234 IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) 235 $ RETURN 236 END DO 237 ELSE 238* 239* Lower triangular storage: examine D from top to bottom. 240* 241 DO INFO = 1, N 242 IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) 243 $ RETURN 244 END DO 245 END IF 246* 247 INFO = 0 248* 249* Splitting Workspace 250* U01 is a block ( N, NB+1 ) 251* The first element of U01 is in WORK( 1, 1 ) 252* U11 is a block ( NB+1, NB+1 ) 253* The first element of U11 is in WORK( N+1, 1 ) 254* 255 U11 = N 256* 257* INVD is a block ( N, 2 ) 258* The first element of INVD is in WORK( 1, INVD ) 259* 260 INVD = NB + 2 261 262 IF( UPPER ) THEN 263* 264* Begin Upper 265* 266* invA = P * inv(U**T) * inv(D) * inv(U) * P**T. 267* 268 CALL DTRTRI( UPLO, 'U', N, A, LDA, INFO ) 269* 270* inv(D) and inv(D) * inv(U) 271* 272 K = 1 273 DO WHILE( K.LE.N ) 274 IF( IPIV( K ).GT.0 ) THEN 275* 1 x 1 diagonal NNB 276 WORK( K, INVD ) = ONE / A( K, K ) 277 WORK( K, INVD+1 ) = ZERO 278 ELSE 279* 2 x 2 diagonal NNB 280 T = WORK( K+1, 1 ) 281 AK = A( K, K ) / T 282 AKP1 = A( K+1, K+1 ) / T 283 AKKP1 = WORK( K+1, 1 ) / T 284 D = T*( AK*AKP1-ONE ) 285 WORK( K, INVD ) = AKP1 / D 286 WORK( K+1, INVD+1 ) = AK / D 287 WORK( K, INVD+1 ) = -AKKP1 / D 288 WORK( K+1, INVD ) = WORK( K, INVD+1 ) 289 K = K + 1 290 END IF 291 K = K + 1 292 END DO 293* 294* inv(U**T) = (inv(U))**T 295* 296* inv(U**T) * inv(D) * inv(U) 297* 298 CUT = N 299 DO WHILE( CUT.GT.0 ) 300 NNB = NB 301 IF( CUT.LE.NNB ) THEN 302 NNB = CUT 303 ELSE 304 ICOUNT = 0 305* count negative elements, 306 DO I = CUT+1-NNB, CUT 307 IF( IPIV( I ).LT.0 ) ICOUNT = ICOUNT + 1 308 END DO 309* need a even number for a clear cut 310 IF( MOD( ICOUNT, 2 ).EQ.1 ) NNB = NNB + 1 311 END IF 312 313 CUT = CUT - NNB 314* 315* U01 Block 316* 317 DO I = 1, CUT 318 DO J = 1, NNB 319 WORK( I, J ) = A( I, CUT+J ) 320 END DO 321 END DO 322* 323* U11 Block 324* 325 DO I = 1, NNB 326 WORK( U11+I, I ) = ONE 327 DO J = 1, I-1 328 WORK( U11+I, J ) = ZERO 329 END DO 330 DO J = I+1, NNB 331 WORK( U11+I, J ) = A( CUT+I, CUT+J ) 332 END DO 333 END DO 334* 335* invD * U01 336* 337 I = 1 338 DO WHILE( I.LE.CUT ) 339 IF( IPIV( I ).GT.0 ) THEN 340 DO J = 1, NNB 341 WORK( I, J ) = WORK( I, INVD ) * WORK( I, J ) 342 END DO 343 ELSE 344 DO J = 1, NNB 345 U01_I_J = WORK( I, J ) 346 U01_IP1_J = WORK( I+1, J ) 347 WORK( I, J ) = WORK( I, INVD ) * U01_I_J 348 $ + WORK( I, INVD+1 ) * U01_IP1_J 349 WORK( I+1, J ) = WORK( I+1, INVD ) * U01_I_J 350 $ + WORK( I+1, INVD+1 ) * U01_IP1_J 351 END DO 352 I = I + 1 353 END IF 354 I = I + 1 355 END DO 356* 357* invD1 * U11 358* 359 I = 1 360 DO WHILE ( I.LE.NNB ) 361 IF( IPIV( CUT+I ).GT.0 ) THEN 362 DO J = I, NNB 363 WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J) 364 END DO 365 ELSE 366 DO J = I, NNB 367 U11_I_J = WORK(U11+I,J) 368 U11_IP1_J = WORK(U11+I+1,J) 369 WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J) 370 $ + WORK(CUT+I,INVD+1) * WORK(U11+I+1,J) 371 WORK( U11+I+1, J ) = WORK(CUT+I+1,INVD) * U11_I_J 372 $ + WORK(CUT+I+1,INVD+1) * U11_IP1_J 373 END DO 374 I = I + 1 375 END IF 376 I = I + 1 377 END DO 378* 379* U11**T * invD1 * U11 -> U11 380* 381 CALL DTRMM( 'L', 'U', 'T', 'U', NNB, NNB, 382 $ ONE, A( CUT+1, CUT+1 ), LDA, WORK( U11+1, 1 ), 383 $ N+NB+1 ) 384* 385 DO I = 1, NNB 386 DO J = I, NNB 387 A( CUT+I, CUT+J ) = WORK( U11+I, J ) 388 END DO 389 END DO 390* 391* U01**T * invD * U01 -> A( CUT+I, CUT+J ) 392* 393 CALL DGEMM( 'T', 'N', NNB, NNB, CUT, ONE, A( 1, CUT+1 ), 394 $ LDA, WORK, N+NB+1, ZERO, WORK(U11+1,1), N+NB+1 ) 395 396* 397* U11 = U11**T * invD1 * U11 + U01**T * invD * U01 398* 399 DO I = 1, NNB 400 DO J = I, NNB 401 A( CUT+I, CUT+J ) = A( CUT+I, CUT+J ) + WORK(U11+I,J) 402 END DO 403 END DO 404* 405* U01 = U00**T * invD0 * U01 406* 407 CALL DTRMM( 'L', UPLO, 'T', 'U', CUT, NNB, 408 $ ONE, A, LDA, WORK, N+NB+1 ) 409 410* 411* Update U01 412* 413 DO I = 1, CUT 414 DO J = 1, NNB 415 A( I, CUT+J ) = WORK( I, J ) 416 END DO 417 END DO 418* 419* Next Block 420* 421 END DO 422* 423* Apply PERMUTATIONS P and P**T: 424* P * inv(U**T) * inv(D) * inv(U) * P**T. 425* Interchange rows and columns I and IPIV(I) in reverse order 426* from the formation order of IPIV vector for Upper case. 427* 428* ( We can use a loop over IPIV with increment 1, 429* since the ABS value of IPIV(I) represents the row (column) 430* index of the interchange with row (column) i in both 1x1 431* and 2x2 pivot cases, i.e. we don't need separate code branches 432* for 1x1 and 2x2 pivot cases ) 433* 434 DO I = 1, N 435 IP = ABS( IPIV( I ) ) 436 IF( IP.NE.I ) THEN 437 IF (I .LT. IP) CALL DSYSWAPR( UPLO, N, A, LDA, I ,IP ) 438 IF (I .GT. IP) CALL DSYSWAPR( UPLO, N, A, LDA, IP ,I ) 439 END IF 440 END DO 441* 442 ELSE 443* 444* Begin Lower 445* 446* inv A = P * inv(L**T) * inv(D) * inv(L) * P**T. 447* 448 CALL DTRTRI( UPLO, 'U', N, A, LDA, INFO ) 449* 450* inv(D) and inv(D) * inv(L) 451* 452 K = N 453 DO WHILE ( K .GE. 1 ) 454 IF( IPIV( K ).GT.0 ) THEN 455* 1 x 1 diagonal NNB 456 WORK( K, INVD ) = ONE / A( K, K ) 457 WORK( K, INVD+1 ) = ZERO 458 ELSE 459* 2 x 2 diagonal NNB 460 T = WORK( K-1, 1 ) 461 AK = A( K-1, K-1 ) / T 462 AKP1 = A( K, K ) / T 463 AKKP1 = WORK( K-1, 1 ) / T 464 D = T*( AK*AKP1-ONE ) 465 WORK( K-1, INVD ) = AKP1 / D 466 WORK( K, INVD ) = AK / D 467 WORK( K, INVD+1 ) = -AKKP1 / D 468 WORK( K-1, INVD+1 ) = WORK( K, INVD+1 ) 469 K = K - 1 470 END IF 471 K = K - 1 472 END DO 473* 474* inv(L**T) = (inv(L))**T 475* 476* inv(L**T) * inv(D) * inv(L) 477* 478 CUT = 0 479 DO WHILE( CUT.LT.N ) 480 NNB = NB 481 IF( (CUT + NNB).GT.N ) THEN 482 NNB = N - CUT 483 ELSE 484 ICOUNT = 0 485* count negative elements, 486 DO I = CUT + 1, CUT+NNB 487 IF ( IPIV( I ).LT.0 ) ICOUNT = ICOUNT + 1 488 END DO 489* need a even number for a clear cut 490 IF( MOD( ICOUNT, 2 ).EQ.1 ) NNB = NNB + 1 491 END IF 492* 493* L21 Block 494* 495 DO I = 1, N-CUT-NNB 496 DO J = 1, NNB 497 WORK( I, J ) = A( CUT+NNB+I, CUT+J ) 498 END DO 499 END DO 500* 501* L11 Block 502* 503 DO I = 1, NNB 504 WORK( U11+I, I) = ONE 505 DO J = I+1, NNB 506 WORK( U11+I, J ) = ZERO 507 END DO 508 DO J = 1, I-1 509 WORK( U11+I, J ) = A( CUT+I, CUT+J ) 510 END DO 511 END DO 512* 513* invD*L21 514* 515 I = N-CUT-NNB 516 DO WHILE( I.GE.1 ) 517 IF( IPIV( CUT+NNB+I ).GT.0 ) THEN 518 DO J = 1, NNB 519 WORK( I, J ) = WORK( CUT+NNB+I, INVD) * WORK( I, J) 520 END DO 521 ELSE 522 DO J = 1, NNB 523 U01_I_J = WORK(I,J) 524 U01_IP1_J = WORK(I-1,J) 525 WORK(I,J)=WORK(CUT+NNB+I,INVD)*U01_I_J+ 526 $ WORK(CUT+NNB+I,INVD+1)*U01_IP1_J 527 WORK(I-1,J)=WORK(CUT+NNB+I-1,INVD+1)*U01_I_J+ 528 $ WORK(CUT+NNB+I-1,INVD)*U01_IP1_J 529 END DO 530 I = I - 1 531 END IF 532 I = I - 1 533 END DO 534* 535* invD1*L11 536* 537 I = NNB 538 DO WHILE( I.GE.1 ) 539 IF( IPIV( CUT+I ).GT.0 ) THEN 540 DO J = 1, NNB 541 WORK( U11+I, J ) = WORK( CUT+I, INVD)*WORK(U11+I,J) 542 END DO 543 544 ELSE 545 DO J = 1, NNB 546 U11_I_J = WORK( U11+I, J ) 547 U11_IP1_J = WORK( U11+I-1, J ) 548 WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J) 549 $ + WORK(CUT+I,INVD+1) * U11_IP1_J 550 WORK( U11+I-1, J ) = WORK(CUT+I-1,INVD+1) * U11_I_J 551 $ + WORK(CUT+I-1,INVD) * U11_IP1_J 552 END DO 553 I = I - 1 554 END IF 555 I = I - 1 556 END DO 557* 558* L11**T * invD1 * L11 -> L11 559* 560 CALL DTRMM( 'L', UPLO, 'T', 'U', NNB, NNB, ONE, 561 $ A( CUT+1, CUT+1 ), LDA, WORK( U11+1, 1 ), 562 $ N+NB+1 ) 563 564* 565 DO I = 1, NNB 566 DO J = 1, I 567 A( CUT+I, CUT+J ) = WORK( U11+I, J ) 568 END DO 569 END DO 570* 571 IF( (CUT+NNB).LT.N ) THEN 572* 573* L21**T * invD2*L21 -> A( CUT+I, CUT+J ) 574* 575 CALL DGEMM( 'T', 'N', NNB, NNB, N-NNB-CUT, ONE, 576 $ A( CUT+NNB+1, CUT+1 ), LDA, WORK, N+NB+1, 577 $ ZERO, WORK( U11+1, 1 ), N+NB+1 ) 578 579* 580* L11 = L11**T * invD1 * L11 + U01**T * invD * U01 581* 582 DO I = 1, NNB 583 DO J = 1, I 584 A( CUT+I, CUT+J ) = A( CUT+I, CUT+J )+WORK(U11+I,J) 585 END DO 586 END DO 587* 588* L01 = L22**T * invD2 * L21 589* 590 CALL DTRMM( 'L', UPLO, 'T', 'U', N-NNB-CUT, NNB, ONE, 591 $ A( CUT+NNB+1, CUT+NNB+1 ), LDA, WORK, 592 $ N+NB+1 ) 593* 594* Update L21 595* 596 DO I = 1, N-CUT-NNB 597 DO J = 1, NNB 598 A( CUT+NNB+I, CUT+J ) = WORK( I, J ) 599 END DO 600 END DO 601* 602 ELSE 603* 604* L11 = L11**T * invD1 * L11 605* 606 DO I = 1, NNB 607 DO J = 1, I 608 A( CUT+I, CUT+J ) = WORK( U11+I, J ) 609 END DO 610 END DO 611 END IF 612* 613* Next Block 614* 615 CUT = CUT + NNB 616* 617 END DO 618* 619* Apply PERMUTATIONS P and P**T: 620* P * inv(L**T) * inv(D) * inv(L) * P**T. 621* Interchange rows and columns I and IPIV(I) in reverse order 622* from the formation order of IPIV vector for Lower case. 623* 624* ( We can use a loop over IPIV with increment -1, 625* since the ABS value of IPIV(I) represents the row (column) 626* index of the interchange with row (column) i in both 1x1 627* and 2x2 pivot cases, i.e. we don't need separate code branches 628* for 1x1 and 2x2 pivot cases ) 629* 630 DO I = N, 1, -1 631 IP = ABS( IPIV( I ) ) 632 IF( IP.NE.I ) THEN 633 IF (I .LT. IP) CALL DSYSWAPR( UPLO, N, A, LDA, I ,IP ) 634 IF (I .GT. IP) CALL DSYSWAPR( UPLO, N, A, LDA, IP ,I ) 635 END IF 636 END DO 637* 638 END IF 639* 640 RETURN 641* 642* End of DSYTRI_3X 643* 644 END 645 646