1*> \brief \b SSYTRI_3X 2* 3* =========== DOCUMENTATION =========== 4* 5* Online html documentation available at 6* http://www.netlib.org/lapack/explore-html/ 7* 8*> \htmlonly 9*> Download SSYTRI_3X + dependencies 10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ssytri_3x.f"> 11*> [TGZ]</a> 12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/ssytri_3x.f"> 13*> [ZIP]</a> 14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ssytri_3x.f"> 15*> [TXT]</a> 16*> \endhtmlonly 17* 18* Definition: 19* =========== 20* 21* SUBROUTINE SSYTRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO ) 22* 23* .. Scalar Arguments .. 24* CHARACTER UPLO 25* INTEGER INFO, LDA, N, NB 26* .. 27* .. Array Arguments .. 28* INTEGER IPIV( * ) 29* REAL A( LDA, * ), E( * ), WORK( N+NB+1, * ) 30* .. 31* 32* 33*> \par Purpose: 34* ============= 35*> 36*> \verbatim 37*> SSYTRI_3X computes the inverse of a real symmetric indefinite 38*> matrix A using the factorization computed by SSYTRF_RK or SSYTRF_BK: 39*> 40*> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), 41*> 42*> where U (or L) is unit upper (or lower) triangular matrix, 43*> U**T (or L**T) is the transpose of U (or L), P is a permutation 44*> matrix, P**T is the transpose of P, and D is symmetric and block 45*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. 46*> 47*> This is the blocked version of the algorithm, calling Level 3 BLAS. 48*> \endverbatim 49* 50* Arguments: 51* ========== 52* 53*> \param[in] UPLO 54*> \verbatim 55*> UPLO is CHARACTER*1 56*> Specifies whether the details of the factorization are 57*> stored as an upper or lower triangular matrix. 58*> = 'U': Upper triangle of A is stored; 59*> = 'L': Lower triangle of A is stored. 60*> \endverbatim 61*> 62*> \param[in] N 63*> \verbatim 64*> N is INTEGER 65*> The order of the matrix A. N >= 0. 66*> \endverbatim 67*> 68*> \param[in,out] A 69*> \verbatim 70*> A is REAL array, dimension (LDA,N) 71*> On entry, diagonal of the block diagonal matrix D and 72*> factors U or L as computed by SYTRF_RK and SSYTRF_BK: 73*> a) ONLY diagonal elements of the symmetric block diagonal 74*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); 75*> (superdiagonal (or subdiagonal) elements of D 76*> should be provided on entry in array E), and 77*> b) If UPLO = 'U': factor U in the superdiagonal part of A. 78*> If UPLO = 'L': factor L in the subdiagonal part of A. 79*> 80*> On exit, if INFO = 0, the symmetric inverse of the original 81*> matrix. 82*> If UPLO = 'U': the upper triangular part of the inverse 83*> is formed and the part of A below the diagonal is not 84*> referenced; 85*> If UPLO = 'L': the lower triangular part of the inverse 86*> is formed and the part of A above the diagonal is not 87*> referenced. 88*> \endverbatim 89*> 90*> \param[in] LDA 91*> \verbatim 92*> LDA is INTEGER 93*> The leading dimension of the array A. LDA >= max(1,N). 94*> \endverbatim 95*> 96*> \param[in] E 97*> \verbatim 98*> E is REAL array, dimension (N) 99*> On entry, contains the superdiagonal (or subdiagonal) 100*> elements of the symmetric block diagonal matrix D 101*> with 1-by-1 or 2-by-2 diagonal blocks, where 102*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) not referenced; 103*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) not referenced. 104*> 105*> NOTE: For 1-by-1 diagonal block D(k), where 106*> 1 <= k <= N, the element E(k) is not referenced in both 107*> UPLO = 'U' or UPLO = 'L' cases. 108*> \endverbatim 109*> 110*> \param[in] IPIV 111*> \verbatim 112*> IPIV is INTEGER array, dimension (N) 113*> Details of the interchanges and the block structure of D 114*> as determined by SSYTRF_RK or SSYTRF_BK. 115*> \endverbatim 116*> 117*> \param[out] WORK 118*> \verbatim 119*> WORK is REAL array, dimension (N+NB+1,NB+3). 120*> \endverbatim 121*> 122*> \param[in] NB 123*> \verbatim 124*> NB is INTEGER 125*> Block size. 126*> \endverbatim 127*> 128*> \param[out] INFO 129*> \verbatim 130*> INFO is INTEGER 131*> = 0: successful exit 132*> < 0: if INFO = -i, the i-th argument had an illegal value 133*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its 134*> inverse could not be computed. 135*> \endverbatim 136* 137* Authors: 138* ======== 139* 140*> \author Univ. of Tennessee 141*> \author Univ. of California Berkeley 142*> \author Univ. of Colorado Denver 143*> \author NAG Ltd. 144* 145*> \ingroup singleSYcomputational 146* 147*> \par Contributors: 148* ================== 149*> \verbatim 150*> 151*> June 2017, Igor Kozachenko, 152*> Computer Science Division, 153*> University of California, Berkeley 154*> 155*> \endverbatim 156* 157* ===================================================================== 158 SUBROUTINE SSYTRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO ) 159* 160* -- LAPACK computational routine -- 161* -- LAPACK is a software package provided by Univ. of Tennessee, -- 162* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- 163* 164* .. Scalar Arguments .. 165 CHARACTER UPLO 166 INTEGER INFO, LDA, N, NB 167* .. 168* .. Array Arguments .. 169 INTEGER IPIV( * ) 170 REAL A( LDA, * ), E( * ), WORK( N+NB+1, * ) 171* .. 172* 173* ===================================================================== 174* 175* .. Parameters .. 176 REAL ONE, ZERO 177 PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) 178* .. 179* .. Local Scalars .. 180 LOGICAL UPPER 181 INTEGER CUT, I, ICOUNT, INVD, IP, K, NNB, J, U11 182 REAL AK, AKKP1, AKP1, D, T, U01_I_J, U01_IP1_J, 183 $ U11_I_J, U11_IP1_J 184* .. 185* .. External Functions .. 186 LOGICAL LSAME 187 EXTERNAL LSAME 188* .. 189* .. External Subroutines .. 190 EXTERNAL SGEMM, SSYSWAPR, STRTRI, STRMM, XERBLA 191* .. 192* .. Intrinsic Functions .. 193 INTRINSIC ABS, MAX, MOD 194* .. 195* .. Executable Statements .. 196* 197* Test the input parameters. 198* 199 INFO = 0 200 UPPER = LSAME( UPLO, 'U' ) 201 IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN 202 INFO = -1 203 ELSE IF( N.LT.0 ) THEN 204 INFO = -2 205 ELSE IF( LDA.LT.MAX( 1, N ) ) THEN 206 INFO = -4 207 END IF 208* 209* Quick return if possible 210* 211 IF( INFO.NE.0 ) THEN 212 CALL XERBLA( 'SSYTRI_3X', -INFO ) 213 RETURN 214 END IF 215 IF( N.EQ.0 ) 216 $ RETURN 217* 218* Workspace got Non-diag elements of D 219* 220 DO K = 1, N 221 WORK( K, 1 ) = E( K ) 222 END DO 223* 224* Check that the diagonal matrix D is nonsingular. 225* 226 IF( UPPER ) THEN 227* 228* Upper triangular storage: examine D from bottom to top 229* 230 DO INFO = N, 1, -1 231 IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) 232 $ RETURN 233 END DO 234 ELSE 235* 236* Lower triangular storage: examine D from top to bottom. 237* 238 DO INFO = 1, N 239 IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) 240 $ RETURN 241 END DO 242 END IF 243* 244 INFO = 0 245* 246* Splitting Workspace 247* U01 is a block ( N, NB+1 ) 248* The first element of U01 is in WORK( 1, 1 ) 249* U11 is a block ( NB+1, NB+1 ) 250* The first element of U11 is in WORK( N+1, 1 ) 251* 252 U11 = N 253* 254* INVD is a block ( N, 2 ) 255* The first element of INVD is in WORK( 1, INVD ) 256* 257 INVD = NB + 2 258 259 IF( UPPER ) THEN 260* 261* Begin Upper 262* 263* invA = P * inv(U**T) * inv(D) * inv(U) * P**T. 264* 265 CALL STRTRI( UPLO, 'U', N, A, LDA, INFO ) 266* 267* inv(D) and inv(D) * inv(U) 268* 269 K = 1 270 DO WHILE( K.LE.N ) 271 IF( IPIV( K ).GT.0 ) THEN 272* 1 x 1 diagonal NNB 273 WORK( K, INVD ) = ONE / A( K, K ) 274 WORK( K, INVD+1 ) = ZERO 275 ELSE 276* 2 x 2 diagonal NNB 277 T = WORK( K+1, 1 ) 278 AK = A( K, K ) / T 279 AKP1 = A( K+1, K+1 ) / T 280 AKKP1 = WORK( K+1, 1 ) / T 281 D = T*( AK*AKP1-ONE ) 282 WORK( K, INVD ) = AKP1 / D 283 WORK( K+1, INVD+1 ) = AK / D 284 WORK( K, INVD+1 ) = -AKKP1 / D 285 WORK( K+1, INVD ) = WORK( K, INVD+1 ) 286 K = K + 1 287 END IF 288 K = K + 1 289 END DO 290* 291* inv(U**T) = (inv(U))**T 292* 293* inv(U**T) * inv(D) * inv(U) 294* 295 CUT = N 296 DO WHILE( CUT.GT.0 ) 297 NNB = NB 298 IF( CUT.LE.NNB ) THEN 299 NNB = CUT 300 ELSE 301 ICOUNT = 0 302* count negative elements, 303 DO I = CUT+1-NNB, CUT 304 IF( IPIV( I ).LT.0 ) ICOUNT = ICOUNT + 1 305 END DO 306* need a even number for a clear cut 307 IF( MOD( ICOUNT, 2 ).EQ.1 ) NNB = NNB + 1 308 END IF 309 310 CUT = CUT - NNB 311* 312* U01 Block 313* 314 DO I = 1, CUT 315 DO J = 1, NNB 316 WORK( I, J ) = A( I, CUT+J ) 317 END DO 318 END DO 319* 320* U11 Block 321* 322 DO I = 1, NNB 323 WORK( U11+I, I ) = ONE 324 DO J = 1, I-1 325 WORK( U11+I, J ) = ZERO 326 END DO 327 DO J = I+1, NNB 328 WORK( U11+I, J ) = A( CUT+I, CUT+J ) 329 END DO 330 END DO 331* 332* invD * U01 333* 334 I = 1 335 DO WHILE( I.LE.CUT ) 336 IF( IPIV( I ).GT.0 ) THEN 337 DO J = 1, NNB 338 WORK( I, J ) = WORK( I, INVD ) * WORK( I, J ) 339 END DO 340 ELSE 341 DO J = 1, NNB 342 U01_I_J = WORK( I, J ) 343 U01_IP1_J = WORK( I+1, J ) 344 WORK( I, J ) = WORK( I, INVD ) * U01_I_J 345 $ + WORK( I, INVD+1 ) * U01_IP1_J 346 WORK( I+1, J ) = WORK( I+1, INVD ) * U01_I_J 347 $ + WORK( I+1, INVD+1 ) * U01_IP1_J 348 END DO 349 I = I + 1 350 END IF 351 I = I + 1 352 END DO 353* 354* invD1 * U11 355* 356 I = 1 357 DO WHILE ( I.LE.NNB ) 358 IF( IPIV( CUT+I ).GT.0 ) THEN 359 DO J = I, NNB 360 WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J) 361 END DO 362 ELSE 363 DO J = I, NNB 364 U11_I_J = WORK(U11+I,J) 365 U11_IP1_J = WORK(U11+I+1,J) 366 WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J) 367 $ + WORK(CUT+I,INVD+1) * WORK(U11+I+1,J) 368 WORK( U11+I+1, J ) = WORK(CUT+I+1,INVD) * U11_I_J 369 $ + WORK(CUT+I+1,INVD+1) * U11_IP1_J 370 END DO 371 I = I + 1 372 END IF 373 I = I + 1 374 END DO 375* 376* U11**T * invD1 * U11 -> U11 377* 378 CALL STRMM( 'L', 'U', 'T', 'U', NNB, NNB, 379 $ ONE, A( CUT+1, CUT+1 ), LDA, WORK( U11+1, 1 ), 380 $ N+NB+1 ) 381* 382 DO I = 1, NNB 383 DO J = I, NNB 384 A( CUT+I, CUT+J ) = WORK( U11+I, J ) 385 END DO 386 END DO 387* 388* U01**T * invD * U01 -> A( CUT+I, CUT+J ) 389* 390 CALL SGEMM( 'T', 'N', NNB, NNB, CUT, ONE, A( 1, CUT+1 ), 391 $ LDA, WORK, N+NB+1, ZERO, WORK(U11+1,1), N+NB+1 ) 392 393* 394* U11 = U11**T * invD1 * U11 + U01**T * invD * U01 395* 396 DO I = 1, NNB 397 DO J = I, NNB 398 A( CUT+I, CUT+J ) = A( CUT+I, CUT+J ) + WORK(U11+I,J) 399 END DO 400 END DO 401* 402* U01 = U00**T * invD0 * U01 403* 404 CALL STRMM( 'L', UPLO, 'T', 'U', CUT, NNB, 405 $ ONE, A, LDA, WORK, N+NB+1 ) 406 407* 408* Update U01 409* 410 DO I = 1, CUT 411 DO J = 1, NNB 412 A( I, CUT+J ) = WORK( I, J ) 413 END DO 414 END DO 415* 416* Next Block 417* 418 END DO 419* 420* Apply PERMUTATIONS P and P**T: 421* P * inv(U**T) * inv(D) * inv(U) * P**T. 422* Interchange rows and columns I and IPIV(I) in reverse order 423* from the formation order of IPIV vector for Upper case. 424* 425* ( We can use a loop over IPIV with increment 1, 426* since the ABS value of IPIV(I) represents the row (column) 427* index of the interchange with row (column) i in both 1x1 428* and 2x2 pivot cases, i.e. we don't need separate code branches 429* for 1x1 and 2x2 pivot cases ) 430* 431 DO I = 1, N 432 IP = ABS( IPIV( I ) ) 433 IF( IP.NE.I ) THEN 434 IF (I .LT. IP) CALL SSYSWAPR( UPLO, N, A, LDA, I ,IP ) 435 IF (I .GT. IP) CALL SSYSWAPR( UPLO, N, A, LDA, IP ,I ) 436 END IF 437 END DO 438* 439 ELSE 440* 441* Begin Lower 442* 443* inv A = P * inv(L**T) * inv(D) * inv(L) * P**T. 444* 445 CALL STRTRI( UPLO, 'U', N, A, LDA, INFO ) 446* 447* inv(D) and inv(D) * inv(L) 448* 449 K = N 450 DO WHILE ( K .GE. 1 ) 451 IF( IPIV( K ).GT.0 ) THEN 452* 1 x 1 diagonal NNB 453 WORK( K, INVD ) = ONE / A( K, K ) 454 WORK( K, INVD+1 ) = ZERO 455 ELSE 456* 2 x 2 diagonal NNB 457 T = WORK( K-1, 1 ) 458 AK = A( K-1, K-1 ) / T 459 AKP1 = A( K, K ) / T 460 AKKP1 = WORK( K-1, 1 ) / T 461 D = T*( AK*AKP1-ONE ) 462 WORK( K-1, INVD ) = AKP1 / D 463 WORK( K, INVD ) = AK / D 464 WORK( K, INVD+1 ) = -AKKP1 / D 465 WORK( K-1, INVD+1 ) = WORK( K, INVD+1 ) 466 K = K - 1 467 END IF 468 K = K - 1 469 END DO 470* 471* inv(L**T) = (inv(L))**T 472* 473* inv(L**T) * inv(D) * inv(L) 474* 475 CUT = 0 476 DO WHILE( CUT.LT.N ) 477 NNB = NB 478 IF( (CUT + NNB).GT.N ) THEN 479 NNB = N - CUT 480 ELSE 481 ICOUNT = 0 482* count negative elements, 483 DO I = CUT + 1, CUT+NNB 484 IF ( IPIV( I ).LT.0 ) ICOUNT = ICOUNT + 1 485 END DO 486* need a even number for a clear cut 487 IF( MOD( ICOUNT, 2 ).EQ.1 ) NNB = NNB + 1 488 END IF 489* 490* L21 Block 491* 492 DO I = 1, N-CUT-NNB 493 DO J = 1, NNB 494 WORK( I, J ) = A( CUT+NNB+I, CUT+J ) 495 END DO 496 END DO 497* 498* L11 Block 499* 500 DO I = 1, NNB 501 WORK( U11+I, I) = ONE 502 DO J = I+1, NNB 503 WORK( U11+I, J ) = ZERO 504 END DO 505 DO J = 1, I-1 506 WORK( U11+I, J ) = A( CUT+I, CUT+J ) 507 END DO 508 END DO 509* 510* invD*L21 511* 512 I = N-CUT-NNB 513 DO WHILE( I.GE.1 ) 514 IF( IPIV( CUT+NNB+I ).GT.0 ) THEN 515 DO J = 1, NNB 516 WORK( I, J ) = WORK( CUT+NNB+I, INVD) * WORK( I, J) 517 END DO 518 ELSE 519 DO J = 1, NNB 520 U01_I_J = WORK(I,J) 521 U01_IP1_J = WORK(I-1,J) 522 WORK(I,J)=WORK(CUT+NNB+I,INVD)*U01_I_J+ 523 $ WORK(CUT+NNB+I,INVD+1)*U01_IP1_J 524 WORK(I-1,J)=WORK(CUT+NNB+I-1,INVD+1)*U01_I_J+ 525 $ WORK(CUT+NNB+I-1,INVD)*U01_IP1_J 526 END DO 527 I = I - 1 528 END IF 529 I = I - 1 530 END DO 531* 532* invD1*L11 533* 534 I = NNB 535 DO WHILE( I.GE.1 ) 536 IF( IPIV( CUT+I ).GT.0 ) THEN 537 DO J = 1, NNB 538 WORK( U11+I, J ) = WORK( CUT+I, INVD)*WORK(U11+I,J) 539 END DO 540 541 ELSE 542 DO J = 1, NNB 543 U11_I_J = WORK( U11+I, J ) 544 U11_IP1_J = WORK( U11+I-1, J ) 545 WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J) 546 $ + WORK(CUT+I,INVD+1) * U11_IP1_J 547 WORK( U11+I-1, J ) = WORK(CUT+I-1,INVD+1) * U11_I_J 548 $ + WORK(CUT+I-1,INVD) * U11_IP1_J 549 END DO 550 I = I - 1 551 END IF 552 I = I - 1 553 END DO 554* 555* L11**T * invD1 * L11 -> L11 556* 557 CALL STRMM( 'L', UPLO, 'T', 'U', NNB, NNB, ONE, 558 $ A( CUT+1, CUT+1 ), LDA, WORK( U11+1, 1 ), 559 $ N+NB+1 ) 560 561* 562 DO I = 1, NNB 563 DO J = 1, I 564 A( CUT+I, CUT+J ) = WORK( U11+I, J ) 565 END DO 566 END DO 567* 568 IF( (CUT+NNB).LT.N ) THEN 569* 570* L21**T * invD2*L21 -> A( CUT+I, CUT+J ) 571* 572 CALL SGEMM( 'T', 'N', NNB, NNB, N-NNB-CUT, ONE, 573 $ A( CUT+NNB+1, CUT+1 ), LDA, WORK, N+NB+1, 574 $ ZERO, WORK( U11+1, 1 ), N+NB+1 ) 575 576* 577* L11 = L11**T * invD1 * L11 + U01**T * invD * U01 578* 579 DO I = 1, NNB 580 DO J = 1, I 581 A( CUT+I, CUT+J ) = A( CUT+I, CUT+J )+WORK(U11+I,J) 582 END DO 583 END DO 584* 585* L01 = L22**T * invD2 * L21 586* 587 CALL STRMM( 'L', UPLO, 'T', 'U', N-NNB-CUT, NNB, ONE, 588 $ A( CUT+NNB+1, CUT+NNB+1 ), LDA, WORK, 589 $ N+NB+1 ) 590* 591* Update L21 592* 593 DO I = 1, N-CUT-NNB 594 DO J = 1, NNB 595 A( CUT+NNB+I, CUT+J ) = WORK( I, J ) 596 END DO 597 END DO 598* 599 ELSE 600* 601* L11 = L11**T * invD1 * L11 602* 603 DO I = 1, NNB 604 DO J = 1, I 605 A( CUT+I, CUT+J ) = WORK( U11+I, J ) 606 END DO 607 END DO 608 END IF 609* 610* Next Block 611* 612 CUT = CUT + NNB 613* 614 END DO 615* 616* Apply PERMUTATIONS P and P**T: 617* P * inv(L**T) * inv(D) * inv(L) * P**T. 618* Interchange rows and columns I and IPIV(I) in reverse order 619* from the formation order of IPIV vector for Lower case. 620* 621* ( We can use a loop over IPIV with increment -1, 622* since the ABS value of IPIV(I) represents the row (column) 623* index of the interchange with row (column) i in both 1x1 624* and 2x2 pivot cases, i.e. we don't need separate code branches 625* for 1x1 and 2x2 pivot cases ) 626* 627 DO I = N, 1, -1 628 IP = ABS( IPIV( I ) ) 629 IF( IP.NE.I ) THEN 630 IF (I .LT. IP) CALL SSYSWAPR( UPLO, N, A, LDA, I ,IP ) 631 IF (I .GT. IP) CALL SSYSWAPR( UPLO, N, A, LDA, IP ,I ) 632 END IF 633 END DO 634* 635 END IF 636* 637 RETURN 638* 639* End of SSYTRI_3X 640* 641 END 642 643