1*> \brief \b CSYTRF_AA 2* 3* =========== DOCUMENTATION =========== 4* 5* Online html documentation available at 6* http://www.netlib.org/lapack/explore-html/ 7* 8*> \htmlonly 9*> Download CSYTRF_AA + dependencies 10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/csytrf_aa.f"> 11*> [TGZ]</a> 12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/csytrf_aa.f"> 13*> [ZIP]</a> 14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/csytrf_aa.f"> 15*> [TXT]</a> 16*> \endhtmlonly 17* 18* Definition: 19* =========== 20* 21* SUBROUTINE CSYTRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) 22* 23* .. Scalar Arguments .. 24* CHARACTER UPLO 25* INTEGER N, LDA, LWORK, INFO 26* .. 27* .. Array Arguments .. 28* INTEGER IPIV( * ) 29* COMPLEX A( LDA, * ), WORK( * ) 30* .. 31* 32*> \par Purpose: 33* ============= 34*> 35*> \verbatim 36*> 37*> CSYTRF_AA computes the factorization of a complex symmetric matrix A 38*> using the Aasen's algorithm. The form of the factorization is 39*> 40*> A = U**T*T*U or A = L*T*L**T 41*> 42*> where U (or L) is a product of permutation and unit upper (lower) 43*> triangular matrices, and T is a complex symmetric tridiagonal matrix. 44*> 45*> This is the blocked version of the algorithm, calling Level 3 BLAS. 46*> \endverbatim 47* 48* Arguments: 49* ========== 50* 51*> \param[in] UPLO 52*> \verbatim 53*> UPLO is CHARACTER*1 54*> = 'U': Upper triangle of A is stored; 55*> = 'L': Lower triangle of A is stored. 56*> \endverbatim 57*> 58*> \param[in] N 59*> \verbatim 60*> N is INTEGER 61*> The order of the matrix A. N >= 0. 62*> \endverbatim 63*> 64*> \param[in,out] A 65*> \verbatim 66*> A is COMPLEX array, dimension (LDA,N) 67*> On entry, the symmetric matrix A. If UPLO = 'U', the leading 68*> N-by-N upper triangular part of A contains the upper 69*> triangular part of the matrix A, and the strictly lower 70*> triangular part of A is not referenced. If UPLO = 'L', the 71*> leading N-by-N lower triangular part of A contains the lower 72*> triangular part of the matrix A, and the strictly upper 73*> triangular part of A is not referenced. 74*> 75*> On exit, the tridiagonal matrix is stored in the diagonals 76*> and the subdiagonals of A just below (or above) the diagonals, 77*> and L is stored below (or above) the subdiaonals, when UPLO 78*> is 'L' (or 'U'). 79*> \endverbatim 80*> 81*> \param[in] LDA 82*> \verbatim 83*> LDA is INTEGER 84*> The leading dimension of the array A. LDA >= max(1,N). 85*> \endverbatim 86*> 87*> \param[out] IPIV 88*> \verbatim 89*> IPIV is INTEGER array, dimension (N) 90*> On exit, it contains the details of the interchanges, i.e., 91*> the row and column k of A were interchanged with the 92*> row and column IPIV(k). 93*> \endverbatim 94*> 95*> \param[out] WORK 96*> \verbatim 97*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) 98*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 99*> \endverbatim 100*> 101*> \param[in] LWORK 102*> \verbatim 103*> LWORK is INTEGER 104*> The length of WORK. LWORK >= MAX(1,2*N). For optimum performance 105*> LWORK >= N*(1+NB), where NB is the optimal blocksize. 106*> 107*> If LWORK = -1, then a workspace query is assumed; the routine 108*> only calculates the optimal size of the WORK array, returns 109*> this value as the first entry of the WORK array, and no error 110*> message related to LWORK is issued by XERBLA. 111*> \endverbatim 112*> 113*> \param[out] INFO 114*> \verbatim 115*> INFO is INTEGER 116*> = 0: successful exit 117*> < 0: if INFO = -i, the i-th argument had an illegal value. 118*> \endverbatim 119* 120* Authors: 121* ======== 122* 123*> \author Univ. of Tennessee 124*> \author Univ. of California Berkeley 125*> \author Univ. of Colorado Denver 126*> \author NAG Ltd. 127* 128*> \ingroup complexSYcomputational 129* 130* ===================================================================== 131 SUBROUTINE CSYTRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO) 132* 133* -- LAPACK computational routine -- 134* -- LAPACK is a software package provided by Univ. of Tennessee, -- 135* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- 136* 137 IMPLICIT NONE 138* 139* .. Scalar Arguments .. 140 CHARACTER UPLO 141 INTEGER N, LDA, LWORK, INFO 142* .. 143* .. Array Arguments .. 144 INTEGER IPIV( * ) 145 COMPLEX A( LDA, * ), WORK( * ) 146* .. 147* 148* ===================================================================== 149* .. Parameters .. 150 COMPLEX ZERO, ONE 151 PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) 152* 153* .. Local Scalars .. 154 LOGICAL LQUERY, UPPER 155 INTEGER J, LWKOPT 156 INTEGER NB, MJ, NJ, K1, K2, J1, J2, J3, JB 157 COMPLEX ALPHA 158* .. 159* .. External Functions .. 160 LOGICAL LSAME 161 INTEGER ILAENV 162 EXTERNAL LSAME, ILAENV 163* .. 164* .. External Subroutines .. 165 EXTERNAL CLASYF_AA, CGEMM, CGEMV, CSCAL, CSWAP, CCOPY, 166 $ XERBLA 167* .. 168* .. Intrinsic Functions .. 169 INTRINSIC MAX 170* .. 171* .. Executable Statements .. 172* 173* Determine the block size 174* 175 NB = ILAENV( 1, 'CSYTRF_AA', UPLO, N, -1, -1, -1 ) 176* 177* Test the input parameters. 178* 179 INFO = 0 180 UPPER = LSAME( UPLO, 'U' ) 181 LQUERY = ( LWORK.EQ.-1 ) 182 IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN 183 INFO = -1 184 ELSE IF( N.LT.0 ) THEN 185 INFO = -2 186 ELSE IF( LDA.LT.MAX( 1, N ) ) THEN 187 INFO = -4 188 ELSE IF( LWORK.LT.MAX( 1, 2*N ) .AND. .NOT.LQUERY ) THEN 189 INFO = -7 190 END IF 191* 192 IF( INFO.EQ.0 ) THEN 193 LWKOPT = (NB+1)*N 194 WORK( 1 ) = LWKOPT 195 END IF 196* 197 IF( INFO.NE.0 ) THEN 198 CALL XERBLA( 'CSYTRF_AA', -INFO ) 199 RETURN 200 ELSE IF( LQUERY ) THEN 201 RETURN 202 END IF 203* 204* Quick return 205* 206 IF ( N.EQ.0 ) THEN 207 RETURN 208 ENDIF 209 IPIV( 1 ) = 1 210 IF ( N.EQ.1 ) THEN 211 RETURN 212 END IF 213* 214* Adjust block size based on the workspace size 215* 216 IF( LWORK.LT.((1+NB)*N) ) THEN 217 NB = ( LWORK-N ) / N 218 END IF 219* 220 IF( UPPER ) THEN 221* 222* ..................................................... 223* Factorize A as U**T*D*U using the upper triangle of A 224* ..................................................... 225* 226* Copy first row A(1, 1:N) into H(1:n) (stored in WORK(1:N)) 227* 228 CALL CCOPY( N, A( 1, 1 ), LDA, WORK( 1 ), 1 ) 229* 230* J is the main loop index, increasing from 1 to N in steps of 231* JB, where JB is the number of columns factorized by CLASYF; 232* JB is either NB, or N-J+1 for the last block 233* 234 J = 0 235 10 CONTINUE 236 IF( J.GE.N ) 237 $ GO TO 20 238* 239* each step of the main loop 240* J is the last column of the previous panel 241* J1 is the first column of the current panel 242* K1 identifies if the previous column of the panel has been 243* explicitly stored, e.g., K1=1 for the first panel, and 244* K1=0 for the rest 245* 246 J1 = J + 1 247 JB = MIN( N-J1+1, NB ) 248 K1 = MAX(1, J)-J 249* 250* Panel factorization 251* 252 CALL CLASYF_AA( UPLO, 2-K1, N-J, JB, 253 $ A( MAX(1, J), J+1 ), LDA, 254 $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ) ) 255* 256* Adjust IPIV and apply it back (J-th step picks (J+1)-th pivot) 257* 258 DO J2 = J+2, MIN(N, J+JB+1) 259 IPIV( J2 ) = IPIV( J2 ) + J 260 IF( (J2.NE.IPIV(J2)) .AND. ((J1-K1).GT.2) ) THEN 261 CALL CSWAP( J1-K1-2, A( 1, J2 ), 1, 262 $ A( 1, IPIV(J2) ), 1 ) 263 END IF 264 END DO 265 J = J + JB 266* 267* Trailing submatrix update, where 268* the row A(J1-1, J2-1:N) stores U(J1, J2+1:N) and 269* WORK stores the current block of the auxiriarly matrix H 270* 271 IF( J.LT.N ) THEN 272* 273* If first panel and JB=1 (NB=1), then nothing to do 274* 275 IF( J1.GT.1 .OR. JB.GT.1 ) THEN 276* 277* Merge rank-1 update with BLAS-3 update 278* 279 ALPHA = A( J, J+1 ) 280 A( J, J+1 ) = ONE 281 CALL CCOPY( N-J, A( J-1, J+1 ), LDA, 282 $ WORK( (J+1-J1+1)+JB*N ), 1 ) 283 CALL CSCAL( N-J, ALPHA, WORK( (J+1-J1+1)+JB*N ), 1 ) 284* 285* K1 identifies if the previous column of the panel has been 286* explicitly stored, e.g., K1=1 and K2= 0 for the first panel, 287* while K1=0 and K2=1 for the rest 288* 289 IF( J1.GT.1 ) THEN 290* 291* Not first panel 292* 293 K2 = 1 294 ELSE 295* 296* First panel 297* 298 K2 = 0 299* 300* First update skips the first column 301* 302 JB = JB - 1 303 END IF 304* 305 DO J2 = J+1, N, NB 306 NJ = MIN( NB, N-J2+1 ) 307* 308* Update (J2, J2) diagonal block with CGEMV 309* 310 J3 = J2 311 DO MJ = NJ-1, 1, -1 312 CALL CGEMV( 'No transpose', MJ, JB+1, 313 $ -ONE, WORK( J3-J1+1+K1*N ), N, 314 $ A( J1-K2, J3 ), 1, 315 $ ONE, A( J3, J3 ), LDA ) 316 J3 = J3 + 1 317 END DO 318* 319* Update off-diagonal block of J2-th block row with CGEMM 320* 321 CALL CGEMM( 'Transpose', 'Transpose', 322 $ NJ, N-J3+1, JB+1, 323 $ -ONE, A( J1-K2, J2 ), LDA, 324 $ WORK( J3-J1+1+K1*N ), N, 325 $ ONE, A( J2, J3 ), LDA ) 326 END DO 327* 328* Recover T( J, J+1 ) 329* 330 A( J, J+1 ) = ALPHA 331 END IF 332* 333* WORK(J+1, 1) stores H(J+1, 1) 334* 335 CALL CCOPY( N-J, A( J+1, J+1 ), LDA, WORK( 1 ), 1 ) 336 END IF 337 GO TO 10 338 ELSE 339* 340* ..................................................... 341* Factorize A as L*D*L**T using the lower triangle of A 342* ..................................................... 343* 344* copy first column A(1:N, 1) into H(1:N, 1) 345* (stored in WORK(1:N)) 346* 347 CALL CCOPY( N, A( 1, 1 ), 1, WORK( 1 ), 1 ) 348* 349* J is the main loop index, increasing from 1 to N in steps of 350* JB, where JB is the number of columns factorized by CLASYF; 351* JB is either NB, or N-J+1 for the last block 352* 353 J = 0 354 11 CONTINUE 355 IF( J.GE.N ) 356 $ GO TO 20 357* 358* each step of the main loop 359* J is the last column of the previous panel 360* J1 is the first column of the current panel 361* K1 identifies if the previous column of the panel has been 362* explicitly stored, e.g., K1=1 for the first panel, and 363* K1=0 for the rest 364* 365 J1 = J+1 366 JB = MIN( N-J1+1, NB ) 367 K1 = MAX(1, J)-J 368* 369* Panel factorization 370* 371 CALL CLASYF_AA( UPLO, 2-K1, N-J, JB, 372 $ A( J+1, MAX(1, J) ), LDA, 373 $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ) ) 374* 375* Adjust IPIV and apply it back (J-th step picks (J+1)-th pivot) 376* 377 DO J2 = J+2, MIN(N, J+JB+1) 378 IPIV( J2 ) = IPIV( J2 ) + J 379 IF( (J2.NE.IPIV(J2)) .AND. ((J1-K1).GT.2) ) THEN 380 CALL CSWAP( J1-K1-2, A( J2, 1 ), LDA, 381 $ A( IPIV(J2), 1 ), LDA ) 382 END IF 383 END DO 384 J = J + JB 385* 386* Trailing submatrix update, where 387* A(J2+1, J1-1) stores L(J2+1, J1) and 388* WORK(J2+1, 1) stores H(J2+1, 1) 389* 390 IF( J.LT.N ) THEN 391* 392* if first panel and JB=1 (NB=1), then nothing to do 393* 394 IF( J1.GT.1 .OR. JB.GT.1 ) THEN 395* 396* Merge rank-1 update with BLAS-3 update 397* 398 ALPHA = A( J+1, J ) 399 A( J+1, J ) = ONE 400 CALL CCOPY( N-J, A( J+1, J-1 ), 1, 401 $ WORK( (J+1-J1+1)+JB*N ), 1 ) 402 CALL CSCAL( N-J, ALPHA, WORK( (J+1-J1+1)+JB*N ), 1 ) 403* 404* K1 identifies if the previous column of the panel has been 405* explicitly stored, e.g., K1=1 and K2= 0 for the first panel, 406* while K1=0 and K2=1 for the rest 407* 408 IF( J1.GT.1 ) THEN 409* 410* Not first panel 411* 412 K2 = 1 413 ELSE 414* 415* First panel 416* 417 K2 = 0 418* 419* First update skips the first column 420* 421 JB = JB - 1 422 END IF 423* 424 DO J2 = J+1, N, NB 425 NJ = MIN( NB, N-J2+1 ) 426* 427* Update (J2, J2) diagonal block with CGEMV 428* 429 J3 = J2 430 DO MJ = NJ-1, 1, -1 431 CALL CGEMV( 'No transpose', MJ, JB+1, 432 $ -ONE, WORK( J3-J1+1+K1*N ), N, 433 $ A( J3, J1-K2 ), LDA, 434 $ ONE, A( J3, J3 ), 1 ) 435 J3 = J3 + 1 436 END DO 437* 438* Update off-diagonal block in J2-th block column with CGEMM 439* 440 CALL CGEMM( 'No transpose', 'Transpose', 441 $ N-J3+1, NJ, JB+1, 442 $ -ONE, WORK( J3-J1+1+K1*N ), N, 443 $ A( J2, J1-K2 ), LDA, 444 $ ONE, A( J3, J2 ), LDA ) 445 END DO 446* 447* Recover T( J+1, J ) 448* 449 A( J+1, J ) = ALPHA 450 END IF 451* 452* WORK(J+1, 1) stores H(J+1, 1) 453* 454 CALL CCOPY( N-J, A( J+1, J+1 ), 1, WORK( 1 ), 1 ) 455 END IF 456 GO TO 11 457 END IF 458* 459 20 CONTINUE 460 WORK( 1 ) = LWKOPT 461 RETURN 462* 463* End of CSYTRF_AA 464* 465 END 466