1*> \brief \b ZHETRD_HE2HB 2* 3* @precisions fortran z -> s d c 4* 5* =========== DOCUMENTATION =========== 6* 7* Online html documentation available at 8* http://www.netlib.org/lapack/explore-html/ 9* 10*> \htmlonly 11*> Download ZHETRD_HE2HB + dependencies 12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhetrd_he2hb.f"> 13*> [TGZ]</a> 14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhetrd_he2hb.f"> 15*> [ZIP]</a> 16*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhetrd_he2hb.f"> 17*> [TXT]</a> 18*> \endhtmlonly 19* 20* Definition: 21* =========== 22* 23* SUBROUTINE ZHETRD_HE2HB( UPLO, N, KD, A, LDA, AB, LDAB, TAU, 24* WORK, LWORK, INFO ) 25* 26* IMPLICIT NONE 27* 28* .. Scalar Arguments .. 29* CHARACTER UPLO 30* INTEGER INFO, LDA, LDAB, LWORK, N, KD 31* .. 32* .. Array Arguments .. 33* COMPLEX*16 A( LDA, * ), AB( LDAB, * ), 34* TAU( * ), WORK( * ) 35* .. 36* 37* 38*> \par Purpose: 39* ============= 40*> 41*> \verbatim 42*> 43*> ZHETRD_HE2HB reduces a complex Hermitian matrix A to complex Hermitian 44*> band-diagonal form AB by a unitary similarity transformation: 45*> Q**H * A * Q = AB. 46*> \endverbatim 47* 48* Arguments: 49* ========== 50* 51*> \param[in] UPLO 52*> \verbatim 53*> UPLO is CHARACTER*1 54*> = 'U': Upper triangle of A is stored; 55*> = 'L': Lower triangle of A is stored. 56*> \endverbatim 57*> 58*> \param[in] N 59*> \verbatim 60*> N is INTEGER 61*> The order of the matrix A. N >= 0. 62*> \endverbatim 63*> 64*> \param[in] KD 65*> \verbatim 66*> KD is INTEGER 67*> The number of superdiagonals of the reduced matrix if UPLO = 'U', 68*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. 69*> The reduced matrix is stored in the array AB. 70*> \endverbatim 71*> 72*> \param[in,out] A 73*> \verbatim 74*> A is COMPLEX*16 array, dimension (LDA,N) 75*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading 76*> N-by-N upper triangular part of A contains the upper 77*> triangular part of the matrix A, and the strictly lower 78*> triangular part of A is not referenced. If UPLO = 'L', the 79*> leading N-by-N lower triangular part of A contains the lower 80*> triangular part of the matrix A, and the strictly upper 81*> triangular part of A is not referenced. 82*> On exit, if UPLO = 'U', the diagonal and first superdiagonal 83*> of A are overwritten by the corresponding elements of the 84*> tridiagonal matrix T, and the elements above the first 85*> superdiagonal, with the array TAU, represent the unitary 86*> matrix Q as a product of elementary reflectors; if UPLO 87*> = 'L', the diagonal and first subdiagonal of A are over- 88*> written by the corresponding elements of the tridiagonal 89*> matrix T, and the elements below the first subdiagonal, with 90*> the array TAU, represent the unitary matrix Q as a product 91*> of elementary reflectors. See Further Details. 92*> \endverbatim 93*> 94*> \param[in] LDA 95*> \verbatim 96*> LDA is INTEGER 97*> The leading dimension of the array A. LDA >= max(1,N). 98*> \endverbatim 99*> 100*> \param[out] AB 101*> \verbatim 102*> AB is COMPLEX*16 array, dimension (LDAB,N) 103*> On exit, the upper or lower triangle of the Hermitian band 104*> matrix A, stored in the first KD+1 rows of the array. The 105*> j-th column of A is stored in the j-th column of the array AB 106*> as follows: 107*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; 108*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). 109*> \endverbatim 110*> 111*> \param[in] LDAB 112*> \verbatim 113*> LDAB is INTEGER 114*> The leading dimension of the array AB. LDAB >= KD+1. 115*> \endverbatim 116*> 117*> \param[out] TAU 118*> \verbatim 119*> TAU is COMPLEX*16 array, dimension (N-KD) 120*> The scalar factors of the elementary reflectors (see Further 121*> Details). 122*> \endverbatim 123*> 124*> \param[out] WORK 125*> \verbatim 126*> WORK is COMPLEX*16 array, dimension (LWORK) 127*> On exit, if INFO = 0, or if LWORK=-1, 128*> WORK(1) returns the size of LWORK. 129*> \endverbatim 130*> 131*> \param[in] LWORK 132*> \verbatim 133*> LWORK is INTEGER 134*> The dimension of the array WORK which should be calculated 135*> by a workspace query. LWORK = MAX(1, LWORK_QUERY) 136*> If LWORK = -1, then a workspace query is assumed; the routine 137*> only calculates the optimal size of the WORK array, returns 138*> this value as the first entry of the WORK array, and no error 139*> message related to LWORK is issued by XERBLA. 140*> LWORK_QUERY = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD 141*> where FACTOPTNB is the blocking used by the QR or LQ 142*> algorithm, usually FACTOPTNB=128 is a good choice otherwise 143*> putting LWORK=-1 will provide the size of WORK. 144*> \endverbatim 145*> 146*> \param[out] INFO 147*> \verbatim 148*> INFO is INTEGER 149*> = 0: successful exit 150*> < 0: if INFO = -i, the i-th argument had an illegal value 151*> \endverbatim 152* 153* Authors: 154* ======== 155* 156*> \author Univ. of Tennessee 157*> \author Univ. of California Berkeley 158*> \author Univ. of Colorado Denver 159*> \author NAG Ltd. 160* 161*> \ingroup complex16HEcomputational 162* 163*> \par Further Details: 164* ===================== 165*> 166*> \verbatim 167*> 168*> Implemented by Azzam Haidar. 169*> 170*> All details are available on technical report, SC11, SC13 papers. 171*> 172*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. 173*> Parallel reduction to condensed forms for symmetric eigenvalue problems 174*> using aggregated fine-grained and memory-aware kernels. In Proceedings 175*> of 2011 International Conference for High Performance Computing, 176*> Networking, Storage and Analysis (SC '11), New York, NY, USA, 177*> Article 8 , 11 pages. 178*> http://doi.acm.org/10.1145/2063384.2063394 179*> 180*> A. Haidar, J. Kurzak, P. Luszczek, 2013. 181*> An improved parallel singular value algorithm and its implementation 182*> for multicore hardware, In Proceedings of 2013 International Conference 183*> for High Performance Computing, Networking, Storage and Analysis (SC '13). 184*> Denver, Colorado, USA, 2013. 185*> Article 90, 12 pages. 186*> http://doi.acm.org/10.1145/2503210.2503292 187*> 188*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. 189*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure 190*> calculations based on fine-grained memory aware tasks. 191*> International Journal of High Performance Computing Applications. 192*> Volume 28 Issue 2, Pages 196-209, May 2014. 193*> http://hpc.sagepub.com/content/28/2/196 194*> 195*> \endverbatim 196*> 197*> \verbatim 198*> 199*> If UPLO = 'U', the matrix Q is represented as a product of elementary 200*> reflectors 201*> 202*> Q = H(k)**H . . . H(2)**H H(1)**H, where k = n-kd. 203*> 204*> Each H(i) has the form 205*> 206*> H(i) = I - tau * v * v**H 207*> 208*> where tau is a complex scalar, and v is a complex vector with 209*> v(1:i+kd-1) = 0 and v(i+kd) = 1; conjg(v(i+kd+1:n)) is stored on exit in 210*> A(i,i+kd+1:n), and tau in TAU(i). 211*> 212*> If UPLO = 'L', the matrix Q is represented as a product of elementary 213*> reflectors 214*> 215*> Q = H(1) H(2) . . . H(k), where k = n-kd. 216*> 217*> Each H(i) has the form 218*> 219*> H(i) = I - tau * v * v**H 220*> 221*> where tau is a complex scalar, and v is a complex vector with 222*> v(kd+1:i) = 0 and v(i+kd+1) = 1; v(i+kd+2:n) is stored on exit in 223*> A(i+kd+2:n,i), and tau in TAU(i). 224*> 225*> The contents of A on exit are illustrated by the following examples 226*> with n = 5: 227*> 228*> if UPLO = 'U': if UPLO = 'L': 229*> 230*> ( ab ab/v1 v1 v1 v1 ) ( ab ) 231*> ( ab ab/v2 v2 v2 ) ( ab/v1 ab ) 232*> ( ab ab/v3 v3 ) ( v1 ab/v2 ab ) 233*> ( ab ab/v4 ) ( v1 v2 ab/v3 ab ) 234*> ( ab ) ( v1 v2 v3 ab/v4 ab ) 235*> 236*> where d and e denote diagonal and off-diagonal elements of T, and vi 237*> denotes an element of the vector defining H(i). 238*> \endverbatim 239*> 240* ===================================================================== 241 SUBROUTINE ZHETRD_HE2HB( UPLO, N, KD, A, LDA, AB, LDAB, TAU, 242 $ WORK, LWORK, INFO ) 243* 244 IMPLICIT NONE 245* 246* -- LAPACK computational routine -- 247* -- LAPACK is a software package provided by Univ. of Tennessee, -- 248* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- 249* 250* .. Scalar Arguments .. 251 CHARACTER UPLO 252 INTEGER INFO, LDA, LDAB, LWORK, N, KD 253* .. 254* .. Array Arguments .. 255 COMPLEX*16 A( LDA, * ), AB( LDAB, * ), 256 $ TAU( * ), WORK( * ) 257* .. 258* 259* ===================================================================== 260* 261* .. Parameters .. 262 DOUBLE PRECISION RONE 263 COMPLEX*16 ZERO, ONE, HALF 264 PARAMETER ( RONE = 1.0D+0, 265 $ ZERO = ( 0.0D+0, 0.0D+0 ), 266 $ ONE = ( 1.0D+0, 0.0D+0 ), 267 $ HALF = ( 0.5D+0, 0.0D+0 ) ) 268* .. 269* .. Local Scalars .. 270 LOGICAL LQUERY, UPPER 271 INTEGER I, J, IINFO, LWMIN, PN, PK, LK, 272 $ LDT, LDW, LDS2, LDS1, 273 $ LS2, LS1, LW, LT, 274 $ TPOS, WPOS, S2POS, S1POS 275* .. 276* .. External Subroutines .. 277 EXTERNAL XERBLA, ZHER2K, ZHEMM, ZGEMM, ZCOPY, 278 $ ZLARFT, ZGELQF, ZGEQRF, ZLASET 279* .. 280* .. Intrinsic Functions .. 281 INTRINSIC MIN, MAX 282* .. 283* .. External Functions .. 284 LOGICAL LSAME 285 INTEGER ILAENV2STAGE 286 EXTERNAL LSAME, ILAENV2STAGE 287* .. 288* .. Executable Statements .. 289* 290* Determine the minimal workspace size required 291* and test the input parameters 292* 293 INFO = 0 294 UPPER = LSAME( UPLO, 'U' ) 295 LQUERY = ( LWORK.EQ.-1 ) 296 LWMIN = ILAENV2STAGE( 4, 'ZHETRD_HE2HB', '', N, KD, -1, -1 ) 297 298 IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN 299 INFO = -1 300 ELSE IF( N.LT.0 ) THEN 301 INFO = -2 302 ELSE IF( KD.LT.0 ) THEN 303 INFO = -3 304 ELSE IF( LDA.LT.MAX( 1, N ) ) THEN 305 INFO = -5 306 ELSE IF( LDAB.LT.MAX( 1, KD+1 ) ) THEN 307 INFO = -7 308 ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN 309 INFO = -10 310 END IF 311* 312 IF( INFO.NE.0 ) THEN 313 CALL XERBLA( 'ZHETRD_HE2HB', -INFO ) 314 RETURN 315 ELSE IF( LQUERY ) THEN 316 WORK( 1 ) = LWMIN 317 RETURN 318 END IF 319* 320* Quick return if possible 321* Copy the upper/lower portion of A into AB 322* 323 IF( N.LE.KD+1 ) THEN 324 IF( UPPER ) THEN 325 DO 100 I = 1, N 326 LK = MIN( KD+1, I ) 327 CALL ZCOPY( LK, A( I-LK+1, I ), 1, 328 $ AB( KD+1-LK+1, I ), 1 ) 329 100 CONTINUE 330 ELSE 331 DO 110 I = 1, N 332 LK = MIN( KD+1, N-I+1 ) 333 CALL ZCOPY( LK, A( I, I ), 1, AB( 1, I ), 1 ) 334 110 CONTINUE 335 ENDIF 336 WORK( 1 ) = 1 337 RETURN 338 END IF 339* 340* Determine the pointer position for the workspace 341* 342 LDT = KD 343 LDS1 = KD 344 LT = LDT*KD 345 LW = N*KD 346 LS1 = LDS1*KD 347 LS2 = LWMIN - LT - LW - LS1 348* LS2 = N*MAX(KD,FACTOPTNB) 349 TPOS = 1 350 WPOS = TPOS + LT 351 S1POS = WPOS + LW 352 S2POS = S1POS + LS1 353 IF( UPPER ) THEN 354 LDW = KD 355 LDS2 = KD 356 ELSE 357 LDW = N 358 LDS2 = N 359 ENDIF 360* 361* 362* Set the workspace of the triangular matrix T to zero once such a 363* way every time T is generated the upper/lower portion will be always zero 364* 365 CALL ZLASET( "A", LDT, KD, ZERO, ZERO, WORK( TPOS ), LDT ) 366* 367 IF( UPPER ) THEN 368 DO 10 I = 1, N - KD, KD 369 PN = N-I-KD+1 370 PK = MIN( N-I-KD+1, KD ) 371* 372* Compute the LQ factorization of the current block 373* 374 CALL ZGELQF( KD, PN, A( I, I+KD ), LDA, 375 $ TAU( I ), WORK( S2POS ), LS2, IINFO ) 376* 377* Copy the upper portion of A into AB 378* 379 DO 20 J = I, I+PK-1 380 LK = MIN( KD, N-J ) + 1 381 CALL ZCOPY( LK, A( J, J ), LDA, AB( KD+1, J ), LDAB-1 ) 382 20 CONTINUE 383* 384 CALL ZLASET( 'Lower', PK, PK, ZERO, ONE, 385 $ A( I, I+KD ), LDA ) 386* 387* Form the matrix T 388* 389 CALL ZLARFT( 'Forward', 'Rowwise', PN, PK, 390 $ A( I, I+KD ), LDA, TAU( I ), 391 $ WORK( TPOS ), LDT ) 392* 393* Compute W: 394* 395 CALL ZGEMM( 'Conjugate', 'No transpose', PK, PN, PK, 396 $ ONE, WORK( TPOS ), LDT, 397 $ A( I, I+KD ), LDA, 398 $ ZERO, WORK( S2POS ), LDS2 ) 399* 400 CALL ZHEMM( 'Right', UPLO, PK, PN, 401 $ ONE, A( I+KD, I+KD ), LDA, 402 $ WORK( S2POS ), LDS2, 403 $ ZERO, WORK( WPOS ), LDW ) 404* 405 CALL ZGEMM( 'No transpose', 'Conjugate', PK, PK, PN, 406 $ ONE, WORK( WPOS ), LDW, 407 $ WORK( S2POS ), LDS2, 408 $ ZERO, WORK( S1POS ), LDS1 ) 409* 410 CALL ZGEMM( 'No transpose', 'No transpose', PK, PN, PK, 411 $ -HALF, WORK( S1POS ), LDS1, 412 $ A( I, I+KD ), LDA, 413 $ ONE, WORK( WPOS ), LDW ) 414* 415* 416* Update the unreduced submatrix A(i+kd:n,i+kd:n), using 417* an update of the form: A := A - V'*W - W'*V 418* 419 CALL ZHER2K( UPLO, 'Conjugate', PN, PK, 420 $ -ONE, A( I, I+KD ), LDA, 421 $ WORK( WPOS ), LDW, 422 $ RONE, A( I+KD, I+KD ), LDA ) 423 10 CONTINUE 424* 425* Copy the upper band to AB which is the band storage matrix 426* 427 DO 30 J = N-KD+1, N 428 LK = MIN(KD, N-J) + 1 429 CALL ZCOPY( LK, A( J, J ), LDA, AB( KD+1, J ), LDAB-1 ) 430 30 CONTINUE 431* 432 ELSE 433* 434* Reduce the lower triangle of A to lower band matrix 435* 436 DO 40 I = 1, N - KD, KD 437 PN = N-I-KD+1 438 PK = MIN( N-I-KD+1, KD ) 439* 440* Compute the QR factorization of the current block 441* 442 CALL ZGEQRF( PN, KD, A( I+KD, I ), LDA, 443 $ TAU( I ), WORK( S2POS ), LS2, IINFO ) 444* 445* Copy the upper portion of A into AB 446* 447 DO 50 J = I, I+PK-1 448 LK = MIN( KD, N-J ) + 1 449 CALL ZCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 ) 450 50 CONTINUE 451* 452 CALL ZLASET( 'Upper', PK, PK, ZERO, ONE, 453 $ A( I+KD, I ), LDA ) 454* 455* Form the matrix T 456* 457 CALL ZLARFT( 'Forward', 'Columnwise', PN, PK, 458 $ A( I+KD, I ), LDA, TAU( I ), 459 $ WORK( TPOS ), LDT ) 460* 461* Compute W: 462* 463 CALL ZGEMM( 'No transpose', 'No transpose', PN, PK, PK, 464 $ ONE, A( I+KD, I ), LDA, 465 $ WORK( TPOS ), LDT, 466 $ ZERO, WORK( S2POS ), LDS2 ) 467* 468 CALL ZHEMM( 'Left', UPLO, PN, PK, 469 $ ONE, A( I+KD, I+KD ), LDA, 470 $ WORK( S2POS ), LDS2, 471 $ ZERO, WORK( WPOS ), LDW ) 472* 473 CALL ZGEMM( 'Conjugate', 'No transpose', PK, PK, PN, 474 $ ONE, WORK( S2POS ), LDS2, 475 $ WORK( WPOS ), LDW, 476 $ ZERO, WORK( S1POS ), LDS1 ) 477* 478 CALL ZGEMM( 'No transpose', 'No transpose', PN, PK, PK, 479 $ -HALF, A( I+KD, I ), LDA, 480 $ WORK( S1POS ), LDS1, 481 $ ONE, WORK( WPOS ), LDW ) 482* 483* 484* Update the unreduced submatrix A(i+kd:n,i+kd:n), using 485* an update of the form: A := A - V*W' - W*V' 486* 487 CALL ZHER2K( UPLO, 'No transpose', PN, PK, 488 $ -ONE, A( I+KD, I ), LDA, 489 $ WORK( WPOS ), LDW, 490 $ RONE, A( I+KD, I+KD ), LDA ) 491* ================================================================== 492* RESTORE A FOR COMPARISON AND CHECKING TO BE REMOVED 493* DO 45 J = I, I+PK-1 494* LK = MIN( KD, N-J ) + 1 495* CALL ZCOPY( LK, AB( 1, J ), 1, A( J, J ), 1 ) 496* 45 CONTINUE 497* ================================================================== 498 40 CONTINUE 499* 500* Copy the lower band to AB which is the band storage matrix 501* 502 DO 60 J = N-KD+1, N 503 LK = MIN(KD, N-J) + 1 504 CALL ZCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 ) 505 60 CONTINUE 506 507 END IF 508* 509 WORK( 1 ) = LWMIN 510 RETURN 511* 512* End of ZHETRD_HE2HB 513* 514 END 515