1*> \brief <b> ZHBEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices</b> 2* 3* @precisions fortran z -> s d c 4* 5* =========== DOCUMENTATION =========== 6* 7* Online html documentation available at 8* http://www.netlib.org/lapack/explore-html/ 9* 10*> \htmlonly 11*> Download ZHBEVD_2STAGE + dependencies 12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhbevd_2stage.f"> 13*> [TGZ]</a> 14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhbevd_2stage.f"> 15*> [ZIP]</a> 16*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhbevd_2stage.f"> 17*> [TXT]</a> 18*> \endhtmlonly 19* 20* Definition: 21* =========== 22* 23* SUBROUTINE ZHBEVD_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, 24* WORK, LWORK, RWORK, LRWORK, IWORK, 25* LIWORK, INFO ) 26* 27* IMPLICIT NONE 28* 29* .. Scalar Arguments .. 30* CHARACTER JOBZ, UPLO 31* INTEGER INFO, KD, LDAB, LDZ, LIWORK, LRWORK, LWORK, N 32* .. 33* .. Array Arguments .. 34* INTEGER IWORK( * ) 35* DOUBLE PRECISION RWORK( * ), W( * ) 36* COMPLEX*16 AB( LDAB, * ), WORK( * ), Z( LDZ, * ) 37* .. 38* 39* 40*> \par Purpose: 41* ============= 42*> 43*> \verbatim 44*> 45*> ZHBEVD_2STAGE computes all the eigenvalues and, optionally, eigenvectors of 46*> a complex Hermitian band matrix A using the 2stage technique for 47*> the reduction to tridiagonal. If eigenvectors are desired, it 48*> uses a divide and conquer algorithm. 49*> 50*> The divide and conquer algorithm makes very mild assumptions about 51*> floating point arithmetic. It will work on machines with a guard 52*> digit in add/subtract, or on those binary machines without guard 53*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or 54*> Cray-2. It could conceivably fail on hexadecimal or decimal machines 55*> without guard digits, but we know of none. 56*> \endverbatim 57* 58* Arguments: 59* ========== 60* 61*> \param[in] JOBZ 62*> \verbatim 63*> JOBZ is CHARACTER*1 64*> = 'N': Compute eigenvalues only; 65*> = 'V': Compute eigenvalues and eigenvectors. 66*> Not available in this release. 67*> \endverbatim 68*> 69*> \param[in] UPLO 70*> \verbatim 71*> UPLO is CHARACTER*1 72*> = 'U': Upper triangle of A is stored; 73*> = 'L': Lower triangle of A is stored. 74*> \endverbatim 75*> 76*> \param[in] N 77*> \verbatim 78*> N is INTEGER 79*> The order of the matrix A. N >= 0. 80*> \endverbatim 81*> 82*> \param[in] KD 83*> \verbatim 84*> KD is INTEGER 85*> The number of superdiagonals of the matrix A if UPLO = 'U', 86*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. 87*> \endverbatim 88*> 89*> \param[in,out] AB 90*> \verbatim 91*> AB is COMPLEX*16 array, dimension (LDAB, N) 92*> On entry, the upper or lower triangle of the Hermitian band 93*> matrix A, stored in the first KD+1 rows of the array. The 94*> j-th column of A is stored in the j-th column of the array AB 95*> as follows: 96*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; 97*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). 98*> 99*> On exit, AB is overwritten by values generated during the 100*> reduction to tridiagonal form. If UPLO = 'U', the first 101*> superdiagonal and the diagonal of the tridiagonal matrix T 102*> are returned in rows KD and KD+1 of AB, and if UPLO = 'L', 103*> the diagonal and first subdiagonal of T are returned in the 104*> first two rows of AB. 105*> \endverbatim 106*> 107*> \param[in] LDAB 108*> \verbatim 109*> LDAB is INTEGER 110*> The leading dimension of the array AB. LDAB >= KD + 1. 111*> \endverbatim 112*> 113*> \param[out] W 114*> \verbatim 115*> W is DOUBLE PRECISION array, dimension (N) 116*> If INFO = 0, the eigenvalues in ascending order. 117*> \endverbatim 118*> 119*> \param[out] Z 120*> \verbatim 121*> Z is COMPLEX*16 array, dimension (LDZ, N) 122*> If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal 123*> eigenvectors of the matrix A, with the i-th column of Z 124*> holding the eigenvector associated with W(i). 125*> If JOBZ = 'N', then Z is not referenced. 126*> \endverbatim 127*> 128*> \param[in] LDZ 129*> \verbatim 130*> LDZ is INTEGER 131*> The leading dimension of the array Z. LDZ >= 1, and if 132*> JOBZ = 'V', LDZ >= max(1,N). 133*> \endverbatim 134*> 135*> \param[out] WORK 136*> \verbatim 137*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) 138*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 139*> \endverbatim 140*> 141*> \param[in] LWORK 142*> \verbatim 143*> LWORK is INTEGER 144*> The length of the array WORK. LWORK >= 1, when N <= 1; 145*> otherwise 146*> If JOBZ = 'N' and N > 1, LWORK must be queried. 147*> LWORK = MAX(1, dimension) where 148*> dimension = (2KD+1)*N + KD*NTHREADS 149*> where KD is the size of the band. 150*> NTHREADS is the number of threads used when 151*> openMP compilation is enabled, otherwise =1. 152*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available. 153*> 154*> If LWORK = -1, then a workspace query is assumed; the routine 155*> only calculates the optimal sizes of the WORK, RWORK and 156*> IWORK arrays, returns these values as the first entries of 157*> the WORK, RWORK and IWORK arrays, and no error message 158*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. 159*> \endverbatim 160*> 161*> \param[out] RWORK 162*> \verbatim 163*> RWORK is DOUBLE PRECISION array, 164*> dimension (LRWORK) 165*> On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. 166*> \endverbatim 167*> 168*> \param[in] LRWORK 169*> \verbatim 170*> LRWORK is INTEGER 171*> The dimension of array RWORK. 172*> If N <= 1, LRWORK must be at least 1. 173*> If JOBZ = 'N' and N > 1, LRWORK must be at least N. 174*> If JOBZ = 'V' and N > 1, LRWORK must be at least 175*> 1 + 5*N + 2*N**2. 176*> 177*> If LRWORK = -1, then a workspace query is assumed; the 178*> routine only calculates the optimal sizes of the WORK, RWORK 179*> and IWORK arrays, returns these values as the first entries 180*> of the WORK, RWORK and IWORK arrays, and no error message 181*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. 182*> \endverbatim 183*> 184*> \param[out] IWORK 185*> \verbatim 186*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) 187*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. 188*> \endverbatim 189*> 190*> \param[in] LIWORK 191*> \verbatim 192*> LIWORK is INTEGER 193*> The dimension of array IWORK. 194*> If JOBZ = 'N' or N <= 1, LIWORK must be at least 1. 195*> If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N . 196*> 197*> If LIWORK = -1, then a workspace query is assumed; the 198*> routine only calculates the optimal sizes of the WORK, RWORK 199*> and IWORK arrays, returns these values as the first entries 200*> of the WORK, RWORK and IWORK arrays, and no error message 201*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. 202*> \endverbatim 203*> 204*> \param[out] INFO 205*> \verbatim 206*> INFO is INTEGER 207*> = 0: successful exit. 208*> < 0: if INFO = -i, the i-th argument had an illegal value. 209*> > 0: if INFO = i, the algorithm failed to converge; i 210*> off-diagonal elements of an intermediate tridiagonal 211*> form did not converge to zero. 212*> \endverbatim 213* 214* Authors: 215* ======== 216* 217*> \author Univ. of Tennessee 218*> \author Univ. of California Berkeley 219*> \author Univ. of Colorado Denver 220*> \author NAG Ltd. 221* 222*> \date November 2017 223* 224*> \ingroup complex16OTHEReigen 225* 226*> \par Further Details: 227* ===================== 228*> 229*> \verbatim 230*> 231*> All details about the 2stage techniques are available in: 232*> 233*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. 234*> Parallel reduction to condensed forms for symmetric eigenvalue problems 235*> using aggregated fine-grained and memory-aware kernels. In Proceedings 236*> of 2011 International Conference for High Performance Computing, 237*> Networking, Storage and Analysis (SC '11), New York, NY, USA, 238*> Article 8 , 11 pages. 239*> http://doi.acm.org/10.1145/2063384.2063394 240*> 241*> A. Haidar, J. Kurzak, P. Luszczek, 2013. 242*> An improved parallel singular value algorithm and its implementation 243*> for multicore hardware, In Proceedings of 2013 International Conference 244*> for High Performance Computing, Networking, Storage and Analysis (SC '13). 245*> Denver, Colorado, USA, 2013. 246*> Article 90, 12 pages. 247*> http://doi.acm.org/10.1145/2503210.2503292 248*> 249*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. 250*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure 251*> calculations based on fine-grained memory aware tasks. 252*> International Journal of High Performance Computing Applications. 253*> Volume 28 Issue 2, Pages 196-209, May 2014. 254*> http://hpc.sagepub.com/content/28/2/196 255*> 256*> \endverbatim 257* 258* ===================================================================== 259 SUBROUTINE ZHBEVD_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, 260 $ WORK, LWORK, RWORK, LRWORK, IWORK, 261 $ LIWORK, INFO ) 262* 263 IMPLICIT NONE 264* 265* -- LAPACK driver routine (version 3.8.0) -- 266* -- LAPACK is a software package provided by Univ. of Tennessee, -- 267* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- 268* November 2017 269* 270* .. Scalar Arguments .. 271 CHARACTER JOBZ, UPLO 272 INTEGER INFO, KD, LDAB, LDZ, LIWORK, LRWORK, LWORK, N 273* .. 274* .. Array Arguments .. 275 INTEGER IWORK( * ) 276 DOUBLE PRECISION RWORK( * ), W( * ) 277 COMPLEX*16 AB( LDAB, * ), WORK( * ), Z( LDZ, * ) 278* .. 279* 280* ===================================================================== 281* 282* .. Parameters .. 283 DOUBLE PRECISION ZERO, ONE 284 PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) 285 COMPLEX*16 CZERO, CONE 286 PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ), 287 $ CONE = ( 1.0D0, 0.0D0 ) ) 288* .. 289* .. Local Scalars .. 290 LOGICAL LOWER, LQUERY, WANTZ 291 INTEGER IINFO, IMAX, INDE, INDWK2, INDRWK, ISCALE, 292 $ LLWORK, INDWK, LHTRD, LWTRD, IB, INDHOUS, 293 $ LIWMIN, LLRWK, LLWK2, LRWMIN, LWMIN 294 DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, 295 $ SMLNUM 296* .. 297* .. External Functions .. 298 LOGICAL LSAME 299 INTEGER ILAENV2STAGE 300 DOUBLE PRECISION DLAMCH, ZLANHB 301 EXTERNAL LSAME, DLAMCH, ZLANHB, ILAENV2STAGE 302* .. 303* .. External Subroutines .. 304 EXTERNAL DSCAL, DSTERF, XERBLA, ZGEMM, ZLACPY, 305 $ ZLASCL, ZSTEDC, ZHETRD_HB2ST 306* .. 307* .. Intrinsic Functions .. 308 INTRINSIC DBLE, SQRT 309* .. 310* .. Executable Statements .. 311* 312* Test the input parameters. 313* 314 WANTZ = LSAME( JOBZ, 'V' ) 315 LOWER = LSAME( UPLO, 'L' ) 316 LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) 317* 318 INFO = 0 319 IF( N.LE.1 ) THEN 320 LWMIN = 1 321 LRWMIN = 1 322 LIWMIN = 1 323 ELSE 324 IB = ILAENV2STAGE( 2, 'ZHETRD_HB2ST', JOBZ, N, KD, -1, -1 ) 325 LHTRD = ILAENV2STAGE( 3, 'ZHETRD_HB2ST', JOBZ, N, KD, IB, -1 ) 326 LWTRD = ILAENV2STAGE( 4, 'ZHETRD_HB2ST', JOBZ, N, KD, IB, -1 ) 327 IF( WANTZ ) THEN 328 LWMIN = 2*N**2 329 LRWMIN = 1 + 5*N + 2*N**2 330 LIWMIN = 3 + 5*N 331 ELSE 332 LWMIN = MAX( N, LHTRD + LWTRD ) 333 LRWMIN = N 334 LIWMIN = 1 335 END IF 336 END IF 337 IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN 338 INFO = -1 339 ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN 340 INFO = -2 341 ELSE IF( N.LT.0 ) THEN 342 INFO = -3 343 ELSE IF( KD.LT.0 ) THEN 344 INFO = -4 345 ELSE IF( LDAB.LT.KD+1 ) THEN 346 INFO = -6 347 ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN 348 INFO = -9 349 END IF 350* 351 IF( INFO.EQ.0 ) THEN 352 WORK( 1 ) = LWMIN 353 RWORK( 1 ) = LRWMIN 354 IWORK( 1 ) = LIWMIN 355* 356 IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN 357 INFO = -11 358 ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN 359 INFO = -13 360 ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN 361 INFO = -15 362 END IF 363 END IF 364* 365 IF( INFO.NE.0 ) THEN 366 CALL XERBLA( 'ZHBEVD_2STAGE', -INFO ) 367 RETURN 368 ELSE IF( LQUERY ) THEN 369 RETURN 370 END IF 371* 372* Quick return if possible 373* 374 IF( N.EQ.0 ) 375 $ RETURN 376* 377 IF( N.EQ.1 ) THEN 378 W( 1 ) = DBLE( AB( 1, 1 ) ) 379 IF( WANTZ ) 380 $ Z( 1, 1 ) = CONE 381 RETURN 382 END IF 383* 384* Get machine constants. 385* 386 SAFMIN = DLAMCH( 'Safe minimum' ) 387 EPS = DLAMCH( 'Precision' ) 388 SMLNUM = SAFMIN / EPS 389 BIGNUM = ONE / SMLNUM 390 RMIN = SQRT( SMLNUM ) 391 RMAX = SQRT( BIGNUM ) 392* 393* Scale matrix to allowable range, if necessary. 394* 395 ANRM = ZLANHB( 'M', UPLO, N, KD, AB, LDAB, RWORK ) 396 ISCALE = 0 397 IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN 398 ISCALE = 1 399 SIGMA = RMIN / ANRM 400 ELSE IF( ANRM.GT.RMAX ) THEN 401 ISCALE = 1 402 SIGMA = RMAX / ANRM 403 END IF 404 IF( ISCALE.EQ.1 ) THEN 405 IF( LOWER ) THEN 406 CALL ZLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) 407 ELSE 408 CALL ZLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) 409 END IF 410 END IF 411* 412* Call ZHBTRD_HB2ST to reduce Hermitian band matrix to tridiagonal form. 413* 414 INDE = 1 415 INDRWK = INDE + N 416 LLRWK = LRWORK - INDRWK + 1 417 INDHOUS = 1 418 INDWK = INDHOUS + LHTRD 419 LLWORK = LWORK - INDWK + 1 420 INDWK2 = INDWK + N*N 421 LLWK2 = LWORK - INDWK2 + 1 422* 423 CALL ZHETRD_HB2ST( "N", JOBZ, UPLO, N, KD, AB, LDAB, W, 424 $ RWORK( INDE ), WORK( INDHOUS ), LHTRD, 425 $ WORK( INDWK ), LLWORK, IINFO ) 426* 427* For eigenvalues only, call DSTERF. For eigenvectors, call ZSTEDC. 428* 429 IF( .NOT.WANTZ ) THEN 430 CALL DSTERF( N, W, RWORK( INDE ), INFO ) 431 ELSE 432 CALL ZSTEDC( 'I', N, W, RWORK( INDE ), WORK, N, WORK( INDWK2 ), 433 $ LLWK2, RWORK( INDRWK ), LLRWK, IWORK, LIWORK, 434 $ INFO ) 435 CALL ZGEMM( 'N', 'N', N, N, N, CONE, Z, LDZ, WORK, N, CZERO, 436 $ WORK( INDWK2 ), N ) 437 CALL ZLACPY( 'A', N, N, WORK( INDWK2 ), N, Z, LDZ ) 438 END IF 439* 440* If matrix was scaled, then rescale eigenvalues appropriately. 441* 442 IF( ISCALE.EQ.1 ) THEN 443 IF( INFO.EQ.0 ) THEN 444 IMAX = N 445 ELSE 446 IMAX = INFO - 1 447 END IF 448 CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) 449 END IF 450* 451 WORK( 1 ) = LWMIN 452 RWORK( 1 ) = LRWMIN 453 IWORK( 1 ) = LIWMIN 454 RETURN 455* 456* End of ZHBEVD_2STAGE 457* 458 END 459