1*> \brief <b> ZHEEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE matrices</b> 2* 3* @precisions fortran z -> s d c 4* 5* =========== DOCUMENTATION =========== 6* 7* Online html documentation available at 8* http://www.netlib.org/lapack/explore-html/ 9* 10*> \htmlonly 11*> Download ZHEEVD_2STAGE + dependencies 12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zheevd_2stage.f"> 13*> [TGZ]</a> 14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zheevd_2stage.f"> 15*> [ZIP]</a> 16*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zheevd_2stage.f"> 17*> [TXT]</a> 18*> \endhtmlonly 19* 20* Definition: 21* =========== 22* 23* SUBROUTINE ZHEEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, 24* RWORK, LRWORK, IWORK, LIWORK, INFO ) 25* 26* IMPLICIT NONE 27* 28* .. Scalar Arguments .. 29* CHARACTER JOBZ, UPLO 30* INTEGER INFO, LDA, LIWORK, LRWORK, LWORK, N 31* .. 32* .. Array Arguments .. 33* INTEGER IWORK( * ) 34* DOUBLE PRECISION RWORK( * ), W( * ) 35* COMPLEX*16 A( LDA, * ), WORK( * ) 36* .. 37* 38* 39*> \par Purpose: 40* ============= 41*> 42*> \verbatim 43*> 44*> ZHEEVD_2STAGE computes all eigenvalues and, optionally, eigenvectors of a 45*> complex Hermitian matrix A using the 2stage technique for 46*> the reduction to tridiagonal. If eigenvectors are desired, it uses a 47*> divide and conquer algorithm. 48*> 49*> The divide and conquer algorithm makes very mild assumptions about 50*> floating point arithmetic. It will work on machines with a guard 51*> digit in add/subtract, or on those binary machines without guard 52*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or 53*> Cray-2. It could conceivably fail on hexadecimal or decimal machines 54*> without guard digits, but we know of none. 55*> \endverbatim 56* 57* Arguments: 58* ========== 59* 60*> \param[in] JOBZ 61*> \verbatim 62*> JOBZ is CHARACTER*1 63*> = 'N': Compute eigenvalues only; 64*> = 'V': Compute eigenvalues and eigenvectors. 65*> Not available in this release. 66*> \endverbatim 67*> 68*> \param[in] UPLO 69*> \verbatim 70*> UPLO is CHARACTER*1 71*> = 'U': Upper triangle of A is stored; 72*> = 'L': Lower triangle of A is stored. 73*> \endverbatim 74*> 75*> \param[in] N 76*> \verbatim 77*> N is INTEGER 78*> The order of the matrix A. N >= 0. 79*> \endverbatim 80*> 81*> \param[in,out] A 82*> \verbatim 83*> A is COMPLEX*16 array, dimension (LDA, N) 84*> On entry, the Hermitian matrix A. If UPLO = 'U', the 85*> leading N-by-N upper triangular part of A contains the 86*> upper triangular part of the matrix A. If UPLO = 'L', 87*> the leading N-by-N lower triangular part of A contains 88*> the lower triangular part of the matrix A. 89*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the 90*> orthonormal eigenvectors of the matrix A. 91*> If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') 92*> or the upper triangle (if UPLO='U') of A, including the 93*> diagonal, is destroyed. 94*> \endverbatim 95*> 96*> \param[in] LDA 97*> \verbatim 98*> LDA is INTEGER 99*> The leading dimension of the array A. LDA >= max(1,N). 100*> \endverbatim 101*> 102*> \param[out] W 103*> \verbatim 104*> W is DOUBLE PRECISION array, dimension (N) 105*> If INFO = 0, the eigenvalues in ascending order. 106*> \endverbatim 107*> 108*> \param[out] WORK 109*> \verbatim 110*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) 111*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 112*> \endverbatim 113*> 114*> \param[in] LWORK 115*> \verbatim 116*> LWORK is INTEGER 117*> The dimension of the array WORK. 118*> If N <= 1, LWORK must be at least 1. 119*> If JOBZ = 'N' and N > 1, LWORK must be queried. 120*> LWORK = MAX(1, dimension) where 121*> dimension = max(stage1,stage2) + (KD+1)*N + N+1 122*> = N*KD + N*max(KD+1,FACTOPTNB) 123*> + max(2*KD*KD, KD*NTHREADS) 124*> + (KD+1)*N + N+1 125*> where KD is the blocking size of the reduction, 126*> FACTOPTNB is the blocking used by the QR or LQ 127*> algorithm, usually FACTOPTNB=128 is a good choice 128*> NTHREADS is the number of threads used when 129*> openMP compilation is enabled, otherwise =1. 130*> If JOBZ = 'V' and N > 1, LWORK must be at least 2*N + N**2 131*> 132*> If LWORK = -1, then a workspace query is assumed; the routine 133*> only calculates the optimal sizes of the WORK, RWORK and 134*> IWORK arrays, returns these values as the first entries of 135*> the WORK, RWORK and IWORK arrays, and no error message 136*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. 137*> \endverbatim 138*> 139*> \param[out] RWORK 140*> \verbatim 141*> RWORK is DOUBLE PRECISION array, 142*> dimension (LRWORK) 143*> On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. 144*> \endverbatim 145*> 146*> \param[in] LRWORK 147*> \verbatim 148*> LRWORK is INTEGER 149*> The dimension of the array RWORK. 150*> If N <= 1, LRWORK must be at least 1. 151*> If JOBZ = 'N' and N > 1, LRWORK must be at least N. 152*> If JOBZ = 'V' and N > 1, LRWORK must be at least 153*> 1 + 5*N + 2*N**2. 154*> 155*> If LRWORK = -1, then a workspace query is assumed; the 156*> routine only calculates the optimal sizes of the WORK, RWORK 157*> and IWORK arrays, returns these values as the first entries 158*> of the WORK, RWORK and IWORK arrays, and no error message 159*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. 160*> \endverbatim 161*> 162*> \param[out] IWORK 163*> \verbatim 164*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) 165*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. 166*> \endverbatim 167*> 168*> \param[in] LIWORK 169*> \verbatim 170*> LIWORK is INTEGER 171*> The dimension of the array IWORK. 172*> If N <= 1, LIWORK must be at least 1. 173*> If JOBZ = 'N' and N > 1, LIWORK must be at least 1. 174*> If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. 175*> 176*> If LIWORK = -1, then a workspace query is assumed; the 177*> routine only calculates the optimal sizes of the WORK, RWORK 178*> and IWORK arrays, returns these values as the first entries 179*> of the WORK, RWORK and IWORK arrays, and no error message 180*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. 181*> \endverbatim 182*> 183*> \param[out] INFO 184*> \verbatim 185*> INFO is INTEGER 186*> = 0: successful exit 187*> < 0: if INFO = -i, the i-th argument had an illegal value 188*> > 0: if INFO = i and JOBZ = 'N', then the algorithm failed 189*> to converge; i off-diagonal elements of an intermediate 190*> tridiagonal form did not converge to zero; 191*> if INFO = i and JOBZ = 'V', then the algorithm failed 192*> to compute an eigenvalue while working on the submatrix 193*> lying in rows and columns INFO/(N+1) through 194*> mod(INFO,N+1). 195*> \endverbatim 196* 197* Authors: 198* ======== 199* 200*> \author Univ. of Tennessee 201*> \author Univ. of California Berkeley 202*> \author Univ. of Colorado Denver 203*> \author NAG Ltd. 204* 205*> \date November 2017 206* 207*> \ingroup complex16HEeigen 208* 209*> \par Further Details: 210* ===================== 211*> 212*> Modified description of INFO. Sven, 16 Feb 05. 213* 214*> \par Contributors: 215* ================== 216*> 217*> Jeff Rutter, Computer Science Division, University of California 218*> at Berkeley, USA 219*> 220*> \par Further Details: 221* ===================== 222*> 223*> \verbatim 224*> 225*> All details about the 2stage techniques are available in: 226*> 227*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. 228*> Parallel reduction to condensed forms for symmetric eigenvalue problems 229*> using aggregated fine-grained and memory-aware kernels. In Proceedings 230*> of 2011 International Conference for High Performance Computing, 231*> Networking, Storage and Analysis (SC '11), New York, NY, USA, 232*> Article 8 , 11 pages. 233*> http://doi.acm.org/10.1145/2063384.2063394 234*> 235*> A. Haidar, J. Kurzak, P. Luszczek, 2013. 236*> An improved parallel singular value algorithm and its implementation 237*> for multicore hardware, In Proceedings of 2013 International Conference 238*> for High Performance Computing, Networking, Storage and Analysis (SC '13). 239*> Denver, Colorado, USA, 2013. 240*> Article 90, 12 pages. 241*> http://doi.acm.org/10.1145/2503210.2503292 242*> 243*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. 244*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure 245*> calculations based on fine-grained memory aware tasks. 246*> International Journal of High Performance Computing Applications. 247*> Volume 28 Issue 2, Pages 196-209, May 2014. 248*> http://hpc.sagepub.com/content/28/2/196 249*> 250*> \endverbatim 251* 252* ===================================================================== 253 SUBROUTINE ZHEEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, 254 $ RWORK, LRWORK, IWORK, LIWORK, INFO ) 255* 256 IMPLICIT NONE 257* 258* -- LAPACK driver routine (version 3.8.0) -- 259* -- LAPACK is a software package provided by Univ. of Tennessee, -- 260* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- 261* November 2017 262* 263* .. Scalar Arguments .. 264 CHARACTER JOBZ, UPLO 265 INTEGER INFO, LDA, LIWORK, LRWORK, LWORK, N 266* .. 267* .. Array Arguments .. 268 INTEGER IWORK( * ) 269 DOUBLE PRECISION RWORK( * ), W( * ) 270 COMPLEX*16 A( LDA, * ), WORK( * ) 271* .. 272* 273* ===================================================================== 274* 275* .. Parameters .. 276 DOUBLE PRECISION ZERO, ONE 277 PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) 278 COMPLEX*16 CONE 279 PARAMETER ( CONE = ( 1.0D0, 0.0D0 ) ) 280* .. 281* .. Local Scalars .. 282 LOGICAL LOWER, LQUERY, WANTZ 283 INTEGER IINFO, IMAX, INDE, INDRWK, INDTAU, INDWK2, 284 $ INDWRK, ISCALE, LIWMIN, LLRWK, LLWORK, 285 $ LLWRK2, LRWMIN, LWMIN, 286 $ LHTRD, LWTRD, KD, IB, INDHOUS 287 288 289 DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, 290 $ SMLNUM 291* .. 292* .. External Functions .. 293 LOGICAL LSAME 294 INTEGER ILAENV2STAGE 295 DOUBLE PRECISION DLAMCH, ZLANHE 296 EXTERNAL LSAME, DLAMCH, ZLANHE, ILAENV2STAGE 297* .. 298* .. External Subroutines .. 299 EXTERNAL DSCAL, DSTERF, XERBLA, ZLACPY, ZLASCL, 300 $ ZSTEDC, ZUNMTR, ZHETRD_2STAGE 301* .. 302* .. Intrinsic Functions .. 303 INTRINSIC DBLE, MAX, SQRT 304* .. 305* .. Executable Statements .. 306* 307* Test the input parameters. 308* 309 WANTZ = LSAME( JOBZ, 'V' ) 310 LOWER = LSAME( UPLO, 'L' ) 311 LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) 312* 313 INFO = 0 314 IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN 315 INFO = -1 316 ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN 317 INFO = -2 318 ELSE IF( N.LT.0 ) THEN 319 INFO = -3 320 ELSE IF( LDA.LT.MAX( 1, N ) ) THEN 321 INFO = -5 322 END IF 323* 324 IF( INFO.EQ.0 ) THEN 325 IF( N.LE.1 ) THEN 326 LWMIN = 1 327 LRWMIN = 1 328 LIWMIN = 1 329 ELSE 330 KD = ILAENV2STAGE( 1, 'ZHETRD_2STAGE', JOBZ, 331 $ N, -1, -1, -1 ) 332 IB = ILAENV2STAGE( 2, 'ZHETRD_2STAGE', JOBZ, 333 $ N, KD, -1, -1 ) 334 LHTRD = ILAENV2STAGE( 3, 'ZHETRD_2STAGE', JOBZ, 335 $ N, KD, IB, -1 ) 336 LWTRD = ILAENV2STAGE( 4, 'ZHETRD_2STAGE', JOBZ, 337 $ N, KD, IB, -1 ) 338 IF( WANTZ ) THEN 339 LWMIN = 2*N + N*N 340 LRWMIN = 1 + 5*N + 2*N**2 341 LIWMIN = 3 + 5*N 342 ELSE 343 LWMIN = N + 1 + LHTRD + LWTRD 344 LRWMIN = N 345 LIWMIN = 1 346 END IF 347 END IF 348 WORK( 1 ) = LWMIN 349 RWORK( 1 ) = LRWMIN 350 IWORK( 1 ) = LIWMIN 351* 352 IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN 353 INFO = -8 354 ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN 355 INFO = -10 356 ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN 357 INFO = -12 358 END IF 359 END IF 360* 361 IF( INFO.NE.0 ) THEN 362 CALL XERBLA( 'ZHEEVD_2STAGE', -INFO ) 363 RETURN 364 ELSE IF( LQUERY ) THEN 365 RETURN 366 END IF 367* 368* Quick return if possible 369* 370 IF( N.EQ.0 ) 371 $ RETURN 372* 373 IF( N.EQ.1 ) THEN 374 W( 1 ) = DBLE( A( 1, 1 ) ) 375 IF( WANTZ ) 376 $ A( 1, 1 ) = CONE 377 RETURN 378 END IF 379* 380* Get machine constants. 381* 382 SAFMIN = DLAMCH( 'Safe minimum' ) 383 EPS = DLAMCH( 'Precision' ) 384 SMLNUM = SAFMIN / EPS 385 BIGNUM = ONE / SMLNUM 386 RMIN = SQRT( SMLNUM ) 387 RMAX = SQRT( BIGNUM ) 388* 389* Scale matrix to allowable range, if necessary. 390* 391 ANRM = ZLANHE( 'M', UPLO, N, A, LDA, RWORK ) 392 ISCALE = 0 393 IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN 394 ISCALE = 1 395 SIGMA = RMIN / ANRM 396 ELSE IF( ANRM.GT.RMAX ) THEN 397 ISCALE = 1 398 SIGMA = RMAX / ANRM 399 END IF 400 IF( ISCALE.EQ.1 ) 401 $ CALL ZLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO ) 402* 403* Call ZHETRD_2STAGE to reduce Hermitian matrix to tridiagonal form. 404* 405 INDE = 1 406 INDRWK = INDE + N 407 LLRWK = LRWORK - INDRWK + 1 408 INDTAU = 1 409 INDHOUS = INDTAU + N 410 INDWRK = INDHOUS + LHTRD 411 LLWORK = LWORK - INDWRK + 1 412 INDWK2 = INDWRK + N*N 413 LLWRK2 = LWORK - INDWK2 + 1 414* 415 CALL ZHETRD_2STAGE( JOBZ, UPLO, N, A, LDA, W, RWORK( INDE ), 416 $ WORK( INDTAU ), WORK( INDHOUS ), LHTRD, 417 $ WORK( INDWRK ), LLWORK, IINFO ) 418* 419* For eigenvalues only, call DSTERF. For eigenvectors, first call 420* ZSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the 421* tridiagonal matrix, then call ZUNMTR to multiply it to the 422* Householder transformations represented as Householder vectors in 423* A. 424* 425 IF( .NOT.WANTZ ) THEN 426 CALL DSTERF( N, W, RWORK( INDE ), INFO ) 427 ELSE 428 CALL ZSTEDC( 'I', N, W, RWORK( INDE ), WORK( INDWRK ), N, 429 $ WORK( INDWK2 ), LLWRK2, RWORK( INDRWK ), LLRWK, 430 $ IWORK, LIWORK, INFO ) 431 CALL ZUNMTR( 'L', UPLO, 'N', N, N, A, LDA, WORK( INDTAU ), 432 $ WORK( INDWRK ), N, WORK( INDWK2 ), LLWRK2, IINFO ) 433 CALL ZLACPY( 'A', N, N, WORK( INDWRK ), N, A, LDA ) 434 END IF 435* 436* If matrix was scaled, then rescale eigenvalues appropriately. 437* 438 IF( ISCALE.EQ.1 ) THEN 439 IF( INFO.EQ.0 ) THEN 440 IMAX = N 441 ELSE 442 IMAX = INFO - 1 443 END IF 444 CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) 445 END IF 446* 447 WORK( 1 ) = LWMIN 448 RWORK( 1 ) = LRWMIN 449 IWORK( 1 ) = LIWMIN 450* 451 RETURN 452* 453* End of ZHEEVD_2STAGE 454* 455 END 456