1*> \brief <b> ZHEEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE matrices</b>
2*
3*  @precisions fortran z -> s d c
4*
5*  =========== DOCUMENTATION ===========
6*
7* Online html documentation available at
8*            http://www.netlib.org/lapack/explore-html/
9*
10*> \htmlonly
11*> Download ZHEEVD_2STAGE + dependencies
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zheevd_2stage.f">
13*> [TGZ]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zheevd_2stage.f">
15*> [ZIP]</a>
16*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zheevd_2stage.f">
17*> [TXT]</a>
18*> \endhtmlonly
19*
20*  Definition:
21*  ===========
22*
23*       SUBROUTINE ZHEEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK,
24*                          RWORK, LRWORK, IWORK, LIWORK, INFO )
25*
26*       IMPLICIT NONE
27*
28*       .. Scalar Arguments ..
29*       CHARACTER          JOBZ, UPLO
30*       INTEGER            INFO, LDA, LIWORK, LRWORK, LWORK, N
31*       ..
32*       .. Array Arguments ..
33*       INTEGER            IWORK( * )
34*       DOUBLE PRECISION   RWORK( * ), W( * )
35*       COMPLEX*16         A( LDA, * ), WORK( * )
36*       ..
37*
38*
39*> \par Purpose:
40*  =============
41*>
42*> \verbatim
43*>
44*> ZHEEVD_2STAGE computes all eigenvalues and, optionally, eigenvectors of a
45*> complex Hermitian matrix A using the 2stage technique for
46*> the reduction to tridiagonal.  If eigenvectors are desired, it uses a
47*> divide and conquer algorithm.
48*>
49*> The divide and conquer algorithm makes very mild assumptions about
50*> floating point arithmetic. It will work on machines with a guard
51*> digit in add/subtract, or on those binary machines without guard
52*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
53*> Cray-2. It could conceivably fail on hexadecimal or decimal machines
54*> without guard digits, but we know of none.
55*> \endverbatim
56*
57*  Arguments:
58*  ==========
59*
60*> \param[in] JOBZ
61*> \verbatim
62*>          JOBZ is CHARACTER*1
63*>          = 'N':  Compute eigenvalues only;
64*>          = 'V':  Compute eigenvalues and eigenvectors.
65*>                  Not available in this release.
66*> \endverbatim
67*>
68*> \param[in] UPLO
69*> \verbatim
70*>          UPLO is CHARACTER*1
71*>          = 'U':  Upper triangle of A is stored;
72*>          = 'L':  Lower triangle of A is stored.
73*> \endverbatim
74*>
75*> \param[in] N
76*> \verbatim
77*>          N is INTEGER
78*>          The order of the matrix A.  N >= 0.
79*> \endverbatim
80*>
81*> \param[in,out] A
82*> \verbatim
83*>          A is COMPLEX*16 array, dimension (LDA, N)
84*>          On entry, the Hermitian matrix A.  If UPLO = 'U', the
85*>          leading N-by-N upper triangular part of A contains the
86*>          upper triangular part of the matrix A.  If UPLO = 'L',
87*>          the leading N-by-N lower triangular part of A contains
88*>          the lower triangular part of the matrix A.
89*>          On exit, if JOBZ = 'V', then if INFO = 0, A contains the
90*>          orthonormal eigenvectors of the matrix A.
91*>          If JOBZ = 'N', then on exit the lower triangle (if UPLO='L')
92*>          or the upper triangle (if UPLO='U') of A, including the
93*>          diagonal, is destroyed.
94*> \endverbatim
95*>
96*> \param[in] LDA
97*> \verbatim
98*>          LDA is INTEGER
99*>          The leading dimension of the array A.  LDA >= max(1,N).
100*> \endverbatim
101*>
102*> \param[out] W
103*> \verbatim
104*>          W is DOUBLE PRECISION array, dimension (N)
105*>          If INFO = 0, the eigenvalues in ascending order.
106*> \endverbatim
107*>
108*> \param[out] WORK
109*> \verbatim
110*>          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK))
111*>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
112*> \endverbatim
113*>
114*> \param[in] LWORK
115*> \verbatim
116*>          LWORK is INTEGER
117*>          The dimension of the array WORK.
118*>          If N <= 1,               LWORK must be at least 1.
119*>          If JOBZ = 'N' and N > 1, LWORK must be queried.
120*>                                   LWORK = MAX(1, dimension) where
121*>                                   dimension = max(stage1,stage2) + (KD+1)*N + N+1
122*>                                             = N*KD + N*max(KD+1,FACTOPTNB)
123*>                                               + max(2*KD*KD, KD*NTHREADS)
124*>                                               + (KD+1)*N + N+1
125*>                                   where KD is the blocking size of the reduction,
126*>                                   FACTOPTNB is the blocking used by the QR or LQ
127*>                                   algorithm, usually FACTOPTNB=128 is a good choice
128*>                                   NTHREADS is the number of threads used when
129*>                                   openMP compilation is enabled, otherwise =1.
130*>          If JOBZ = 'V' and N > 1, LWORK must be at least 2*N + N**2
131*>
132*>          If LWORK = -1, then a workspace query is assumed; the routine
133*>          only calculates the optimal sizes of the WORK, RWORK and
134*>          IWORK arrays, returns these values as the first entries of
135*>          the WORK, RWORK and IWORK arrays, and no error message
136*>          related to LWORK or LRWORK or LIWORK is issued by XERBLA.
137*> \endverbatim
138*>
139*> \param[out] RWORK
140*> \verbatim
141*>          RWORK is DOUBLE PRECISION array,
142*>                                         dimension (LRWORK)
143*>          On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK.
144*> \endverbatim
145*>
146*> \param[in] LRWORK
147*> \verbatim
148*>          LRWORK is INTEGER
149*>          The dimension of the array RWORK.
150*>          If N <= 1,                LRWORK must be at least 1.
151*>          If JOBZ  = 'N' and N > 1, LRWORK must be at least N.
152*>          If JOBZ  = 'V' and N > 1, LRWORK must be at least
153*>                         1 + 5*N + 2*N**2.
154*>
155*>          If LRWORK = -1, then a workspace query is assumed; the
156*>          routine only calculates the optimal sizes of the WORK, RWORK
157*>          and IWORK arrays, returns these values as the first entries
158*>          of the WORK, RWORK and IWORK arrays, and no error message
159*>          related to LWORK or LRWORK or LIWORK is issued by XERBLA.
160*> \endverbatim
161*>
162*> \param[out] IWORK
163*> \verbatim
164*>          IWORK is INTEGER array, dimension (MAX(1,LIWORK))
165*>          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
166*> \endverbatim
167*>
168*> \param[in] LIWORK
169*> \verbatim
170*>          LIWORK is INTEGER
171*>          The dimension of the array IWORK.
172*>          If N <= 1,                LIWORK must be at least 1.
173*>          If JOBZ  = 'N' and N > 1, LIWORK must be at least 1.
174*>          If JOBZ  = 'V' and N > 1, LIWORK must be at least 3 + 5*N.
175*>
176*>          If LIWORK = -1, then a workspace query is assumed; the
177*>          routine only calculates the optimal sizes of the WORK, RWORK
178*>          and IWORK arrays, returns these values as the first entries
179*>          of the WORK, RWORK and IWORK arrays, and no error message
180*>          related to LWORK or LRWORK or LIWORK is issued by XERBLA.
181*> \endverbatim
182*>
183*> \param[out] INFO
184*> \verbatim
185*>          INFO is INTEGER
186*>          = 0:  successful exit
187*>          < 0:  if INFO = -i, the i-th argument had an illegal value
188*>          > 0:  if INFO = i and JOBZ = 'N', then the algorithm failed
189*>                to converge; i off-diagonal elements of an intermediate
190*>                tridiagonal form did not converge to zero;
191*>                if INFO = i and JOBZ = 'V', then the algorithm failed
192*>                to compute an eigenvalue while working on the submatrix
193*>                lying in rows and columns INFO/(N+1) through
194*>                mod(INFO,N+1).
195*> \endverbatim
196*
197*  Authors:
198*  ========
199*
200*> \author Univ. of Tennessee
201*> \author Univ. of California Berkeley
202*> \author Univ. of Colorado Denver
203*> \author NAG Ltd.
204*
205*> \date November 2017
206*
207*> \ingroup complex16HEeigen
208*
209*> \par Further Details:
210*  =====================
211*>
212*>  Modified description of INFO. Sven, 16 Feb 05.
213*
214*> \par Contributors:
215*  ==================
216*>
217*> Jeff Rutter, Computer Science Division, University of California
218*> at Berkeley, USA
219*>
220*> \par Further Details:
221*  =====================
222*>
223*> \verbatim
224*>
225*>  All details about the 2stage techniques are available in:
226*>
227*>  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
228*>  Parallel reduction to condensed forms for symmetric eigenvalue problems
229*>  using aggregated fine-grained and memory-aware kernels. In Proceedings
230*>  of 2011 International Conference for High Performance Computing,
231*>  Networking, Storage and Analysis (SC '11), New York, NY, USA,
232*>  Article 8 , 11 pages.
233*>  http://doi.acm.org/10.1145/2063384.2063394
234*>
235*>  A. Haidar, J. Kurzak, P. Luszczek, 2013.
236*>  An improved parallel singular value algorithm and its implementation
237*>  for multicore hardware, In Proceedings of 2013 International Conference
238*>  for High Performance Computing, Networking, Storage and Analysis (SC '13).
239*>  Denver, Colorado, USA, 2013.
240*>  Article 90, 12 pages.
241*>  http://doi.acm.org/10.1145/2503210.2503292
242*>
243*>  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
244*>  A novel hybrid CPU-GPU generalized eigensolver for electronic structure
245*>  calculations based on fine-grained memory aware tasks.
246*>  International Journal of High Performance Computing Applications.
247*>  Volume 28 Issue 2, Pages 196-209, May 2014.
248*>  http://hpc.sagepub.com/content/28/2/196
249*>
250*> \endverbatim
251*
252*  =====================================================================
253      SUBROUTINE ZHEEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK,
254     $                   RWORK, LRWORK, IWORK, LIWORK, INFO )
255*
256      IMPLICIT NONE
257*
258*  -- LAPACK driver routine (version 3.8.0) --
259*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
260*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
261*     November 2017
262*
263*     .. Scalar Arguments ..
264      CHARACTER          JOBZ, UPLO
265      INTEGER            INFO, LDA, LIWORK, LRWORK, LWORK, N
266*     ..
267*     .. Array Arguments ..
268      INTEGER            IWORK( * )
269      DOUBLE PRECISION   RWORK( * ), W( * )
270      COMPLEX*16         A( LDA, * ), WORK( * )
271*     ..
272*
273*  =====================================================================
274*
275*     .. Parameters ..
276      DOUBLE PRECISION   ZERO, ONE
277      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
278      COMPLEX*16         CONE
279      PARAMETER          ( CONE = ( 1.0D0, 0.0D0 ) )
280*     ..
281*     .. Local Scalars ..
282      LOGICAL            LOWER, LQUERY, WANTZ
283      INTEGER            IINFO, IMAX, INDE, INDRWK, INDTAU, INDWK2,
284     $                   INDWRK, ISCALE, LIWMIN, LLRWK, LLWORK,
285     $                   LLWRK2, LRWMIN, LWMIN,
286     $                   LHTRD, LWTRD, KD, IB, INDHOUS
287
288
289      DOUBLE PRECISION   ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA,
290     $                   SMLNUM
291*     ..
292*     .. External Functions ..
293      LOGICAL            LSAME
294      INTEGER            ILAENV2STAGE
295      DOUBLE PRECISION   DLAMCH, ZLANHE
296      EXTERNAL           LSAME, DLAMCH, ZLANHE, ILAENV2STAGE
297*     ..
298*     .. External Subroutines ..
299      EXTERNAL           DSCAL, DSTERF, XERBLA, ZLACPY, ZLASCL,
300     $                   ZSTEDC, ZUNMTR, ZHETRD_2STAGE
301*     ..
302*     .. Intrinsic Functions ..
303      INTRINSIC          DBLE, MAX, SQRT
304*     ..
305*     .. Executable Statements ..
306*
307*     Test the input parameters.
308*
309      WANTZ = LSAME( JOBZ, 'V' )
310      LOWER = LSAME( UPLO, 'L' )
311      LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
312*
313      INFO = 0
314      IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN
315         INFO = -1
316      ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN
317         INFO = -2
318      ELSE IF( N.LT.0 ) THEN
319         INFO = -3
320      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
321         INFO = -5
322      END IF
323*
324      IF( INFO.EQ.0 ) THEN
325         IF( N.LE.1 ) THEN
326            LWMIN = 1
327            LRWMIN = 1
328            LIWMIN = 1
329         ELSE
330            KD    = ILAENV2STAGE( 1, 'ZHETRD_2STAGE', JOBZ,
331     $                            N, -1, -1, -1 )
332            IB    = ILAENV2STAGE( 2, 'ZHETRD_2STAGE', JOBZ,
333     $                            N, KD, -1, -1 )
334            LHTRD = ILAENV2STAGE( 3, 'ZHETRD_2STAGE', JOBZ,
335     $                            N, KD, IB, -1 )
336            LWTRD = ILAENV2STAGE( 4, 'ZHETRD_2STAGE', JOBZ,
337     $                            N, KD, IB, -1 )
338            IF( WANTZ ) THEN
339               LWMIN = 2*N + N*N
340               LRWMIN = 1 + 5*N + 2*N**2
341               LIWMIN = 3 + 5*N
342            ELSE
343               LWMIN = N + 1 + LHTRD + LWTRD
344               LRWMIN = N
345               LIWMIN = 1
346            END IF
347         END IF
348         WORK( 1 )  = LWMIN
349         RWORK( 1 ) = LRWMIN
350         IWORK( 1 ) = LIWMIN
351*
352         IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
353            INFO = -8
354         ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN
355            INFO = -10
356         ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
357            INFO = -12
358         END IF
359      END IF
360*
361      IF( INFO.NE.0 ) THEN
362         CALL XERBLA( 'ZHEEVD_2STAGE', -INFO )
363         RETURN
364      ELSE IF( LQUERY ) THEN
365         RETURN
366      END IF
367*
368*     Quick return if possible
369*
370      IF( N.EQ.0 )
371     $   RETURN
372*
373      IF( N.EQ.1 ) THEN
374         W( 1 ) = DBLE( A( 1, 1 ) )
375         IF( WANTZ )
376     $      A( 1, 1 ) = CONE
377         RETURN
378      END IF
379*
380*     Get machine constants.
381*
382      SAFMIN = DLAMCH( 'Safe minimum' )
383      EPS    = DLAMCH( 'Precision' )
384      SMLNUM = SAFMIN / EPS
385      BIGNUM = ONE / SMLNUM
386      RMIN   = SQRT( SMLNUM )
387      RMAX   = SQRT( BIGNUM )
388*
389*     Scale matrix to allowable range, if necessary.
390*
391      ANRM = ZLANHE( 'M', UPLO, N, A, LDA, RWORK )
392      ISCALE = 0
393      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
394         ISCALE = 1
395         SIGMA = RMIN / ANRM
396      ELSE IF( ANRM.GT.RMAX ) THEN
397         ISCALE = 1
398         SIGMA = RMAX / ANRM
399      END IF
400      IF( ISCALE.EQ.1 )
401     $   CALL ZLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO )
402*
403*     Call ZHETRD_2STAGE to reduce Hermitian matrix to tridiagonal form.
404*
405      INDE    = 1
406      INDRWK  = INDE + N
407      LLRWK   = LRWORK - INDRWK + 1
408      INDTAU  = 1
409      INDHOUS = INDTAU + N
410      INDWRK  = INDHOUS + LHTRD
411      LLWORK  = LWORK - INDWRK + 1
412      INDWK2  = INDWRK + N*N
413      LLWRK2  = LWORK - INDWK2 + 1
414*
415      CALL ZHETRD_2STAGE( JOBZ, UPLO, N, A, LDA, W, RWORK( INDE ),
416     $                    WORK( INDTAU ), WORK( INDHOUS ), LHTRD,
417     $                    WORK( INDWRK ), LLWORK, IINFO )
418*
419*     For eigenvalues only, call DSTERF.  For eigenvectors, first call
420*     ZSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the
421*     tridiagonal matrix, then call ZUNMTR to multiply it to the
422*     Householder transformations represented as Householder vectors in
423*     A.
424*
425      IF( .NOT.WANTZ ) THEN
426         CALL DSTERF( N, W, RWORK( INDE ), INFO )
427      ELSE
428         CALL ZSTEDC( 'I', N, W, RWORK( INDE ), WORK( INDWRK ), N,
429     $                WORK( INDWK2 ), LLWRK2, RWORK( INDRWK ), LLRWK,
430     $                IWORK, LIWORK, INFO )
431         CALL ZUNMTR( 'L', UPLO, 'N', N, N, A, LDA, WORK( INDTAU ),
432     $                WORK( INDWRK ), N, WORK( INDWK2 ), LLWRK2, IINFO )
433         CALL ZLACPY( 'A', N, N, WORK( INDWRK ), N, A, LDA )
434      END IF
435*
436*     If matrix was scaled, then rescale eigenvalues appropriately.
437*
438      IF( ISCALE.EQ.1 ) THEN
439         IF( INFO.EQ.0 ) THEN
440            IMAX = N
441         ELSE
442            IMAX = INFO - 1
443         END IF
444         CALL DSCAL( IMAX, ONE / SIGMA, W, 1 )
445      END IF
446*
447      WORK( 1 )  = LWMIN
448      RWORK( 1 ) = LRWMIN
449      IWORK( 1 ) = LIWMIN
450*
451      RETURN
452*
453*     End of ZHEEVD_2STAGE
454*
455      END
456