1*> \brief <b> ZHEEV_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE matrices</b>
2*
3*  @precisions fortran z -> s d c
4*
5*  =========== DOCUMENTATION ===========
6*
7* Online html documentation available at
8*            http://www.netlib.org/lapack/explore-html/
9*
10*> \htmlonly
11*> Download ZHEEV_2STAGE + dependencies
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zheev_2stage.f">
13*> [TGZ]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zheev_2stage.f">
15*> [ZIP]</a>
16*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zheev_2stage.f">
17*> [TXT]</a>
18*> \endhtmlonly
19*
20*  Definition:
21*  ===========
22*
23*       SUBROUTINE ZHEEV_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK,
24*                                RWORK, INFO )
25*
26*       IMPLICIT NONE
27*
28*       .. Scalar Arguments ..
29*       CHARACTER          JOBZ, UPLO
30*       INTEGER            INFO, LDA, LWORK, N
31*       ..
32*       .. Array Arguments ..
33*       DOUBLE PRECISION   RWORK( * ), W( * )
34*       COMPLEX*16         A( LDA, * ), WORK( * )
35*       ..
36*
37*
38*> \par Purpose:
39*  =============
40*>
41*> \verbatim
42*>
43*> ZHEEV_2STAGE computes all eigenvalues and, optionally, eigenvectors of a
44*> complex Hermitian matrix A using the 2stage technique for
45*> the reduction to tridiagonal.
46*> \endverbatim
47*
48*  Arguments:
49*  ==========
50*
51*> \param[in] JOBZ
52*> \verbatim
53*>          JOBZ is CHARACTER*1
54*>          = 'N':  Compute eigenvalues only;
55*>          = 'V':  Compute eigenvalues and eigenvectors.
56*>                  Not available in this release.
57*> \endverbatim
58*>
59*> \param[in] UPLO
60*> \verbatim
61*>          UPLO is CHARACTER*1
62*>          = 'U':  Upper triangle of A is stored;
63*>          = 'L':  Lower triangle of A is stored.
64*> \endverbatim
65*>
66*> \param[in] N
67*> \verbatim
68*>          N is INTEGER
69*>          The order of the matrix A.  N >= 0.
70*> \endverbatim
71*>
72*> \param[in,out] A
73*> \verbatim
74*>          A is COMPLEX*16 array, dimension (LDA, N)
75*>          On entry, the Hermitian matrix A.  If UPLO = 'U', the
76*>          leading N-by-N upper triangular part of A contains the
77*>          upper triangular part of the matrix A.  If UPLO = 'L',
78*>          the leading N-by-N lower triangular part of A contains
79*>          the lower triangular part of the matrix A.
80*>          On exit, if JOBZ = 'V', then if INFO = 0, A contains the
81*>          orthonormal eigenvectors of the matrix A.
82*>          If JOBZ = 'N', then on exit the lower triangle (if UPLO='L')
83*>          or the upper triangle (if UPLO='U') of A, including the
84*>          diagonal, is destroyed.
85*> \endverbatim
86*>
87*> \param[in] LDA
88*> \verbatim
89*>          LDA is INTEGER
90*>          The leading dimension of the array A.  LDA >= max(1,N).
91*> \endverbatim
92*>
93*> \param[out] W
94*> \verbatim
95*>          W is DOUBLE PRECISION array, dimension (N)
96*>          If INFO = 0, the eigenvalues in ascending order.
97*> \endverbatim
98*>
99*> \param[out] WORK
100*> \verbatim
101*>          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK))
102*>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
103*> \endverbatim
104*>
105*> \param[in] LWORK
106*> \verbatim
107*>          LWORK is INTEGER
108*>          The length of the array WORK. LWORK >= 1, when N <= 1;
109*>          otherwise
110*>          If JOBZ = 'N' and N > 1, LWORK must be queried.
111*>                                   LWORK = MAX(1, dimension) where
112*>                                   dimension = max(stage1,stage2) + (KD+1)*N + N
113*>                                             = N*KD + N*max(KD+1,FACTOPTNB)
114*>                                               + max(2*KD*KD, KD*NTHREADS)
115*>                                               + (KD+1)*N + N
116*>                                   where KD is the blocking size of the reduction,
117*>                                   FACTOPTNB is the blocking used by the QR or LQ
118*>                                   algorithm, usually FACTOPTNB=128 is a good choice
119*>                                   NTHREADS is the number of threads used when
120*>                                   openMP compilation is enabled, otherwise =1.
121*>          If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available
122*>
123*>          If LWORK = -1, then a workspace query is assumed; the routine
124*>          only calculates the optimal size of the WORK array, returns
125*>          this value as the first entry of the WORK array, and no error
126*>          message related to LWORK is issued by XERBLA.
127*> \endverbatim
128*>
129*> \param[out] RWORK
130*> \verbatim
131*>          RWORK is DOUBLE PRECISION array, dimension (max(1, 3*N-2))
132*> \endverbatim
133*>
134*> \param[out] INFO
135*> \verbatim
136*>          INFO is INTEGER
137*>          = 0:  successful exit
138*>          < 0:  if INFO = -i, the i-th argument had an illegal value
139*>          > 0:  if INFO = i, the algorithm failed to converge; i
140*>                off-diagonal elements of an intermediate tridiagonal
141*>                form did not converge to zero.
142*> \endverbatim
143*
144*  Authors:
145*  ========
146*
147*> \author Univ. of Tennessee
148*> \author Univ. of California Berkeley
149*> \author Univ. of Colorado Denver
150*> \author NAG Ltd.
151*
152*> \ingroup complex16HEeigen
153*
154*> \par Further Details:
155*  =====================
156*>
157*> \verbatim
158*>
159*>  All details about the 2stage techniques are available in:
160*>
161*>  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
162*>  Parallel reduction to condensed forms for symmetric eigenvalue problems
163*>  using aggregated fine-grained and memory-aware kernels. In Proceedings
164*>  of 2011 International Conference for High Performance Computing,
165*>  Networking, Storage and Analysis (SC '11), New York, NY, USA,
166*>  Article 8 , 11 pages.
167*>  http://doi.acm.org/10.1145/2063384.2063394
168*>
169*>  A. Haidar, J. Kurzak, P. Luszczek, 2013.
170*>  An improved parallel singular value algorithm and its implementation
171*>  for multicore hardware, In Proceedings of 2013 International Conference
172*>  for High Performance Computing, Networking, Storage and Analysis (SC '13).
173*>  Denver, Colorado, USA, 2013.
174*>  Article 90, 12 pages.
175*>  http://doi.acm.org/10.1145/2503210.2503292
176*>
177*>  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
178*>  A novel hybrid CPU-GPU generalized eigensolver for electronic structure
179*>  calculations based on fine-grained memory aware tasks.
180*>  International Journal of High Performance Computing Applications.
181*>  Volume 28 Issue 2, Pages 196-209, May 2014.
182*>  http://hpc.sagepub.com/content/28/2/196
183*>
184*> \endverbatim
185*
186*  =====================================================================
187      SUBROUTINE ZHEEV_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK,
188     $                         RWORK, INFO )
189*
190      IMPLICIT NONE
191*
192*  -- LAPACK driver routine --
193*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
194*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
195*
196*     .. Scalar Arguments ..
197      CHARACTER          JOBZ, UPLO
198      INTEGER            INFO, LDA, LWORK, N
199*     ..
200*     .. Array Arguments ..
201      DOUBLE PRECISION   RWORK( * ), W( * )
202      COMPLEX*16         A( LDA, * ), WORK( * )
203*     ..
204*
205*  =====================================================================
206*
207*     .. Parameters ..
208      DOUBLE PRECISION   ZERO, ONE
209      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
210      COMPLEX*16         CONE
211      PARAMETER          ( CONE = ( 1.0D0, 0.0D0 ) )
212*     ..
213*     .. Local Scalars ..
214      LOGICAL            LOWER, LQUERY, WANTZ
215      INTEGER            IINFO, IMAX, INDE, INDTAU, INDWRK, ISCALE,
216     $                   LLWORK, LWMIN, LHTRD, LWTRD, KD, IB, INDHOUS
217      DOUBLE PRECISION   ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA,
218     $                   SMLNUM
219*     ..
220*     .. External Functions ..
221      LOGICAL            LSAME
222      INTEGER            ILAENV2STAGE
223      DOUBLE PRECISION   DLAMCH, ZLANHE
224      EXTERNAL           LSAME, DLAMCH, ZLANHE, ILAENV2STAGE
225*     ..
226*     .. External Subroutines ..
227      EXTERNAL           DSCAL, DSTERF, XERBLA, ZLASCL, ZSTEQR,
228     $                   ZUNGTR, ZHETRD_2STAGE
229*     ..
230*     .. Intrinsic Functions ..
231      INTRINSIC          DBLE, MAX, SQRT
232*     ..
233*     .. Executable Statements ..
234*
235*     Test the input parameters.
236*
237      WANTZ = LSAME( JOBZ, 'V' )
238      LOWER = LSAME( UPLO, 'L' )
239      LQUERY = ( LWORK.EQ.-1 )
240*
241      INFO = 0
242      IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN
243         INFO = -1
244      ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN
245         INFO = -2
246      ELSE IF( N.LT.0 ) THEN
247         INFO = -3
248      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
249         INFO = -5
250      END IF
251*
252      IF( INFO.EQ.0 ) THEN
253         KD    = ILAENV2STAGE( 1, 'ZHETRD_2STAGE', JOBZ, N, -1, -1, -1 )
254         IB    = ILAENV2STAGE( 2, 'ZHETRD_2STAGE', JOBZ, N, KD, -1, -1 )
255         LHTRD = ILAENV2STAGE( 3, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 )
256         LWTRD = ILAENV2STAGE( 4, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 )
257         LWMIN = N + LHTRD + LWTRD
258         WORK( 1 )  = LWMIN
259*
260         IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY )
261     $      INFO = -8
262      END IF
263*
264      IF( INFO.NE.0 ) THEN
265         CALL XERBLA( 'ZHEEV_2STAGE ', -INFO )
266         RETURN
267      ELSE IF( LQUERY ) THEN
268         RETURN
269      END IF
270*
271*     Quick return if possible
272*
273      IF( N.EQ.0 ) THEN
274         RETURN
275      END IF
276*
277      IF( N.EQ.1 ) THEN
278         W( 1 ) = DBLE( A( 1, 1 ) )
279         WORK( 1 ) = 1
280         IF( WANTZ )
281     $      A( 1, 1 ) = CONE
282         RETURN
283      END IF
284*
285*     Get machine constants.
286*
287      SAFMIN = DLAMCH( 'Safe minimum' )
288      EPS    = DLAMCH( 'Precision' )
289      SMLNUM = SAFMIN / EPS
290      BIGNUM = ONE / SMLNUM
291      RMIN   = SQRT( SMLNUM )
292      RMAX   = SQRT( BIGNUM )
293*
294*     Scale matrix to allowable range, if necessary.
295*
296      ANRM = ZLANHE( 'M', UPLO, N, A, LDA, RWORK )
297      ISCALE = 0
298      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
299         ISCALE = 1
300         SIGMA = RMIN / ANRM
301      ELSE IF( ANRM.GT.RMAX ) THEN
302         ISCALE = 1
303         SIGMA = RMAX / ANRM
304      END IF
305      IF( ISCALE.EQ.1 )
306     $   CALL ZLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO )
307*
308*     Call ZHETRD_2STAGE to reduce Hermitian matrix to tridiagonal form.
309*
310      INDE    = 1
311      INDTAU  = 1
312      INDHOUS = INDTAU + N
313      INDWRK  = INDHOUS + LHTRD
314      LLWORK  = LWORK - INDWRK + 1
315*
316      CALL ZHETRD_2STAGE( JOBZ, UPLO, N, A, LDA, W, RWORK( INDE ),
317     $                    WORK( INDTAU ), WORK( INDHOUS ), LHTRD,
318     $                    WORK( INDWRK ), LLWORK, IINFO )
319*
320*     For eigenvalues only, call DSTERF.  For eigenvectors, first call
321*     ZUNGTR to generate the unitary matrix, then call ZSTEQR.
322*
323      IF( .NOT.WANTZ ) THEN
324         CALL DSTERF( N, W, RWORK( INDE ), INFO )
325      ELSE
326         CALL ZUNGTR( UPLO, N, A, LDA, WORK( INDTAU ), WORK( INDWRK ),
327     $                LLWORK, IINFO )
328         INDWRK = INDE + N
329         CALL ZSTEQR( JOBZ, N, W, RWORK( INDE ), A, LDA,
330     $                RWORK( INDWRK ), INFO )
331      END IF
332*
333*     If matrix was scaled, then rescale eigenvalues appropriately.
334*
335      IF( ISCALE.EQ.1 ) THEN
336         IF( INFO.EQ.0 ) THEN
337            IMAX = N
338         ELSE
339            IMAX = INFO - 1
340         END IF
341         CALL DSCAL( IMAX, ONE / SIGMA, W, 1 )
342      END IF
343*
344*     Set WORK(1) to optimal complex workspace size.
345*
346      WORK( 1 ) = LWMIN
347*
348      RETURN
349*
350*     End of ZHEEV_2STAGE
351*
352      END
353