1*> \brief <b> ZHEEV_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE matrices</b>
2*
3*  @precisions fortran z -> s d c
4*
5*  =========== DOCUMENTATION ===========
6*
7* Online html documentation available at
8*            http://www.netlib.org/lapack/explore-html/
9*
10*> \htmlonly
11*> Download ZHEEV_2STAGE + dependencies
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zheev_2stage.f">
13*> [TGZ]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zheev_2stage.f">
15*> [ZIP]</a>
16*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zheev_2stage.f">
17*> [TXT]</a>
18*> \endhtmlonly
19*
20*  Definition:
21*  ===========
22*
23*       SUBROUTINE ZHEEV_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK,
24*                                RWORK, INFO )
25*
26*       IMPLICIT NONE
27*
28*       .. Scalar Arguments ..
29*       CHARACTER          JOBZ, UPLO
30*       INTEGER            INFO, LDA, LWORK, N
31*       ..
32*       .. Array Arguments ..
33*       DOUBLE PRECISION   RWORK( * ), W( * )
34*       COMPLEX*16         A( LDA, * ), WORK( * )
35*       ..
36*
37*
38*> \par Purpose:
39*  =============
40*>
41*> \verbatim
42*>
43*> ZHEEV_2STAGE computes all eigenvalues and, optionally, eigenvectors of a
44*> complex Hermitian matrix A using the 2stage technique for
45*> the reduction to tridiagonal.
46*> \endverbatim
47*
48*  Arguments:
49*  ==========
50*
51*> \param[in] JOBZ
52*> \verbatim
53*>          JOBZ is CHARACTER*1
54*>          = 'N':  Compute eigenvalues only;
55*>          = 'V':  Compute eigenvalues and eigenvectors.
56*>                  Not available in this release.
57*> \endverbatim
58*>
59*> \param[in] UPLO
60*> \verbatim
61*>          UPLO is CHARACTER*1
62*>          = 'U':  Upper triangle of A is stored;
63*>          = 'L':  Lower triangle of A is stored.
64*> \endverbatim
65*>
66*> \param[in] N
67*> \verbatim
68*>          N is INTEGER
69*>          The order of the matrix A.  N >= 0.
70*> \endverbatim
71*>
72*> \param[in,out] A
73*> \verbatim
74*>          A is COMPLEX*16 array, dimension (LDA, N)
75*>          On entry, the Hermitian matrix A.  If UPLO = 'U', the
76*>          leading N-by-N upper triangular part of A contains the
77*>          upper triangular part of the matrix A.  If UPLO = 'L',
78*>          the leading N-by-N lower triangular part of A contains
79*>          the lower triangular part of the matrix A.
80*>          On exit, if JOBZ = 'V', then if INFO = 0, A contains the
81*>          orthonormal eigenvectors of the matrix A.
82*>          If JOBZ = 'N', then on exit the lower triangle (if UPLO='L')
83*>          or the upper triangle (if UPLO='U') of A, including the
84*>          diagonal, is destroyed.
85*> \endverbatim
86*>
87*> \param[in] LDA
88*> \verbatim
89*>          LDA is INTEGER
90*>          The leading dimension of the array A.  LDA >= max(1,N).
91*> \endverbatim
92*>
93*> \param[out] W
94*> \verbatim
95*>          W is DOUBLE PRECISION array, dimension (N)
96*>          If INFO = 0, the eigenvalues in ascending order.
97*> \endverbatim
98*>
99*> \param[out] WORK
100*> \verbatim
101*>          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK))
102*>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
103*> \endverbatim
104*>
105*> \param[in] LWORK
106*> \verbatim
107*>          LWORK is INTEGER
108*>          The length of the array WORK. LWORK >= 1, when N <= 1;
109*>          otherwise
110*>          If JOBZ = 'N' and N > 1, LWORK must be queried.
111*>                                   LWORK = MAX(1, dimension) where
112*>                                   dimension = max(stage1,stage2) + (KD+1)*N + N
113*>                                             = N*KD + N*max(KD+1,FACTOPTNB)
114*>                                               + max(2*KD*KD, KD*NTHREADS)
115*>                                               + (KD+1)*N + N
116*>                                   where KD is the blocking size of the reduction,
117*>                                   FACTOPTNB is the blocking used by the QR or LQ
118*>                                   algorithm, usually FACTOPTNB=128 is a good choice
119*>                                   NTHREADS is the number of threads used when
120*>                                   openMP compilation is enabled, otherwise =1.
121*>          If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available
122*>
123*>          If LWORK = -1, then a workspace query is assumed; the routine
124*>          only calculates the optimal size of the WORK array, returns
125*>          this value as the first entry of the WORK array, and no error
126*>          message related to LWORK is issued by XERBLA.
127*> \endverbatim
128*>
129*> \param[out] RWORK
130*> \verbatim
131*>          RWORK is DOUBLE PRECISION array, dimension (max(1, 3*N-2))
132*> \endverbatim
133*>
134*> \param[out] INFO
135*> \verbatim
136*>          INFO is INTEGER
137*>          = 0:  successful exit
138*>          < 0:  if INFO = -i, the i-th argument had an illegal value
139*>          > 0:  if INFO = i, the algorithm failed to converge; i
140*>                off-diagonal elements of an intermediate tridiagonal
141*>                form did not converge to zero.
142*> \endverbatim
143*
144*  Authors:
145*  ========
146*
147*> \author Univ. of Tennessee
148*> \author Univ. of California Berkeley
149*> \author Univ. of Colorado Denver
150*> \author NAG Ltd.
151*
152*> \date November 2017
153*
154*> \ingroup complex16HEeigen
155*
156*> \par Further Details:
157*  =====================
158*>
159*> \verbatim
160*>
161*>  All details about the 2stage techniques are available in:
162*>
163*>  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
164*>  Parallel reduction to condensed forms for symmetric eigenvalue problems
165*>  using aggregated fine-grained and memory-aware kernels. In Proceedings
166*>  of 2011 International Conference for High Performance Computing,
167*>  Networking, Storage and Analysis (SC '11), New York, NY, USA,
168*>  Article 8 , 11 pages.
169*>  http://doi.acm.org/10.1145/2063384.2063394
170*>
171*>  A. Haidar, J. Kurzak, P. Luszczek, 2013.
172*>  An improved parallel singular value algorithm and its implementation
173*>  for multicore hardware, In Proceedings of 2013 International Conference
174*>  for High Performance Computing, Networking, Storage and Analysis (SC '13).
175*>  Denver, Colorado, USA, 2013.
176*>  Article 90, 12 pages.
177*>  http://doi.acm.org/10.1145/2503210.2503292
178*>
179*>  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
180*>  A novel hybrid CPU-GPU generalized eigensolver for electronic structure
181*>  calculations based on fine-grained memory aware tasks.
182*>  International Journal of High Performance Computing Applications.
183*>  Volume 28 Issue 2, Pages 196-209, May 2014.
184*>  http://hpc.sagepub.com/content/28/2/196
185*>
186*> \endverbatim
187*
188*  =====================================================================
189      SUBROUTINE ZHEEV_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK,
190     $                         RWORK, INFO )
191*
192      IMPLICIT NONE
193*
194*  -- LAPACK driver routine (version 3.8.0) --
195*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
196*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
197*     November 2017
198*
199*     .. Scalar Arguments ..
200      CHARACTER          JOBZ, UPLO
201      INTEGER            INFO, LDA, LWORK, N
202*     ..
203*     .. Array Arguments ..
204      DOUBLE PRECISION   RWORK( * ), W( * )
205      COMPLEX*16         A( LDA, * ), WORK( * )
206*     ..
207*
208*  =====================================================================
209*
210*     .. Parameters ..
211      DOUBLE PRECISION   ZERO, ONE
212      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
213      COMPLEX*16         CONE
214      PARAMETER          ( CONE = ( 1.0D0, 0.0D0 ) )
215*     ..
216*     .. Local Scalars ..
217      LOGICAL            LOWER, LQUERY, WANTZ
218      INTEGER            IINFO, IMAX, INDE, INDTAU, INDWRK, ISCALE,
219     $                   LLWORK, LWMIN, LHTRD, LWTRD, KD, IB, INDHOUS
220      DOUBLE PRECISION   ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA,
221     $                   SMLNUM
222*     ..
223*     .. External Functions ..
224      LOGICAL            LSAME
225      INTEGER            ILAENV2STAGE
226      DOUBLE PRECISION   DLAMCH, ZLANHE
227      EXTERNAL           LSAME, DLAMCH, ZLANHE, ILAENV2STAGE
228*     ..
229*     .. External Subroutines ..
230      EXTERNAL           DSCAL, DSTERF, XERBLA, ZLASCL, ZSTEQR,
231     $                   ZUNGTR, ZHETRD_2STAGE
232*     ..
233*     .. Intrinsic Functions ..
234      INTRINSIC          DBLE, MAX, SQRT
235*     ..
236*     .. Executable Statements ..
237*
238*     Test the input parameters.
239*
240      WANTZ = LSAME( JOBZ, 'V' )
241      LOWER = LSAME( UPLO, 'L' )
242      LQUERY = ( LWORK.EQ.-1 )
243*
244      INFO = 0
245      IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN
246         INFO = -1
247      ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN
248         INFO = -2
249      ELSE IF( N.LT.0 ) THEN
250         INFO = -3
251      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
252         INFO = -5
253      END IF
254*
255      IF( INFO.EQ.0 ) THEN
256         KD    = ILAENV2STAGE( 1, 'ZHETRD_2STAGE', JOBZ, N, -1, -1, -1 )
257         IB    = ILAENV2STAGE( 2, 'ZHETRD_2STAGE', JOBZ, N, KD, -1, -1 )
258         LHTRD = ILAENV2STAGE( 3, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 )
259         LWTRD = ILAENV2STAGE( 4, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 )
260         LWMIN = N + LHTRD + LWTRD
261         WORK( 1 )  = LWMIN
262*
263         IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY )
264     $      INFO = -8
265      END IF
266*
267      IF( INFO.NE.0 ) THEN
268         CALL XERBLA( 'ZHEEV_2STAGE ', -INFO )
269         RETURN
270      ELSE IF( LQUERY ) THEN
271         RETURN
272      END IF
273*
274*     Quick return if possible
275*
276      IF( N.EQ.0 ) THEN
277         RETURN
278      END IF
279*
280      IF( N.EQ.1 ) THEN
281         W( 1 ) = DBLE( A( 1, 1 ) )
282         WORK( 1 ) = 1
283         IF( WANTZ )
284     $      A( 1, 1 ) = CONE
285         RETURN
286      END IF
287*
288*     Get machine constants.
289*
290      SAFMIN = DLAMCH( 'Safe minimum' )
291      EPS    = DLAMCH( 'Precision' )
292      SMLNUM = SAFMIN / EPS
293      BIGNUM = ONE / SMLNUM
294      RMIN   = SQRT( SMLNUM )
295      RMAX   = SQRT( BIGNUM )
296*
297*     Scale matrix to allowable range, if necessary.
298*
299      ANRM = ZLANHE( 'M', UPLO, N, A, LDA, RWORK )
300      ISCALE = 0
301      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
302         ISCALE = 1
303         SIGMA = RMIN / ANRM
304      ELSE IF( ANRM.GT.RMAX ) THEN
305         ISCALE = 1
306         SIGMA = RMAX / ANRM
307      END IF
308      IF( ISCALE.EQ.1 )
309     $   CALL ZLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO )
310*
311*     Call ZHETRD_2STAGE to reduce Hermitian matrix to tridiagonal form.
312*
313      INDE    = 1
314      INDTAU  = 1
315      INDHOUS = INDTAU + N
316      INDWRK  = INDHOUS + LHTRD
317      LLWORK  = LWORK - INDWRK + 1
318*
319      CALL ZHETRD_2STAGE( JOBZ, UPLO, N, A, LDA, W, RWORK( INDE ),
320     $                    WORK( INDTAU ), WORK( INDHOUS ), LHTRD,
321     $                    WORK( INDWRK ), LLWORK, IINFO )
322*
323*     For eigenvalues only, call DSTERF.  For eigenvectors, first call
324*     ZUNGTR to generate the unitary matrix, then call ZSTEQR.
325*
326      IF( .NOT.WANTZ ) THEN
327         CALL DSTERF( N, W, RWORK( INDE ), INFO )
328      ELSE
329         CALL ZUNGTR( UPLO, N, A, LDA, WORK( INDTAU ), WORK( INDWRK ),
330     $                LLWORK, IINFO )
331         INDWRK = INDE + N
332         CALL ZSTEQR( JOBZ, N, W, RWORK( INDE ), A, LDA,
333     $                RWORK( INDWRK ), INFO )
334      END IF
335*
336*     If matrix was scaled, then rescale eigenvalues appropriately.
337*
338      IF( ISCALE.EQ.1 ) THEN
339         IF( INFO.EQ.0 ) THEN
340            IMAX = N
341         ELSE
342            IMAX = INFO - 1
343         END IF
344         CALL DSCAL( IMAX, ONE / SIGMA, W, 1 )
345      END IF
346*
347*     Set WORK(1) to optimal complex workspace size.
348*
349      WORK( 1 ) = LWMIN
350*
351      RETURN
352*
353*     End of ZHEEV_2STAGE
354*
355      END
356