1*> \brief <b> ZHBEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices</b>
2*
3*  @precisions fortran z -> s d c
4*
5*  =========== DOCUMENTATION ===========
6*
7* Online html documentation available at
8*            http://www.netlib.org/lapack/explore-html/
9*
10*> \htmlonly
11*> Download ZHBEVD_2STAGE + dependencies
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhbevd_2stage.f">
13*> [TGZ]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhbevd_2stage.f">
15*> [ZIP]</a>
16*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhbevd_2stage.f">
17*> [TXT]</a>
18*> \endhtmlonly
19*
20*  Definition:
21*  ===========
22*
23*       SUBROUTINE ZHBEVD_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ,
24*                                 WORK, LWORK, RWORK, LRWORK, IWORK,
25*                                 LIWORK, INFO )
26*
27*       IMPLICIT NONE
28*
29*       .. Scalar Arguments ..
30*       CHARACTER          JOBZ, UPLO
31*       INTEGER            INFO, KD, LDAB, LDZ, LIWORK, LRWORK, LWORK, N
32*       ..
33*       .. Array Arguments ..
34*       INTEGER            IWORK( * )
35*       DOUBLE PRECISION   RWORK( * ), W( * )
36*       COMPLEX*16         AB( LDAB, * ), WORK( * ), Z( LDZ, * )
37*       ..
38*
39*
40*> \par Purpose:
41*  =============
42*>
43*> \verbatim
44*>
45*> ZHBEVD_2STAGE computes all the eigenvalues and, optionally, eigenvectors of
46*> a complex Hermitian band matrix A using the 2stage technique for
47*> the reduction to tridiagonal.  If eigenvectors are desired, it
48*> uses a divide and conquer algorithm.
49*>
50*> The divide and conquer algorithm makes very mild assumptions about
51*> floating point arithmetic. It will work on machines with a guard
52*> digit in add/subtract, or on those binary machines without guard
53*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
54*> Cray-2. It could conceivably fail on hexadecimal or decimal machines
55*> without guard digits, but we know of none.
56*> \endverbatim
57*
58*  Arguments:
59*  ==========
60*
61*> \param[in] JOBZ
62*> \verbatim
63*>          JOBZ is CHARACTER*1
64*>          = 'N':  Compute eigenvalues only;
65*>          = 'V':  Compute eigenvalues and eigenvectors.
66*>                  Not available in this release.
67*> \endverbatim
68*>
69*> \param[in] UPLO
70*> \verbatim
71*>          UPLO is CHARACTER*1
72*>          = 'U':  Upper triangle of A is stored;
73*>          = 'L':  Lower triangle of A is stored.
74*> \endverbatim
75*>
76*> \param[in] N
77*> \verbatim
78*>          N is INTEGER
79*>          The order of the matrix A.  N >= 0.
80*> \endverbatim
81*>
82*> \param[in] KD
83*> \verbatim
84*>          KD is INTEGER
85*>          The number of superdiagonals of the matrix A if UPLO = 'U',
86*>          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
87*> \endverbatim
88*>
89*> \param[in,out] AB
90*> \verbatim
91*>          AB is COMPLEX*16 array, dimension (LDAB, N)
92*>          On entry, the upper or lower triangle of the Hermitian band
93*>          matrix A, stored in the first KD+1 rows of the array.  The
94*>          j-th column of A is stored in the j-th column of the array AB
95*>          as follows:
96*>          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
97*>          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
98*>
99*>          On exit, AB is overwritten by values generated during the
100*>          reduction to tridiagonal form.  If UPLO = 'U', the first
101*>          superdiagonal and the diagonal of the tridiagonal matrix T
102*>          are returned in rows KD and KD+1 of AB, and if UPLO = 'L',
103*>          the diagonal and first subdiagonal of T are returned in the
104*>          first two rows of AB.
105*> \endverbatim
106*>
107*> \param[in] LDAB
108*> \verbatim
109*>          LDAB is INTEGER
110*>          The leading dimension of the array AB.  LDAB >= KD + 1.
111*> \endverbatim
112*>
113*> \param[out] W
114*> \verbatim
115*>          W is DOUBLE PRECISION array, dimension (N)
116*>          If INFO = 0, the eigenvalues in ascending order.
117*> \endverbatim
118*>
119*> \param[out] Z
120*> \verbatim
121*>          Z is COMPLEX*16 array, dimension (LDZ, N)
122*>          If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal
123*>          eigenvectors of the matrix A, with the i-th column of Z
124*>          holding the eigenvector associated with W(i).
125*>          If JOBZ = 'N', then Z is not referenced.
126*> \endverbatim
127*>
128*> \param[in] LDZ
129*> \verbatim
130*>          LDZ is INTEGER
131*>          The leading dimension of the array Z.  LDZ >= 1, and if
132*>          JOBZ = 'V', LDZ >= max(1,N).
133*> \endverbatim
134*>
135*> \param[out] WORK
136*> \verbatim
137*>          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK))
138*>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
139*> \endverbatim
140*>
141*> \param[in] LWORK
142*> \verbatim
143*>          LWORK is INTEGER
144*>          The length of the array WORK. LWORK >= 1, when N <= 1;
145*>          otherwise
146*>          If JOBZ = 'N' and N > 1, LWORK must be queried.
147*>                                   LWORK = MAX(1, dimension) where
148*>                                   dimension = (2KD+1)*N + KD*NTHREADS
149*>                                   where KD is the size of the band.
150*>                                   NTHREADS is the number of threads used when
151*>                                   openMP compilation is enabled, otherwise =1.
152*>          If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available.
153*>
154*>          If LWORK = -1, then a workspace query is assumed; the routine
155*>          only calculates the optimal sizes of the WORK, RWORK and
156*>          IWORK arrays, returns these values as the first entries of
157*>          the WORK, RWORK and IWORK arrays, and no error message
158*>          related to LWORK or LRWORK or LIWORK is issued by XERBLA.
159*> \endverbatim
160*>
161*> \param[out] RWORK
162*> \verbatim
163*>          RWORK is DOUBLE PRECISION array,
164*>                                         dimension (LRWORK)
165*>          On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK.
166*> \endverbatim
167*>
168*> \param[in] LRWORK
169*> \verbatim
170*>          LRWORK is INTEGER
171*>          The dimension of array RWORK.
172*>          If N <= 1,               LRWORK must be at least 1.
173*>          If JOBZ = 'N' and N > 1, LRWORK must be at least N.
174*>          If JOBZ = 'V' and N > 1, LRWORK must be at least
175*>                        1 + 5*N + 2*N**2.
176*>
177*>          If LRWORK = -1, then a workspace query is assumed; the
178*>          routine only calculates the optimal sizes of the WORK, RWORK
179*>          and IWORK arrays, returns these values as the first entries
180*>          of the WORK, RWORK and IWORK arrays, and no error message
181*>          related to LWORK or LRWORK or LIWORK is issued by XERBLA.
182*> \endverbatim
183*>
184*> \param[out] IWORK
185*> \verbatim
186*>          IWORK is INTEGER array, dimension (MAX(1,LIWORK))
187*>          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
188*> \endverbatim
189*>
190*> \param[in] LIWORK
191*> \verbatim
192*>          LIWORK is INTEGER
193*>          The dimension of array IWORK.
194*>          If JOBZ = 'N' or N <= 1, LIWORK must be at least 1.
195*>          If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N .
196*>
197*>          If LIWORK = -1, then a workspace query is assumed; the
198*>          routine only calculates the optimal sizes of the WORK, RWORK
199*>          and IWORK arrays, returns these values as the first entries
200*>          of the WORK, RWORK and IWORK arrays, and no error message
201*>          related to LWORK or LRWORK or LIWORK is issued by XERBLA.
202*> \endverbatim
203*>
204*> \param[out] INFO
205*> \verbatim
206*>          INFO is INTEGER
207*>          = 0:  successful exit.
208*>          < 0:  if INFO = -i, the i-th argument had an illegal value.
209*>          > 0:  if INFO = i, the algorithm failed to converge; i
210*>                off-diagonal elements of an intermediate tridiagonal
211*>                form did not converge to zero.
212*> \endverbatim
213*
214*  Authors:
215*  ========
216*
217*> \author Univ. of Tennessee
218*> \author Univ. of California Berkeley
219*> \author Univ. of Colorado Denver
220*> \author NAG Ltd.
221*
222*> \date November 2017
223*
224*> \ingroup complex16OTHEReigen
225*
226*> \par Further Details:
227*  =====================
228*>
229*> \verbatim
230*>
231*>  All details about the 2stage techniques are available in:
232*>
233*>  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
234*>  Parallel reduction to condensed forms for symmetric eigenvalue problems
235*>  using aggregated fine-grained and memory-aware kernels. In Proceedings
236*>  of 2011 International Conference for High Performance Computing,
237*>  Networking, Storage and Analysis (SC '11), New York, NY, USA,
238*>  Article 8 , 11 pages.
239*>  http://doi.acm.org/10.1145/2063384.2063394
240*>
241*>  A. Haidar, J. Kurzak, P. Luszczek, 2013.
242*>  An improved parallel singular value algorithm and its implementation
243*>  for multicore hardware, In Proceedings of 2013 International Conference
244*>  for High Performance Computing, Networking, Storage and Analysis (SC '13).
245*>  Denver, Colorado, USA, 2013.
246*>  Article 90, 12 pages.
247*>  http://doi.acm.org/10.1145/2503210.2503292
248*>
249*>  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
250*>  A novel hybrid CPU-GPU generalized eigensolver for electronic structure
251*>  calculations based on fine-grained memory aware tasks.
252*>  International Journal of High Performance Computing Applications.
253*>  Volume 28 Issue 2, Pages 196-209, May 2014.
254*>  http://hpc.sagepub.com/content/28/2/196
255*>
256*> \endverbatim
257*
258*  =====================================================================
259      SUBROUTINE ZHBEVD_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ,
260     $                          WORK, LWORK, RWORK, LRWORK, IWORK,
261     $                          LIWORK, INFO )
262*
263      IMPLICIT NONE
264*
265*  -- LAPACK driver routine (version 3.8.0) --
266*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
267*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
268*     November 2017
269*
270*     .. Scalar Arguments ..
271      CHARACTER          JOBZ, UPLO
272      INTEGER            INFO, KD, LDAB, LDZ, LIWORK, LRWORK, LWORK, N
273*     ..
274*     .. Array Arguments ..
275      INTEGER            IWORK( * )
276      DOUBLE PRECISION   RWORK( * ), W( * )
277      COMPLEX*16         AB( LDAB, * ), WORK( * ), Z( LDZ, * )
278*     ..
279*
280*  =====================================================================
281*
282*     .. Parameters ..
283      DOUBLE PRECISION   ZERO, ONE
284      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
285      COMPLEX*16         CZERO, CONE
286      PARAMETER          ( CZERO = ( 0.0D0, 0.0D0 ),
287     $                   CONE = ( 1.0D0, 0.0D0 ) )
288*     ..
289*     .. Local Scalars ..
290      LOGICAL            LOWER, LQUERY, WANTZ
291      INTEGER            IINFO, IMAX, INDE, INDWK2, INDRWK, ISCALE,
292     $                   LLWORK, INDWK, LHTRD, LWTRD, IB, INDHOUS,
293     $                   LIWMIN, LLRWK, LLWK2, LRWMIN, LWMIN
294      DOUBLE PRECISION   ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA,
295     $                   SMLNUM
296*     ..
297*     .. External Functions ..
298      LOGICAL            LSAME
299      INTEGER            ILAENV2STAGE
300      DOUBLE PRECISION   DLAMCH, ZLANHB
301      EXTERNAL           LSAME, DLAMCH, ZLANHB, ILAENV2STAGE
302*     ..
303*     .. External Subroutines ..
304      EXTERNAL           DSCAL, DSTERF, XERBLA, ZGEMM, ZLACPY,
305     $                   ZLASCL, ZSTEDC, ZHETRD_HB2ST
306*     ..
307*     .. Intrinsic Functions ..
308      INTRINSIC          DBLE, SQRT
309*     ..
310*     .. Executable Statements ..
311*
312*     Test the input parameters.
313*
314      WANTZ = LSAME( JOBZ, 'V' )
315      LOWER = LSAME( UPLO, 'L' )
316      LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 .OR. LRWORK.EQ.-1 )
317*
318      INFO = 0
319      IF( N.LE.1 ) THEN
320         LWMIN = 1
321         LRWMIN = 1
322         LIWMIN = 1
323      ELSE
324         IB    = ILAENV2STAGE( 2, 'ZHETRD_HB2ST', JOBZ, N, KD, -1, -1 )
325         LHTRD = ILAENV2STAGE( 3, 'ZHETRD_HB2ST', JOBZ, N, KD, IB, -1 )
326         LWTRD = ILAENV2STAGE( 4, 'ZHETRD_HB2ST', JOBZ, N, KD, IB, -1 )
327         IF( WANTZ ) THEN
328            LWMIN = 2*N**2
329            LRWMIN = 1 + 5*N + 2*N**2
330            LIWMIN = 3 + 5*N
331         ELSE
332            LWMIN  = MAX( N, LHTRD + LWTRD )
333            LRWMIN = N
334            LIWMIN = 1
335         END IF
336      END IF
337      IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN
338         INFO = -1
339      ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN
340         INFO = -2
341      ELSE IF( N.LT.0 ) THEN
342         INFO = -3
343      ELSE IF( KD.LT.0 ) THEN
344         INFO = -4
345      ELSE IF( LDAB.LT.KD+1 ) THEN
346         INFO = -6
347      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
348         INFO = -9
349      END IF
350*
351      IF( INFO.EQ.0 ) THEN
352         WORK( 1 )  = LWMIN
353         RWORK( 1 ) = LRWMIN
354         IWORK( 1 ) = LIWMIN
355*
356         IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
357            INFO = -11
358         ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN
359            INFO = -13
360         ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
361            INFO = -15
362         END IF
363      END IF
364*
365      IF( INFO.NE.0 ) THEN
366         CALL XERBLA( 'ZHBEVD_2STAGE', -INFO )
367         RETURN
368      ELSE IF( LQUERY ) THEN
369         RETURN
370      END IF
371*
372*     Quick return if possible
373*
374      IF( N.EQ.0 )
375     $   RETURN
376*
377      IF( N.EQ.1 ) THEN
378         W( 1 ) = DBLE( AB( 1, 1 ) )
379         IF( WANTZ )
380     $      Z( 1, 1 ) = CONE
381         RETURN
382      END IF
383*
384*     Get machine constants.
385*
386      SAFMIN = DLAMCH( 'Safe minimum' )
387      EPS    = DLAMCH( 'Precision' )
388      SMLNUM = SAFMIN / EPS
389      BIGNUM = ONE / SMLNUM
390      RMIN   = SQRT( SMLNUM )
391      RMAX   = SQRT( BIGNUM )
392*
393*     Scale matrix to allowable range, if necessary.
394*
395      ANRM = ZLANHB( 'M', UPLO, N, KD, AB, LDAB, RWORK )
396      ISCALE = 0
397      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
398         ISCALE = 1
399         SIGMA = RMIN / ANRM
400      ELSE IF( ANRM.GT.RMAX ) THEN
401         ISCALE = 1
402         SIGMA = RMAX / ANRM
403      END IF
404      IF( ISCALE.EQ.1 ) THEN
405         IF( LOWER ) THEN
406            CALL ZLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO )
407         ELSE
408            CALL ZLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO )
409         END IF
410      END IF
411*
412*     Call ZHBTRD_HB2ST to reduce Hermitian band matrix to tridiagonal form.
413*
414      INDE    = 1
415      INDRWK  = INDE + N
416      LLRWK   = LRWORK - INDRWK + 1
417      INDHOUS = 1
418      INDWK   = INDHOUS + LHTRD
419      LLWORK  = LWORK - INDWK + 1
420      INDWK2  = INDWK + N*N
421      LLWK2   = LWORK - INDWK2 + 1
422*
423      CALL ZHETRD_HB2ST( "N", JOBZ, UPLO, N, KD, AB, LDAB, W,
424     $                    RWORK( INDE ), WORK( INDHOUS ), LHTRD,
425     $                    WORK( INDWK ), LLWORK, IINFO )
426*
427*     For eigenvalues only, call DSTERF.  For eigenvectors, call ZSTEDC.
428*
429      IF( .NOT.WANTZ ) THEN
430         CALL DSTERF( N, W, RWORK( INDE ), INFO )
431      ELSE
432         CALL ZSTEDC( 'I', N, W, RWORK( INDE ), WORK, N, WORK( INDWK2 ),
433     $                LLWK2, RWORK( INDRWK ), LLRWK, IWORK, LIWORK,
434     $                INFO )
435         CALL ZGEMM( 'N', 'N', N, N, N, CONE, Z, LDZ, WORK, N, CZERO,
436     $               WORK( INDWK2 ), N )
437         CALL ZLACPY( 'A', N, N, WORK( INDWK2 ), N, Z, LDZ )
438      END IF
439*
440*     If matrix was scaled, then rescale eigenvalues appropriately.
441*
442      IF( ISCALE.EQ.1 ) THEN
443         IF( INFO.EQ.0 ) THEN
444            IMAX = N
445         ELSE
446            IMAX = INFO - 1
447         END IF
448         CALL DSCAL( IMAX, ONE / SIGMA, W, 1 )
449      END IF
450*
451      WORK( 1 )  = LWMIN
452      RWORK( 1 ) = LRWMIN
453      IWORK( 1 ) = LIWMIN
454      RETURN
455*
456*     End of ZHBEVD_2STAGE
457*
458      END
459