1*> \brief <b> DSYEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for SY matrices</b>
2*
3*  @precisions fortran d -> s
4*
5*  =========== DOCUMENTATION ===========
6*
7* Online html documentation available at
8*            http://www.netlib.org/lapack/explore-html/
9*
10*> \htmlonly
11*> Download DSYEVD_2STAGE + dependencies
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsyevd_2stage.f">
13*> [TGZ]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsyevd_2stage.f">
15*> [ZIP]</a>
16*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsyevd_2stage.f">
17*> [TXT]</a>
18*> \endhtmlonly
19*
20*  Definition:
21*  ===========
22*
23*       SUBROUTINE DSYEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK,
24*                                IWORK, LIWORK, INFO )
25*
26*       IMPLICIT NONE
27*
28*       .. Scalar Arguments ..
29*       CHARACTER          JOBZ, UPLO
30*       INTEGER            INFO, LDA, LIWORK, LWORK, N
31*       ..
32*       .. Array Arguments ..
33*       INTEGER            IWORK( * )
34*       DOUBLE PRECISION   A( LDA, * ), W( * ), WORK( * )
35*       ..
36*
37*
38*> \par Purpose:
39*  =============
40*>
41*> \verbatim
42*>
43*> DSYEVD_2STAGE computes all eigenvalues and, optionally, eigenvectors of a
44*> real symmetric matrix A using the 2stage technique for
45*> the reduction to tridiagonal. If eigenvectors are desired, it uses a
46*> divide and conquer algorithm.
47*>
48*> The divide and conquer algorithm makes very mild assumptions about
49*> floating point arithmetic. It will work on machines with a guard
50*> digit in add/subtract, or on those binary machines without guard
51*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
52*> Cray-2. It could conceivably fail on hexadecimal or decimal machines
53*> without guard digits, but we know of none.
54*> \endverbatim
55*
56*  Arguments:
57*  ==========
58*
59*> \param[in] JOBZ
60*> \verbatim
61*>          JOBZ is CHARACTER*1
62*>          = 'N':  Compute eigenvalues only;
63*>          = 'V':  Compute eigenvalues and eigenvectors.
64*>                  Not available in this release.
65*> \endverbatim
66*>
67*> \param[in] UPLO
68*> \verbatim
69*>          UPLO is CHARACTER*1
70*>          = 'U':  Upper triangle of A is stored;
71*>          = 'L':  Lower triangle of A is stored.
72*> \endverbatim
73*>
74*> \param[in] N
75*> \verbatim
76*>          N is INTEGER
77*>          The order of the matrix A.  N >= 0.
78*> \endverbatim
79*>
80*> \param[in,out] A
81*> \verbatim
82*>          A is DOUBLE PRECISION array, dimension (LDA, N)
83*>          On entry, the symmetric matrix A.  If UPLO = 'U', the
84*>          leading N-by-N upper triangular part of A contains the
85*>          upper triangular part of the matrix A.  If UPLO = 'L',
86*>          the leading N-by-N lower triangular part of A contains
87*>          the lower triangular part of the matrix A.
88*>          On exit, if JOBZ = 'V', then if INFO = 0, A contains the
89*>          orthonormal eigenvectors of the matrix A.
90*>          If JOBZ = 'N', then on exit the lower triangle (if UPLO='L')
91*>          or the upper triangle (if UPLO='U') of A, including the
92*>          diagonal, is destroyed.
93*> \endverbatim
94*>
95*> \param[in] LDA
96*> \verbatim
97*>          LDA is INTEGER
98*>          The leading dimension of the array A.  LDA >= max(1,N).
99*> \endverbatim
100*>
101*> \param[out] W
102*> \verbatim
103*>          W is DOUBLE PRECISION array, dimension (N)
104*>          If INFO = 0, the eigenvalues in ascending order.
105*> \endverbatim
106*>
107*> \param[out] WORK
108*> \verbatim
109*>          WORK is DOUBLE PRECISION array,
110*>                                         dimension (LWORK)
111*>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
112*> \endverbatim
113*>
114*> \param[in] LWORK
115*> \verbatim
116*>          LWORK is INTEGER
117*>          The dimension of the array WORK.
118*>          If N <= 1,               LWORK must be at least 1.
119*>          If JOBZ = 'N' and N > 1, LWORK must be queried.
120*>                                   LWORK = MAX(1, dimension) where
121*>                                   dimension = max(stage1,stage2) + (KD+1)*N + 2*N+1
122*>                                             = N*KD + N*max(KD+1,FACTOPTNB)
123*>                                               + max(2*KD*KD, KD*NTHREADS)
124*>                                               + (KD+1)*N + 2*N+1
125*>                                   where KD is the blocking size of the reduction,
126*>                                   FACTOPTNB is the blocking used by the QR or LQ
127*>                                   algorithm, usually FACTOPTNB=128 is a good choice
128*>                                   NTHREADS is the number of threads used when
129*>                                   openMP compilation is enabled, otherwise =1.
130*>          If JOBZ = 'V' and N > 1, LWORK must be at least
131*>                                                1 + 6*N + 2*N**2.
132*>
133*>          If LWORK = -1, then a workspace query is assumed; the routine
134*>          only calculates the optimal sizes of the WORK and IWORK
135*>          arrays, returns these values as the first entries of the WORK
136*>          and IWORK arrays, and no error message related to LWORK or
137*>          LIWORK is issued by XERBLA.
138*> \endverbatim
139*>
140*> \param[out] IWORK
141*> \verbatim
142*>          IWORK is INTEGER array, dimension (MAX(1,LIWORK))
143*>          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
144*> \endverbatim
145*>
146*> \param[in] LIWORK
147*> \verbatim
148*>          LIWORK is INTEGER
149*>          The dimension of the array IWORK.
150*>          If N <= 1,                LIWORK must be at least 1.
151*>          If JOBZ  = 'N' and N > 1, LIWORK must be at least 1.
152*>          If JOBZ  = 'V' and N > 1, LIWORK must be at least 3 + 5*N.
153*>
154*>          If LIWORK = -1, then a workspace query is assumed; the
155*>          routine only calculates the optimal sizes of the WORK and
156*>          IWORK arrays, returns these values as the first entries of
157*>          the WORK and IWORK arrays, and no error message related to
158*>          LWORK or LIWORK is issued by XERBLA.
159*> \endverbatim
160*>
161*> \param[out] INFO
162*> \verbatim
163*>          INFO is INTEGER
164*>          = 0:  successful exit
165*>          < 0:  if INFO = -i, the i-th argument had an illegal value
166*>          > 0:  if INFO = i and JOBZ = 'N', then the algorithm failed
167*>                to converge; i off-diagonal elements of an intermediate
168*>                tridiagonal form did not converge to zero;
169*>                if INFO = i and JOBZ = 'V', then the algorithm failed
170*>                to compute an eigenvalue while working on the submatrix
171*>                lying in rows and columns INFO/(N+1) through
172*>                mod(INFO,N+1).
173*> \endverbatim
174*
175*  Authors:
176*  ========
177*
178*> \author Univ. of Tennessee
179*> \author Univ. of California Berkeley
180*> \author Univ. of Colorado Denver
181*> \author NAG Ltd.
182*
183*> \date November 2017
184*
185*> \ingroup doubleSYeigen
186*
187*> \par Contributors:
188*  ==================
189*>
190*> Jeff Rutter, Computer Science Division, University of California
191*> at Berkeley, USA \n
192*>  Modified by Francoise Tisseur, University of Tennessee \n
193*>  Modified description of INFO. Sven, 16 Feb 05. \n
194*> \par Further Details:
195*  =====================
196*>
197*> \verbatim
198*>
199*>  All details about the 2stage techniques are available in:
200*>
201*>  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
202*>  Parallel reduction to condensed forms for symmetric eigenvalue problems
203*>  using aggregated fine-grained and memory-aware kernels. In Proceedings
204*>  of 2011 International Conference for High Performance Computing,
205*>  Networking, Storage and Analysis (SC '11), New York, NY, USA,
206*>  Article 8 , 11 pages.
207*>  http://doi.acm.org/10.1145/2063384.2063394
208*>
209*>  A. Haidar, J. Kurzak, P. Luszczek, 2013.
210*>  An improved parallel singular value algorithm and its implementation
211*>  for multicore hardware, In Proceedings of 2013 International Conference
212*>  for High Performance Computing, Networking, Storage and Analysis (SC '13).
213*>  Denver, Colorado, USA, 2013.
214*>  Article 90, 12 pages.
215*>  http://doi.acm.org/10.1145/2503210.2503292
216*>
217*>  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
218*>  A novel hybrid CPU-GPU generalized eigensolver for electronic structure
219*>  calculations based on fine-grained memory aware tasks.
220*>  International Journal of High Performance Computing Applications.
221*>  Volume 28 Issue 2, Pages 196-209, May 2014.
222*>  http://hpc.sagepub.com/content/28/2/196
223*>
224*> \endverbatim
225*
226*  =====================================================================
227      SUBROUTINE DSYEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK,
228     $                          IWORK, LIWORK, INFO )
229*
230      IMPLICIT NONE
231*
232*  -- LAPACK driver routine (version 3.8.0) --
233*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
234*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
235*     November 2017
236*
237*     .. Scalar Arguments ..
238      CHARACTER          JOBZ, UPLO
239      INTEGER            INFO, LDA, LIWORK, LWORK, N
240*     ..
241*     .. Array Arguments ..
242      INTEGER            IWORK( * )
243      DOUBLE PRECISION   A( LDA, * ), W( * ), WORK( * )
244*     ..
245*
246*  =====================================================================
247*
248*     .. Parameters ..
249      DOUBLE PRECISION   ZERO, ONE
250      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
251*     ..
252*     .. Local Scalars ..
253*
254      LOGICAL            LOWER, LQUERY, WANTZ
255      INTEGER            IINFO, INDE, INDTAU, INDWK2, INDWRK, ISCALE,
256     $                   LIWMIN, LLWORK, LLWRK2, LWMIN,
257     $                   LHTRD, LWTRD, KD, IB, INDHOUS
258      DOUBLE PRECISION   ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA,
259     $                   SMLNUM
260*     ..
261*     .. External Functions ..
262      LOGICAL            LSAME
263      INTEGER            ILAENV2STAGE
264      DOUBLE PRECISION   DLAMCH, DLANSY
265      EXTERNAL           LSAME, DLAMCH, DLANSY, ILAENV2STAGE
266*     ..
267*     .. External Subroutines ..
268      EXTERNAL           DLACPY, DLASCL, DORMTR, DSCAL, DSTEDC, DSTERF,
269     $                   DSYTRD_2STAGE, XERBLA
270*     ..
271*     .. Intrinsic Functions ..
272      INTRINSIC          MAX, SQRT
273*     ..
274*     .. Executable Statements ..
275*
276*     Test the input parameters.
277*
278      WANTZ = LSAME( JOBZ, 'V' )
279      LOWER = LSAME( UPLO, 'L' )
280      LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
281*
282      INFO = 0
283      IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN
284         INFO = -1
285      ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN
286         INFO = -2
287      ELSE IF( N.LT.0 ) THEN
288         INFO = -3
289      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
290         INFO = -5
291      END IF
292*
293      IF( INFO.EQ.0 ) THEN
294         IF( N.LE.1 ) THEN
295            LIWMIN = 1
296            LWMIN = 1
297         ELSE
298            KD    = ILAENV2STAGE( 1, 'DSYTRD_2STAGE', JOBZ,
299     $                            N, -1, -1, -1 )
300            IB    = ILAENV2STAGE( 2, 'DSYTRD_2STAGE', JOBZ,
301     $                            N, KD, -1, -1 )
302            LHTRD = ILAENV2STAGE( 3, 'DSYTRD_2STAGE', JOBZ,
303     $                            N, KD, IB, -1 )
304            LWTRD = ILAENV2STAGE( 4, 'DSYTRD_2STAGE', JOBZ,
305     $                            N, KD, IB, -1 )
306            IF( WANTZ ) THEN
307               LIWMIN = 3 + 5*N
308               LWMIN = 1 + 6*N + 2*N**2
309            ELSE
310               LIWMIN = 1
311               LWMIN = 2*N + 1 + LHTRD + LWTRD
312            END IF
313         END IF
314         WORK( 1 )  = LWMIN
315         IWORK( 1 ) = LIWMIN
316*
317         IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
318            INFO = -8
319         ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
320            INFO = -10
321         END IF
322      END IF
323*
324      IF( INFO.NE.0 ) THEN
325         CALL XERBLA( 'DSYEVD_2STAGE', -INFO )
326         RETURN
327      ELSE IF( LQUERY ) THEN
328         RETURN
329      END IF
330*
331*     Quick return if possible
332*
333      IF( N.EQ.0 )
334     $   RETURN
335*
336      IF( N.EQ.1 ) THEN
337         W( 1 ) = A( 1, 1 )
338         IF( WANTZ )
339     $      A( 1, 1 ) = ONE
340         RETURN
341      END IF
342*
343*     Get machine constants.
344*
345      SAFMIN = DLAMCH( 'Safe minimum' )
346      EPS    = DLAMCH( 'Precision' )
347      SMLNUM = SAFMIN / EPS
348      BIGNUM = ONE / SMLNUM
349      RMIN   = SQRT( SMLNUM )
350      RMAX   = SQRT( BIGNUM )
351*
352*     Scale matrix to allowable range, if necessary.
353*
354      ANRM = DLANSY( 'M', UPLO, N, A, LDA, WORK )
355      ISCALE = 0
356      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
357         ISCALE = 1
358         SIGMA = RMIN / ANRM
359      ELSE IF( ANRM.GT.RMAX ) THEN
360         ISCALE = 1
361         SIGMA = RMAX / ANRM
362      END IF
363      IF( ISCALE.EQ.1 )
364     $   CALL DLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO )
365*
366*     Call DSYTRD_2STAGE to reduce symmetric matrix to tridiagonal form.
367*
368      INDE    = 1
369      INDTAU  = INDE + N
370      INDHOUS = INDTAU + N
371      INDWRK  = INDHOUS + LHTRD
372      LLWORK  = LWORK - INDWRK + 1
373      INDWK2  = INDWRK + N*N
374      LLWRK2  = LWORK - INDWK2 + 1
375*
376      CALL DSYTRD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK( INDE ),
377     $                    WORK( INDTAU ), WORK( INDHOUS ), LHTRD,
378     $                    WORK( INDWRK ), LLWORK, IINFO )
379*
380*     For eigenvalues only, call DSTERF.  For eigenvectors, first call
381*     DSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the
382*     tridiagonal matrix, then call DORMTR to multiply it by the
383*     Householder transformations stored in A.
384*
385      IF( .NOT.WANTZ ) THEN
386         CALL DSTERF( N, W, WORK( INDE ), INFO )
387      ELSE
388*        Not available in this release, and argument checking should not
389*        let it getting here
390         RETURN
391         CALL DSTEDC( 'I', N, W, WORK( INDE ), WORK( INDWRK ), N,
392     $                WORK( INDWK2 ), LLWRK2, IWORK, LIWORK, INFO )
393         CALL DORMTR( 'L', UPLO, 'N', N, N, A, LDA, WORK( INDTAU ),
394     $                WORK( INDWRK ), N, WORK( INDWK2 ), LLWRK2, IINFO )
395         CALL DLACPY( 'A', N, N, WORK( INDWRK ), N, A, LDA )
396      END IF
397*
398*     If matrix was scaled, then rescale eigenvalues appropriately.
399*
400      IF( ISCALE.EQ.1 )
401     $   CALL DSCAL( N, ONE / SIGMA, W, 1 )
402*
403      WORK( 1 )  = LWMIN
404      IWORK( 1 ) = LIWMIN
405*
406      RETURN
407*
408*     End of DSYEVD_2STAGE
409*
410      END
411