1      SUBROUTINE PSSYEVX( JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL,
2     $                    VU, IL, IU, ABSTOL, M, NZ, W, ORFAC, Z, IZ,
3     $                    JZ, DESCZ, WORK, LWORK, IWORK, LIWORK, IFAIL,
4     $                    ICLUSTR, GAP, INFO )
5*
6*  -- ScaLAPACK routine (version 1.7) --
7*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8*     and University of California, Berkeley.
9*     May 25, 2001
10*
11*     .. Scalar Arguments ..
12      CHARACTER          JOBZ, RANGE, UPLO
13      INTEGER            IA, IL, INFO, IU, IZ, JA, JZ, LIWORK, LWORK, M,
14     $                   N, NZ
15      REAL               ABSTOL, ORFAC, VL, VU
16*     ..
17*     .. Array Arguments ..
18      INTEGER            DESCA( * ), DESCZ( * ), ICLUSTR( * ),
19     $                   IFAIL( * ), IWORK( * )
20      REAL               A( * ), GAP( * ), W( * ), WORK( * ), Z( * )
21*     ..
22*
23*  Purpose
24*  =======
25*
26*  PSSYEVX computes selected eigenvalues and, optionally, eigenvectors
27*  of a real symmetric matrix A by calling the recommended sequence
28*  of ScaLAPACK routines.  Eigenvalues/vectors can be selected by
29*  specifying a range of values or a range of indices for the desired
30*  eigenvalues.
31*
32*  Notes
33*  =====
34*
35*  Each global data object is described by an associated description
36*  vector.  This vector stores the information required to establish
37*  the mapping between an object element and its corresponding process
38*  and memory location.
39*
40*  Let A be a generic term for any 2D block cyclicly distributed array.
41*  Such a global array has an associated description vector DESCA.
42*  In the following comments, the character _ should be read as
43*  "of the global array".
44*
45*  NOTATION        STORED IN      EXPLANATION
46*  --------------- -------------- --------------------------------------
47*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
48*                                 DTYPE_A = 1.
49*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
50*                                 the BLACS process grid A is distribu-
51*                                 ted over. The context itself is glo-
52*                                 bal, but the handle (the integer
53*                                 value) may vary.
54*  M_A    (global) DESCA( M_ )    The number of rows in the global
55*                                 array A.
56*  N_A    (global) DESCA( N_ )    The number of columns in the global
57*                                 array A.
58*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
59*                                 the rows of the array.
60*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
61*                                 the columns of the array.
62*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
63*                                 row of the array A is distributed.
64*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
65*                                 first column of the array A is
66*                                 distributed.
67*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
68*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
69*
70*  Let K be the number of rows or columns of a distributed matrix,
71*  and assume that its process grid has dimension p x q.
72*  LOCr( K ) denotes the number of elements of K that a process
73*  would receive if K were distributed over the p processes of its
74*  process column.
75*  Similarly, LOCc( K ) denotes the number of elements of K that a
76*  process would receive if K were distributed over the q processes of
77*  its process row.
78*  The values of LOCr() and LOCc() may be determined via a call to the
79*  ScaLAPACK tool function, NUMROC:
80*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
81*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
82*  An upper bound for these quantities may be computed by:
83*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
84*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
85*
86*  PSSYEVX assumes IEEE 754 standard compliant arithmetic.  To port
87*  to a system which does not have IEEE 754 arithmetic, modify
88*  the appropriate SLmake.inc file to include the compiler switch
89*  -DNO_IEEE.  This switch only affects the compilation of pslaiect.c.
90*
91*  Arguments
92*  =========
93*
94*     NP = the number of rows local to a given process.
95*     NQ = the number of columns local to a given process.
96*
97*  JOBZ    (global input) CHARACTER*1
98*          Specifies whether or not to compute the eigenvectors:
99*          = 'N':  Compute eigenvalues only.
100*          = 'V':  Compute eigenvalues and eigenvectors.
101*
102*  RANGE   (global input) CHARACTER*1
103*          = 'A': all eigenvalues will be found.
104*          = 'V': all eigenvalues in the interval [VL,VU] will be found.
105*          = 'I': the IL-th through IU-th eigenvalues will be found.
106*
107*  UPLO    (global input) CHARACTER*1
108*          Specifies whether the upper or lower triangular part of the
109*          symmetric matrix A is stored:
110*          = 'U':  Upper triangular
111*          = 'L':  Lower triangular
112*
113*  N       (global input) INTEGER
114*          The number of rows and columns of the matrix A.  N >= 0.
115*
116*  A       (local input/workspace) block cyclic REAL array,
117*          global dimension (N, N),
118*          local dimension ( LLD_A, LOCc(JA+N-1) )
119*
120*          On entry, the symmetric matrix A.  If UPLO = 'U', only the
121*          upper triangular part of A is used to define the elements of
122*          the symmetric matrix.  If UPLO = 'L', only the lower
123*          triangular part of A is used to define the elements of the
124*          symmetric matrix.
125*
126*          On exit, the lower triangle (if UPLO='L') or the upper
127*          triangle (if UPLO='U') of A, including the diagonal, is
128*          destroyed.
129*
130*  IA      (global input) INTEGER
131*          A's global row index, which points to the beginning of the
132*          submatrix which is to be operated on.
133*
134*  JA      (global input) INTEGER
135*          A's global column index, which points to the beginning of
136*          the submatrix which is to be operated on.
137*
138*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
139*          The array descriptor for the distributed matrix A.
140*          If DESCA( CTXT_ ) is incorrect, PSSYEVX cannot guarantee
141*          correct error reporting.
142*
143*  VL      (global input) REAL
144*          If RANGE='V', the lower bound of the interval to be searched
145*          for eigenvalues.  Not referenced if RANGE = 'A' or 'I'.
146*
147*  VU      (global input) REAL
148*          If RANGE='V', the upper bound of the interval to be searched
149*          for eigenvalues.  Not referenced if RANGE = 'A' or 'I'.
150*
151*  IL      (global input) INTEGER
152*          If RANGE='I', the index (from smallest to largest) of the
153*          smallest eigenvalue to be returned.  IL >= 1.
154*          Not referenced if RANGE = 'A' or 'V'.
155*
156*  IU      (global input) INTEGER
157*          If RANGE='I', the index (from smallest to largest) of the
158*          largest eigenvalue to be returned.  min(IL,N) <= IU <= N.
159*          Not referenced if RANGE = 'A' or 'V'.
160*
161*  ABSTOL  (global input) REAL
162*          If JOBZ='V', setting ABSTOL to PSLAMCH( CONTEXT, 'U') yields
163*          the most orthogonal eigenvectors.
164*
165*          The absolute error tolerance for the eigenvalues.
166*          An approximate eigenvalue is accepted as converged
167*          when it is determined to lie in an interval [a,b]
168*          of width less than or equal to
169*
170*                  ABSTOL + EPS *   max( |a|,|b| ) ,
171*
172*          where EPS is the machine precision.  If ABSTOL is less than
173*          or equal to zero, then EPS*norm(T) will be used in its place,
174*          where norm(T) is the 1-norm of the tridiagonal matrix
175*          obtained by reducing A to tridiagonal form.
176*
177*          Eigenvalues will be computed most accurately when ABSTOL is
178*          set to twice the underflow threshold 2*PSLAMCH('S') not zero.
179*          If this routine returns with ((MOD(INFO,2).NE.0) .OR.
180*          (MOD(INFO/8,2).NE.0)), indicating that some eigenvalues or
181*          eigenvectors did not converge, try setting ABSTOL to
182*          2*PSLAMCH('S').
183*
184*          See "Computing Small Singular Values of Bidiagonal Matrices
185*          with Guaranteed High Relative Accuracy," by Demmel and
186*          Kahan, LAPACK Working Note #3.
187*
188*          See "On the correctness of Parallel Bisection in Floating
189*          Point" by Demmel, Dhillon and Ren, LAPACK Working Note #70
190*
191*  M       (global output) INTEGER
192*          Total number of eigenvalues found.  0 <= M <= N.
193*
194*  NZ      (global output) INTEGER
195*          Total number of eigenvectors computed.  0 <= NZ <= M.
196*          The number of columns of Z that are filled.
197*          If JOBZ .NE. 'V', NZ is not referenced.
198*          If JOBZ .EQ. 'V', NZ = M unless the user supplies
199*          insufficient space and PSSYEVX is not able to detect this
200*          before beginning computation.  To get all the eigenvectors
201*          requested, the user must supply both sufficient
202*          space to hold the eigenvectors in Z (M .LE. DESCZ(N_))
203*          and sufficient workspace to compute them.  (See LWORK below.)
204*          PSSYEVX is always able to detect insufficient space without
205*          computation unless RANGE .EQ. 'V'.
206*
207*  W       (global output) REAL array, dimension (N)
208*          On normal exit, the first M entries contain the selected
209*          eigenvalues in ascending order.
210*
211*  ORFAC   (global input) REAL
212*          Specifies which eigenvectors should be reorthogonalized.
213*          Eigenvectors that correspond to eigenvalues which are within
214*          tol=ORFAC*norm(A) of each other are to be reorthogonalized.
215*          However, if the workspace is insufficient (see LWORK),
216*          tol may be decreased until all eigenvectors to be
217*          reorthogonalized can be stored in one process.
218*          No reorthogonalization will be done if ORFAC equals zero.
219*          A default value of 10^-3 is used if ORFAC is negative.
220*          ORFAC should be identical on all processes.
221*
222*  Z       (local output) REAL array,
223*          global dimension (N, N),
224*          local dimension ( LLD_Z, LOCc(JZ+N-1) )
225*          If JOBZ = 'V', then on normal exit the first M columns of Z
226*          contain the orthonormal eigenvectors of the matrix
227*          corresponding to the selected eigenvalues.  If an eigenvector
228*          fails to converge, then that column of Z contains the latest
229*          approximation to the eigenvector, and the index of the
230*          eigenvector is returned in IFAIL.
231*          If JOBZ = 'N', then Z is not referenced.
232*
233*  IZ      (global input) INTEGER
234*          Z's global row index, which points to the beginning of the
235*          submatrix which is to be operated on.
236*
237*  JZ      (global input) INTEGER
238*          Z's global column index, which points to the beginning of
239*          the submatrix which is to be operated on.
240*
241*  DESCZ   (global and local input) INTEGER array of dimension DLEN_.
242*          The array descriptor for the distributed matrix Z.
243*          DESCZ( CTXT_ ) must equal DESCA( CTXT_ )
244*
245*  WORK    (local workspace/output) REAL array,
246*          dimension max(3,LWORK)
247*          On return, WORK(1) contains the optimal amount of
248*          workspace required for efficient execution.
249*          if JOBZ='N' WORK(1) = optimal amount of workspace
250*             required to compute eigenvalues efficiently
251*          if JOBZ='V' WORK(1) = optimal amount of workspace
252*             required to compute eigenvalues and eigenvectors
253*             efficiently with no guarantee on orthogonality.
254*             If RANGE='V', it is assumed that all eigenvectors
255*             may be required.
256*
257*  LWORK   (local input) INTEGER
258*          Size of WORK
259*          See below for definitions of variables used to define LWORK.
260*          If no eigenvectors are requested (JOBZ = 'N') then
261*             LWORK >= 5 * N + MAX( 5 * NN, NB * ( NP0 + 1 ) )
262*          If eigenvectors are requested (JOBZ = 'V' ) then
263*             the amount of workspace required to guarantee that all
264*             eigenvectors are computed is:
265*             LWORK >= 5*N + MAX( 5*NN, NP0 * MQ0 + 2 * NB * NB ) +
266*               ICEIL( NEIG, NPROW*NPCOL)*NN
267*
268*             The computed eigenvectors may not be orthogonal if the
269*             minimal workspace is supplied and ORFAC is too small.
270*             If you want to guarantee orthogonality (at the cost
271*             of potentially poor performance) you should add
272*             the following to LWORK:
273*                (CLUSTERSIZE-1)*N
274*             where CLUSTERSIZE is the number of eigenvalues in the
275*             largest cluster, where a cluster is defined as a set of
276*             close eigenvalues: { W(K),...,W(K+CLUSTERSIZE-1) |
277*                                  W(J+1) <= W(J) + ORFAC*2*norm(A) }
278*          Variable definitions:
279*             NEIG = number of eigenvectors requested
280*             NB = DESCA( MB_ ) = DESCA( NB_ ) =
281*                  DESCZ( MB_ ) = DESCZ( NB_ )
282*             NN = MAX( N, NB, 2 )
283*             DESCA( RSRC_ ) = DESCA( NB_ ) = DESCZ( RSRC_ ) =
284*                              DESCZ( CSRC_ ) = 0
285*             NP0 = NUMROC( NN, NB, 0, 0, NPROW )
286*             MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL )
287*             ICEIL( X, Y ) is a ScaLAPACK function returning
288*             ceiling(X/Y)
289*
290*          When LWORK is too small:
291*             If LWORK is too small to guarantee orthogonality,
292*             PSSYEVX attempts to maintain orthogonality in
293*             the clusters with the smallest
294*             spacing between the eigenvalues.
295*             If LWORK is too small to compute all the eigenvectors
296*             requested, no computation is performed and INFO=-23
297*             is returned.  Note that when RANGE='V', PSSYEVX does
298*             not know how many eigenvectors are requested until
299*             the eigenvalues are computed.  Therefore, when RANGE='V'
300*             and as long as LWORK is large enough to allow PSSYEVX to
301*             compute the eigenvalues, PSSYEVX will compute the
302*             eigenvalues and as many eigenvectors as it can.
303*
304*          Relationship between workspace, orthogonality & performance:
305*             Greater performance can be achieved if adequate workspace
306*             is provided.  On the other hand, in some situations,
307*             performance can decrease as the workspace provided
308*             increases above the workspace amount shown below:
309*
310*             For optimal performance, greater workspace may be
311*             needed, i.e.
312*                LWORK >=  MAX( LWORK, 5*N + NSYTRD_LWOPT )
313*                Where:
314*                  LWORK, as defined previously, depends upon the number
315*                     of eigenvectors requested, and
316*                  NSYTRD_LWOPT = N + 2*( ANB+1 )*( 4*NPS+2 ) +
317*                    ( NPS + 3 ) *  NPS
318*
319*                  ANB = PJLAENV( DESCA( CTXT_), 3, 'PSSYTTRD', 'L',
320*                     0, 0, 0, 0)
321*                  SQNPC = INT( SQRT( DBLE( NPROW * NPCOL ) ) )
322*                  NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB )
323*
324*                  NUMROC is a ScaLAPACK tool functions;
325*                  PJLAENV is a ScaLAPACK envionmental inquiry function
326*                  MYROW, MYCOL, NPROW and NPCOL can be determined by
327*                    calling the subroutine BLACS_GRIDINFO.
328*
329*                For large N, no extra workspace is needed, however the
330*                biggest boost in performance comes for small N, so it
331*                is wise to provide the extra workspace (typically less
332*                than a Megabyte per process).
333*
334*             If CLUSTERSIZE >= N/SQRT(NPROW*NPCOL), then providing
335*             enough space to compute all the eigenvectors
336*             orthogonally will cause serious degradation in
337*             performance. In the limit (i.e. CLUSTERSIZE = N-1)
338*             PSSTEIN will perform no better than SSTEIN on 1
339*             processor.
340*             For CLUSTERSIZE = N/SQRT(NPROW*NPCOL) reorthogonalizing
341*             all eigenvectors will increase the total execution time
342*             by a factor of 2 or more.
343*             For CLUSTERSIZE > N/SQRT(NPROW*NPCOL) execution time will
344*             grow as the square of the cluster size, all other factors
345*             remaining equal and assuming enough workspace.  Less
346*             workspace means less reorthogonalization but faster
347*             execution.
348*
349*          If LWORK = -1, then LWORK is global input and a workspace
350*          query is assumed; the routine only calculates the size
351*          required for optimal performance for all work arrays. Each of
352*          these values is returned in the first entry of the
353*          corresponding work arrays, and no error message is issued by
354*          PXERBLA.
355*
356*  IWORK   (local workspace) INTEGER array
357*          On return, IWORK(1) contains the amount of integer workspace
358*          required.
359*
360*  LIWORK  (local input) INTEGER
361*          size of IWORK
362*          LIWORK >= 6 * NNP
363*          Where:
364*            NNP = MAX( N, NPROW*NPCOL + 1, 4 )
365*          If LIWORK = -1, then LIWORK is global input and a workspace
366*          query is assumed; the routine only calculates the minimum
367*          and optimal size for all work arrays. Each of these
368*          values is returned in the first entry of the corresponding
369*          work array, and no error message is issued by PXERBLA.
370*
371*  IFAIL   (global output) INTEGER array, dimension (N)
372*          If JOBZ = 'V', then on normal exit, the first M elements of
373*          IFAIL are zero.  If (MOD(INFO,2).NE.0) on exit, then
374*          IFAIL contains the
375*          indices of the eigenvectors that failed to converge.
376*          If JOBZ = 'N', then IFAIL is not referenced.
377*
378*  ICLUSTR (global output) integer array, dimension (2*NPROW*NPCOL)
379*          This array contains indices of eigenvectors corresponding to
380*          a cluster of eigenvalues that could not be reorthogonalized
381*          due to insufficient workspace (see LWORK, ORFAC and INFO).
382*          Eigenvectors corresponding to clusters of eigenvalues indexed
383*          ICLUSTR(2*I-1) to ICLUSTR(2*I), could not be
384*          reorthogonalized due to lack of workspace. Hence the
385*          eigenvectors corresponding to these clusters may not be
386*          orthogonal.  ICLUSTR() is a zero terminated array.
387*          (ICLUSTR(2*K).NE.0 .AND. ICLUSTR(2*K+1).EQ.0) if and only if
388*          K is the number of clusters
389*          ICLUSTR is not referenced if JOBZ = 'N'
390*
391*  GAP     (global output) REAL array,
392*             dimension (NPROW*NPCOL)
393*          This array contains the gap between eigenvalues whose
394*          eigenvectors could not be reorthogonalized. The output
395*          values in this array correspond to the clusters indicated
396*          by the array ICLUSTR. As a result, the dot product between
397*          eigenvectors correspoding to the I^th cluster may be as high
398*          as ( C * n ) / GAP(I) where C is a small constant.
399*
400*  INFO    (global output) INTEGER
401*          = 0:  successful exit
402*          < 0:  If the i-th argument is an array and the j-entry had
403*                an illegal value, then INFO = -(i*100+j), if the i-th
404*                argument is a scalar and had an illegal value, then
405*                INFO = -i.
406*          > 0:  if (MOD(INFO,2).NE.0), then one or more eigenvectors
407*                  failed to converge.  Their indices are stored
408*                  in IFAIL.  Ensure ABSTOL=2.0*PSLAMCH( 'U' )
409*                  Send e-mail to scalapack@cs.utk.edu
410*                if (MOD(INFO/2,2).NE.0),then eigenvectors corresponding
411*                  to one or more clusters of eigenvalues could not be
412*                  reorthogonalized because of insufficient workspace.
413*                  The indices of the clusters are stored in the array
414*                  ICLUSTR.
415*                if (MOD(INFO/4,2).NE.0), then space limit prevented
416*                  PSSYEVX from computing all of the eigenvectors
417*                  between VL and VU.  The number of eigenvectors
418*                  computed is returned in NZ.
419*                if (MOD(INFO/8,2).NE.0), then PSSTEBZ failed to compute
420*                  eigenvalues.  Ensure ABSTOL=2.0*PSLAMCH( 'U' )
421*                  Send e-mail to scalapack@cs.utk.edu
422*
423*  Alignment requirements
424*  ======================
425*
426*  The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1)
427*  must verify some alignment properties, namely the following
428*  expressions should be true:
429*
430*  ( MB_A.EQ.NB_A.EQ.MB_Z .AND. IROFFA.EQ.IROFFZ .AND. IROFFA.EQ.0 .AND.
431*    IAROW.EQ.IZROW )
432*  where
433*  IROFFA = MOD( IA-1, MB_A ) and ICOFFA = MOD( JA-1, NB_A ).
434*
435*  =====================================================================
436*
437*  Differences between PSSYEVX and SSYEVX
438*  ======================================
439*
440*  A, LDA -> A, IA, JA, DESCA
441*  Z, LDZ -> Z, IZ, JZ, DESCZ
442*  WORKSPACE needs are larger for PSSYEVX.
443*  LIWORK parameter added
444*
445*  ORFAC, ICLUSTER() and GAP() parameters added
446*  meaning of INFO is changed
447*
448*  Functional differences:
449*  PSSYEVX does not promise orthogonality for eigenvectors associated
450*  with tighly clustered eigenvalues.
451*  PSSYEVX does not reorthogonalize eigenvectors
452*  that are on different processes. The extent of reorthogonalization
453*  is controlled by the input parameter LWORK.
454*
455*  Version 1.4 limitations:
456*     DESCA(MB_) = DESCA(NB_)
457*     DESCA(M_) = DESCZ(M_)
458*     DESCA(N_) = DESCZ(N_)
459*     DESCA(MB_) = DESCZ(MB_)
460*     DESCA(NB_) = DESCZ(NB_)
461*     DESCA(RSRC_) = DESCZ(RSRC_)
462*
463*     .. Parameters ..
464      INTEGER            BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
465     $                   MB_, NB_, RSRC_, CSRC_, LLD_
466      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
467     $                   CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
468     $                   RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
469      REAL               ZERO, ONE, TEN, FIVE
470      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0, TEN = 10.0E+0,
471     $                   FIVE = 5.0E+0 )
472      INTEGER            IERREIN, IERRCLS, IERRSPC, IERREBZ
473      PARAMETER          ( IERREIN = 1, IERRCLS = 2, IERRSPC = 4,
474     $                   IERREBZ = 8 )
475*     ..
476*     .. Local Scalars ..
477      LOGICAL            ALLEIG, INDEIG, LOWER, LQUERY, QUICKRETURN,
478     $                   VALEIG, WANTZ
479      CHARACTER          ORDER
480      INTEGER            ANB, CSRC_A, I, IAROW, ICOFFA, ICTXT, IINFO,
481     $                   INDD, INDD2, INDE, INDE2, INDIBL, INDISP,
482     $                   INDTAU, INDWORK, IROFFA, IROFFZ, ISCALE,
483     $                   ISIZESTEBZ, ISIZESTEIN, IZROW, LALLWORK,
484     $                   LIWMIN, LLWORK, LWMIN, LWOPT, MAXEIGS, MB_A,
485     $                   MQ0, MYCOL, MYROW, NB, NB_A, NEIG, NN, NNP,
486     $                   NP0, NPCOL, NPROCS, NPROW, NPS, NSPLIT,
487     $                   NSYTRD_LWOPT, NZZ, OFFSET, RSRC_A, RSRC_Z,
488     $                   SIZEORMTR, SIZESTEIN, SIZESYEVX, SQNPC
489      REAL               ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN,
490     $                   SIGMA, SMLNUM, VLL, VUU
491*     ..
492*     .. Local Arrays ..
493      INTEGER            IDUM1( 4 ), IDUM2( 4 )
494*     ..
495*     .. External Functions ..
496      LOGICAL            LSAME
497      INTEGER            ICEIL, INDXG2P, NUMROC, PJLAENV
498      REAL               PSLAMCH, PSLANSY
499      EXTERNAL           LSAME, ICEIL, INDXG2P, NUMROC, PJLAENV,
500     $                   PSLAMCH, PSLANSY
501*     ..
502*     .. External Subroutines ..
503      EXTERNAL           BLACS_GRIDINFO, CHK1MAT, IGAMN2D, PCHK1MAT,
504     $                   PCHK2MAT, PSELGET, PSLARED1D, PSLASCL, PSORMTR,
505     $                   PSSTEBZ, PSSTEIN, PSSYNTRD, PXERBLA, SGEBR2D,
506     $                   SGEBS2D, SLASRT, SSCAL
507*     ..
508*     .. Intrinsic Functions ..
509      INTRINSIC          ABS, DBLE, ICHAR, INT, MAX, MIN, MOD, REAL,
510     $                   SQRT
511*     ..
512*     .. Executable Statements ..
513*       This is just to keep ftnchek and toolpack/1 happy
514      IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_*
515     $    RSRC_.LT.0 )RETURN
516*
517      QUICKRETURN = ( N.EQ.0 )
518*
519*     Test the input arguments.
520*
521      ICTXT = DESCA( CTXT_ )
522      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
523      INFO = 0
524*
525      WANTZ = LSAME( JOBZ, 'V' )
526      IF( NPROW.EQ.-1 ) THEN
527         INFO = -( 800+CTXT_ )
528      ELSE IF( WANTZ ) THEN
529         IF( ICTXT.NE.DESCZ( CTXT_ ) ) THEN
530            INFO = -( 2100+CTXT_ )
531         END IF
532      END IF
533      IF( INFO.EQ.0 ) THEN
534         CALL CHK1MAT( N, 4, N, 4, IA, JA, DESCA, 8, INFO )
535         IF( WANTZ )
536     $      CALL CHK1MAT( N, 4, N, 4, IZ, JZ, DESCZ, 21, INFO )
537*
538         IF( INFO.EQ.0 ) THEN
539*
540*     Get machine constants.
541*
542            SAFMIN = PSLAMCH( ICTXT, 'Safe minimum' )
543            EPS = PSLAMCH( ICTXT, 'Precision' )
544            SMLNUM = SAFMIN / EPS
545            BIGNUM = ONE / SMLNUM
546            RMIN = SQRT( SMLNUM )
547            RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) )
548*
549            NPROCS = NPROW*NPCOL
550            LOWER = LSAME( UPLO, 'L' )
551            ALLEIG = LSAME( RANGE, 'A' )
552            VALEIG = LSAME( RANGE, 'V' )
553            INDEIG = LSAME( RANGE, 'I' )
554*
555*     Set up pointers into the WORK array
556*
557            INDTAU = 1
558            INDE = INDTAU + N
559            INDD = INDE + N
560            INDD2 = INDD + N
561            INDE2 = INDD2 + N
562            INDWORK = INDE2 + N
563            LLWORK = LWORK - INDWORK + 1
564*
565*     Set up pointers into the IWORK array
566*
567            ISIZESTEIN = 3*N + NPROCS + 1
568            ISIZESTEBZ = MAX( 4*N, 14, NPROCS )
569            INDIBL = ( MAX( ISIZESTEIN, ISIZESTEBZ ) ) + 1
570            INDISP = INDIBL + N
571*
572*     Compute the total amount of space needed
573*
574            LQUERY = .FALSE.
575            IF( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
576     $         LQUERY = .TRUE.
577*
578            NNP = MAX( N, NPROCS+1, 4 )
579            LIWMIN = 6*NNP
580*
581            NPROCS = NPROW*NPCOL
582            NB_A = DESCA( NB_ )
583            MB_A = DESCA( MB_ )
584            NB = NB_A
585            NN = MAX( N, NB, 2 )
586*
587            RSRC_A = DESCA( RSRC_ )
588            CSRC_A = DESCA( CSRC_ )
589            IROFFA = MOD( IA-1, MB_A )
590            ICOFFA = MOD( JA-1, NB_A )
591            IAROW = INDXG2P( 1, NB_A, MYROW, RSRC_A, NPROW )
592            NP0 = NUMROC( N+IROFFA, NB, 0, 0, NPROW )
593            MQ0 = NUMROC( N+ICOFFA, NB, 0, 0, NPCOL )
594            IF( WANTZ ) THEN
595               RSRC_Z = DESCZ( RSRC_ )
596               IROFFZ = MOD( IZ-1, MB_A )
597               IZROW = INDXG2P( 1, NB_A, MYROW, RSRC_Z, NPROW )
598            ELSE
599               IROFFZ = 0
600               IZROW = 0
601            END IF
602*
603            IF( ( .NOT.WANTZ ) .OR. ( VALEIG .AND. ( .NOT.LQUERY ) ) )
604     $           THEN
605               LWMIN = 5*N + MAX( 5*NN, NB*( NP0+1 ) )
606               IF( WANTZ ) THEN
607                  MQ0 = NUMROC( MAX( N, NB, 2 ), NB, 0, 0, NPCOL )
608                  LWOPT = 5*N + MAX( 5*NN, NP0*MQ0+2*NB*NB )
609               ELSE
610                  LWOPT = LWMIN
611               END IF
612               NEIG = 0
613            ELSE
614               IF( ALLEIG .OR. VALEIG ) THEN
615                  NEIG = N
616               ELSE IF( INDEIG ) THEN
617                  NEIG = IU - IL + 1
618               END IF
619               MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL )
620               LWMIN = 5*N + MAX( 5*NN, NP0*MQ0+2*NB*NB ) +
621     $                 ICEIL( NEIG, NPROW*NPCOL )*NN
622               LWOPT = LWMIN
623*
624            END IF
625*
626*           Compute how much workspace is needed to use the
627*           new TRD code
628*
629            ANB = PJLAENV( ICTXT, 3, 'PSSYTTRD', 'L', 0, 0, 0, 0 )
630            SQNPC = INT( SQRT( DBLE( NPROW*NPCOL ) ) )
631            NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB )
632            NSYTRD_LWOPT = 2*( ANB+1 )*( 4*NPS+2 ) + ( NPS+4 )*NPS
633            LWOPT = MAX( LWOPT, 5*N+NSYTRD_LWOPT )
634*
635         END IF
636         IF( INFO.EQ.0 ) THEN
637            IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN
638               WORK( 1 ) = ABSTOL
639               IF( VALEIG ) THEN
640                  WORK( 2 ) = VL
641                  WORK( 3 ) = VU
642               ELSE
643                  WORK( 2 ) = ZERO
644                  WORK( 3 ) = ZERO
645               END IF
646               CALL SGEBS2D( ICTXT, 'ALL', ' ', 3, 1, WORK, 3 )
647            ELSE
648               CALL SGEBR2D( ICTXT, 'ALL', ' ', 3, 1, WORK, 3, 0, 0 )
649            END IF
650            IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
651               INFO = -1
652            ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
653               INFO = -2
654            ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN
655               INFO = -3
656            ELSE IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN
657               INFO = -10
658            ELSE IF( INDEIG .AND. ( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) )
659     $                THEN
660               INFO = -11
661            ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) )
662     $                THEN
663               INFO = -12
664            ELSE IF( LWORK.LT.LWMIN .AND. LWORK.NE.-1 ) THEN
665               INFO = -23
666            ELSE IF( LIWORK.LT.LIWMIN .AND. LIWORK.NE.-1 ) THEN
667               INFO = -25
668            ELSE IF( VALEIG .AND. ( ABS( WORK( 2 )-VL ).GT.FIVE*EPS*
669     $               ABS( VL ) ) ) THEN
670               INFO = -9
671            ELSE IF( VALEIG .AND. ( ABS( WORK( 3 )-VU ).GT.FIVE*EPS*
672     $               ABS( VU ) ) ) THEN
673               INFO = -10
674            ELSE IF( ABS( WORK( 1 )-ABSTOL ).GT.FIVE*EPS*ABS( ABSTOL ) )
675     $                THEN
676               INFO = -13
677            ELSE IF( IROFFA.NE.0 ) THEN
678               INFO = -6
679            ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN
680               INFO = -( 800+NB_ )
681            END IF
682            IF( WANTZ ) THEN
683               IF( IROFFA.NE.IROFFZ ) THEN
684                  INFO = -19
685               ELSE IF( IAROW.NE.IZROW ) THEN
686                  INFO = -19
687               ELSE IF( DESCA( M_ ).NE.DESCZ( M_ ) ) THEN
688                  INFO = -( 2100+M_ )
689               ELSE IF( DESCA( N_ ).NE.DESCZ( N_ ) ) THEN
690                  INFO = -( 2100+N_ )
691               ELSE IF( DESCA( MB_ ).NE.DESCZ( MB_ ) ) THEN
692                  INFO = -( 2100+MB_ )
693               ELSE IF( DESCA( NB_ ).NE.DESCZ( NB_ ) ) THEN
694                  INFO = -( 2100+NB_ )
695               ELSE IF( DESCA( RSRC_ ).NE.DESCZ( RSRC_ ) ) THEN
696                  INFO = -( 2100+RSRC_ )
697               ELSE IF( DESCA( CSRC_ ).NE.DESCZ( CSRC_ ) ) THEN
698                  INFO = -( 2100+CSRC_ )
699               ELSE IF( ICTXT.NE.DESCZ( CTXT_ ) ) THEN
700                  INFO = -( 2100+CTXT_ )
701               END IF
702            END IF
703         END IF
704         IF( WANTZ ) THEN
705            IDUM1( 1 ) = ICHAR( 'V' )
706         ELSE
707            IDUM1( 1 ) = ICHAR( 'N' )
708         END IF
709         IDUM2( 1 ) = 1
710         IF( LOWER ) THEN
711            IDUM1( 2 ) = ICHAR( 'L' )
712         ELSE
713            IDUM1( 2 ) = ICHAR( 'U' )
714         END IF
715         IDUM2( 2 ) = 2
716         IF( ALLEIG ) THEN
717            IDUM1( 3 ) = ICHAR( 'A' )
718         ELSE IF( INDEIG ) THEN
719            IDUM1( 3 ) = ICHAR( 'I' )
720         ELSE
721            IDUM1( 3 ) = ICHAR( 'V' )
722         END IF
723         IDUM2( 3 ) = 3
724         IF( LQUERY ) THEN
725            IDUM1( 4 ) = -1
726         ELSE
727            IDUM1( 4 ) = 1
728         END IF
729         IDUM2( 4 ) = 4
730         IF( WANTZ ) THEN
731            CALL PCHK2MAT( N, 4, N, 4, IA, JA, DESCA, 8, N, 4, N, 4, IZ,
732     $                     JZ, DESCZ, 21, 4, IDUM1, IDUM2, INFO )
733         ELSE
734            CALL PCHK1MAT( N, 4, N, 4, IA, JA, DESCA, 8, 4, IDUM1,
735     $                     IDUM2, INFO )
736         END IF
737         WORK( 1 ) = REAL( LWOPT )
738         IWORK( 1 ) = LIWMIN
739      END IF
740*
741      IF( INFO.NE.0 ) THEN
742         CALL PXERBLA( ICTXT, 'PSSYEVX', -INFO )
743         RETURN
744      ELSE IF( LQUERY ) THEN
745         RETURN
746      END IF
747*
748*     Quick return if possible
749*
750      IF( QUICKRETURN ) THEN
751         IF( WANTZ ) THEN
752            NZ = 0
753            ICLUSTR( 1 ) = 0
754         END IF
755         M = 0
756         WORK( 1 ) = REAL( LWOPT )
757         IWORK( 1 ) = LIWMIN
758         RETURN
759      END IF
760*
761*     Scale matrix to allowable range, if necessary.
762*
763      ABSTLL = ABSTOL
764      ISCALE = 0
765      IF( VALEIG ) THEN
766         VLL = VL
767         VUU = VU
768      ELSE
769         VLL = ZERO
770         VUU = ZERO
771      END IF
772*
773      ANRM = PSLANSY( 'M', UPLO, N, A, IA, JA, DESCA, WORK( INDWORK ) )
774*
775      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
776         ISCALE = 1
777         SIGMA = RMIN / ANRM
778         ANRM = ANRM*SIGMA
779      ELSE IF( ANRM.GT.RMAX ) THEN
780         ISCALE = 1
781         SIGMA = RMAX / ANRM
782         ANRM = ANRM*SIGMA
783      END IF
784*
785      IF( ISCALE.EQ.1 ) THEN
786         CALL PSLASCL( UPLO, ONE, SIGMA, N, N, A, IA, JA, DESCA, IINFO )
787         IF( ABSTOL.GT.0 )
788     $      ABSTLL = ABSTOL*SIGMA
789         IF( VALEIG ) THEN
790            VLL = VL*SIGMA
791            VUU = VU*SIGMA
792            IF( VUU.EQ.VLL ) THEN
793               VUU = VUU + 2*MAX( ABS( VUU )*EPS, SAFMIN )
794            END IF
795         END IF
796      END IF
797*
798*     Call PSSYNTRD to reduce symmetric matrix to tridiagonal form.
799*
800      LALLWORK = LLWORK
801*
802      CALL PSSYNTRD( UPLO, N, A, IA, JA, DESCA, WORK( INDD ),
803     $               WORK( INDE ), WORK( INDTAU ), WORK( INDWORK ),
804     $               LLWORK, IINFO )
805*
806*
807*     Copy the values of D, E to all processes
808*
809*     Here PxLARED1D is used to redistribute the tridiagonal matrix.
810*     PxLARED1D, however, doesn't yet work with arbritary matrix
811*     distributions so we have PxELGET as a backup.
812*
813      OFFSET = 0
814      IF( IA.EQ.1 .AND. JA.EQ.1 .AND. RSRC_A.EQ.0 .AND. CSRC_A.EQ.0 )
815     $     THEN
816         CALL PSLARED1D( N, IA, JA, DESCA, WORK( INDD ), WORK( INDD2 ),
817     $                   WORK( INDWORK ), LLWORK )
818*
819         CALL PSLARED1D( N, IA, JA, DESCA, WORK( INDE ), WORK( INDE2 ),
820     $                   WORK( INDWORK ), LLWORK )
821         IF( .NOT.LOWER )
822     $      OFFSET = 1
823      ELSE
824         DO 10 I = 1, N
825            CALL PSELGET( 'A', ' ', WORK( INDD2+I-1 ), A, I+IA-1,
826     $                    I+JA-1, DESCA )
827   10    CONTINUE
828         IF( LSAME( UPLO, 'U' ) ) THEN
829            DO 20 I = 1, N - 1
830               CALL PSELGET( 'A', ' ', WORK( INDE2+I-1 ), A, I+IA-1,
831     $                       I+JA, DESCA )
832   20       CONTINUE
833         ELSE
834            DO 30 I = 1, N - 1
835               CALL PSELGET( 'A', ' ', WORK( INDE2+I-1 ), A, I+IA,
836     $                       I+JA-1, DESCA )
837   30       CONTINUE
838         END IF
839      END IF
840*
841*     Call PSSTEBZ and, if eigenvectors are desired, PSSTEIN.
842*
843      IF( WANTZ ) THEN
844         ORDER = 'B'
845      ELSE
846         ORDER = 'E'
847      END IF
848*
849      CALL PSSTEBZ( ICTXT, RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL,
850     $              WORK( INDD2 ), WORK( INDE2+OFFSET ), M, NSPLIT, W,
851     $              IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWORK ),
852     $              LLWORK, IWORK( 1 ), ISIZESTEBZ, IINFO )
853*
854*
855*     IF PSSTEBZ fails, the error propogates to INFO, but
856*     we do not propogate the eigenvalue(s) which failed because:
857*     1)  This should never happen if the user specifies
858*         ABSTOL = 2 * PSLAMCH( 'U' )
859*     2)  PSSTEIN will confirm/deny whether the eigenvalues are
860*         close enough.
861*
862      IF( IINFO.NE.0 ) THEN
863         INFO = INFO + IERREBZ
864         DO 40 I = 1, M
865            IWORK( INDIBL+I-1 ) = ABS( IWORK( INDIBL+I-1 ) )
866   40    CONTINUE
867      END IF
868      IF( WANTZ ) THEN
869*
870         IF( VALEIG ) THEN
871*
872*           Compute the maximum number of eigenvalues that we can
873*           compute in the
874*           workspace that we have, and that we can store in Z.
875*
876*           Loop through the possibilities looking for the largest
877*           NZ that we can feed to PSSTEIN and PSORMTR
878*
879*           Since all processes must end up with the same value
880*           of NZ, we first compute the minimum of LALLWORK
881*
882            CALL IGAMN2D( ICTXT, 'A', ' ', 1, 1, LALLWORK, 1, 1, 1, -1,
883     $                    -1, -1 )
884*
885            MAXEIGS = DESCZ( N_ )
886*
887            DO 50 NZ = MIN( MAXEIGS, M ), 0, -1
888               MQ0 = NUMROC( NZ, NB, 0, 0, NPCOL )
889               SIZESTEIN = ICEIL( NZ, NPROCS )*N + MAX( 5*N, NP0*MQ0 )
890               SIZEORMTR = MAX( ( NB*( NB-1 ) ) / 2, ( MQ0+NP0 )*NB ) +
891     $                     NB*NB
892*
893               SIZESYEVX = MAX( SIZESTEIN, SIZEORMTR )
894               IF( SIZESYEVX.LE.LALLWORK )
895     $            GO TO 60
896   50       CONTINUE
897   60       CONTINUE
898         ELSE
899            NZ = M
900         END IF
901         NZ = MAX( NZ, 0 )
902         IF( NZ.NE.M ) THEN
903            INFO = INFO + IERRSPC
904*
905            DO 70 I = 1, M
906               IFAIL( I ) = 0
907   70       CONTINUE
908*
909*     The following code handles a rare special case
910*       - NZ .NE. M means that we don't have enough room to store
911*         all the vectors.
912*       - NSPLIT .GT. 1 means that the matrix split
913*     In this case, we cannot simply take the first NZ eigenvalues
914*     because PSSTEBZ sorts the eigenvalues by block when
915*     a split occurs.  So, we have to make another call to
916*     PSSTEBZ with a new upper limit - VUU.
917*
918            IF( NSPLIT.GT.1 ) THEN
919               CALL SLASRT( 'I', M, W, IINFO )
920               NZZ = 0
921               IF( NZ.GT.0 ) THEN
922*
923                  VUU = W( NZ ) - TEN*( EPS*ANRM+SAFMIN )
924                  IF( VLL.GE.VUU ) THEN
925                     NZZ = 0
926                  ELSE
927                     CALL PSSTEBZ( ICTXT, RANGE, ORDER, N, VLL, VUU, IL,
928     $                             IU, ABSTLL, WORK( INDD2 ),
929     $                             WORK( INDE2+OFFSET ), NZZ, NSPLIT, W,
930     $                             IWORK( INDIBL ), IWORK( INDISP ),
931     $                             WORK( INDWORK ), LLWORK, IWORK( 1 ),
932     $                             ISIZESTEBZ, IINFO )
933                  END IF
934*
935                  IF( MOD( INFO / IERREBZ, 1 ).EQ.0 ) THEN
936                     IF( NZZ.GT.NZ .OR. IINFO.NE.0 ) THEN
937                        INFO = INFO + IERREBZ
938                     END IF
939                  END IF
940               END IF
941               NZ = MIN( NZ, NZZ )
942*
943            END IF
944         END IF
945         CALL PSSTEIN( N, WORK( INDD2 ), WORK( INDE2+OFFSET ), NZ, W,
946     $                 IWORK( INDIBL ), IWORK( INDISP ), ORFAC, Z, IZ,
947     $                 JZ, DESCZ, WORK( INDWORK ), LALLWORK, IWORK( 1 ),
948     $                 ISIZESTEIN, IFAIL, ICLUSTR, GAP, IINFO )
949*
950         IF( IINFO.GE.NZ+1 )
951     $      INFO = INFO + IERRCLS
952         IF( MOD( IINFO, NZ+1 ).NE.0 )
953     $      INFO = INFO + IERREIN
954*
955*     Z = Q * Z
956*
957*
958         IF( NZ.GT.0 ) THEN
959            CALL PSORMTR( 'L', UPLO, 'N', N, NZ, A, IA, JA, DESCA,
960     $                    WORK( INDTAU ), Z, IZ, JZ, DESCZ,
961     $                    WORK( INDWORK ), LLWORK, IINFO )
962         END IF
963*
964      END IF
965*
966*     If matrix was scaled, then rescale eigenvalues appropriately.
967*
968      IF( ISCALE.EQ.1 ) THEN
969         CALL SSCAL( M, ONE / SIGMA, W, 1 )
970      END IF
971*
972      WORK( 1 ) = REAL( LWOPT )
973      IWORK( 1 ) = LIWMIN
974*
975      RETURN
976*
977*     End of PSSYEVX
978*
979      END
980