1      SUBROUTINE PSSYGVX( IBTYPE, JOBZ, RANGE, UPLO, N, A, IA, JA,
2     $                    DESCA, B, IB, JB, DESCB, VL, VU, IL, IU,
3     $                    ABSTOL, M, NZ, W, ORFAC, Z, IZ, JZ, DESCZ,
4     $                    WORK, LWORK, IWORK, LIWORK, IFAIL, ICLUSTR,
5     $                    GAP, INFO )
6*
7*  -- ScaLAPACK routine (version 1.7) --
8*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
9*     and University of California, Berkeley.
10*     October 15, 1999
11*
12*     .. Scalar Arguments ..
13      CHARACTER          JOBZ, RANGE, UPLO
14      INTEGER            IA, IB, IBTYPE, IL, INFO, IU, IZ, JA, JB, JZ,
15     $                   LIWORK, LWORK, M, N, NZ
16      REAL               ABSTOL, ORFAC, VL, VU
17*     ..
18*     .. Array Arguments ..
19*
20      INTEGER            DESCA( * ), DESCB( * ), DESCZ( * ),
21     $                   ICLUSTR( * ), IFAIL( * ), IWORK( * )
22      REAL               A( * ), B( * ), GAP( * ), W( * ), WORK( * ),
23     $                   Z( * )
24*     ..
25*
26*  Purpose
27*
28*  =======
29*
30*  PSSYGVX computes all the eigenvalues, and optionally,
31*  the eigenvectors
32*  of a real generalized SY-definite eigenproblem, of the form
33*  sub( A )*x=(lambda)*sub( B )*x,  sub( A )*sub( B )x=(lambda)*x,  or
34*  sub( B )*sub( A )*x=(lambda)*x.
35*  Here sub( A ) denoting A( IA:IA+N-1, JA:JA+N-1 ) is assumed to be
36*  SY, and sub( B ) denoting B( IB:IB+N-1, JB:JB+N-1 ) is assumed
37*  to be symmetric positive definite.
38*
39*  Notes
40*  =====
41*
42*
43*  Each global data object is described by an associated description
44*  vector.  This vector stores the information required to establish
45*  the mapping between an object element and its corresponding process
46*  and memory location.
47*
48*  Let A be a generic term for any 2D block cyclicly distributed array.
49*  Such a global array has an associated description vector DESCA.
50*  In the following comments, the character _ should be read as
51*  "of the global array".
52*
53*  NOTATION        STORED IN      EXPLANATION
54*  --------------- -------------- --------------------------------------
55*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
56*                                 DTYPE_A = 1.
57*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
58*                                 the BLACS process grid A is distribu-
59*                                 ted over. The context itself is glo-
60*                                 bal, but the handle (the integer
61*                                 value) may vary.
62*  M_A    (global) DESCA( M_ )    The number of rows in the global
63*                                 array A.
64*  N_A    (global) DESCA( N_ )    The number of columns in the global
65*                                 array A.
66*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
67*                                 the rows of the array.
68*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
69*                                 the columns of the array.
70*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
71*                                 row of the array A is distributed.
72*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
73*                                 first column of the array A is
74*                                 distributed.
75*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
76*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
77*
78*  Let K be the number of rows or columns of a distributed matrix,
79*  and assume that its process grid has dimension p x q.
80*  LOCr( K ) denotes the number of elements of K that a process
81*  would receive if K were distributed over the p processes of its
82*  process column.
83*  Similarly, LOCc( K ) denotes the number of elements of K that a
84*  process would receive if K were distributed over the q processes of
85*  its process row.
86*  The values of LOCr() and LOCc() may be determined via a call to the
87*  ScaLAPACK tool function, NUMROC:
88*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
89*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
90*  An upper bound for these quantities may be computed by:
91*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
92*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
93*
94*
95*  Arguments
96*  =========
97*
98*  IBTYPE   (global input) INTEGER
99*          Specifies the problem type to be solved:
100*          = 1:  sub( A )*x = (lambda)*sub( B )*x
101*          = 2:  sub( A )*sub( B )*x = (lambda)*x
102*          = 3:  sub( B )*sub( A )*x = (lambda)*x
103*
104*  JOBZ    (global input) CHARACTER*1
105*          = 'N':  Compute eigenvalues only;
106*          = 'V':  Compute eigenvalues and eigenvectors.
107*
108*  RANGE   (global input) CHARACTER*1
109*          = 'A': all eigenvalues will be found.
110*          = 'V': all eigenvalues in the interval [VL,VU] will be found.
111*          = 'I': the IL-th through IU-th eigenvalues will be found.
112*
113*  UPLO    (global input) CHARACTER*1
114*          = 'U':  Upper triangles of sub( A ) and sub( B ) are stored;
115*          = 'L':  Lower triangles of sub( A ) and sub( B ) are stored.
116*
117*  N       (global input) INTEGER
118*          The order of the matrices sub( A ) and sub( B ).  N >= 0.
119*
120*  A       (local input/local output) REAL pointer into the
121*          local memory to an array of dimension (LLD_A, LOCc(JA+N-1)).
122*          On entry, this array contains the local pieces of the
123*          N-by-N symmetric distributed matrix sub( A ). If UPLO = 'U',
124*          the leading N-by-N upper triangular part of sub( A ) contains
125*          the upper triangular part of the matrix.  If UPLO = 'L', the
126*          leading N-by-N lower triangular part of sub( A ) contains
127*          the lower triangular part of the matrix.
128*
129*          On exit, if JOBZ = 'V', then if INFO = 0, sub( A ) contains
130*          the distributed matrix Z of eigenvectors.  The eigenvectors
131*          are normalized as follows:
132*          if IBTYPE = 1 or 2, Z**T*sub( B )*Z = I;
133*          if IBTYPE = 3, Z**T*inv( sub( B ) )*Z = I.
134*          If JOBZ = 'N', then on exit the upper triangle (if UPLO='U')
135*          or the lower triangle (if UPLO='L') of sub( A ), including
136*          the diagonal, is destroyed.
137*
138*  IA      (global input) INTEGER
139*          The row index in the global array A indicating the first
140*          row of sub( A ).
141*
142*  JA      (global input) INTEGER
143*          The column index in the global array A indicating the
144*          first column of sub( A ).
145*
146*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
147*          The array descriptor for the distributed matrix A.
148*          If DESCA( CTXT_ ) is incorrect, PSSYGVX cannot guarantee
149*          correct error reporting.
150*
151*  B       (local input/local output) REAL pointer into the
152*          local memory to an array of dimension (LLD_B, LOCc(JB+N-1)).
153*          On entry, this array contains the local pieces of the
154*          N-by-N symmetric distributed matrix sub( B ). If UPLO = 'U',
155*          the leading N-by-N upper triangular part of sub( B ) contains
156*          the upper triangular part of the matrix.  If UPLO = 'L', the
157*          leading N-by-N lower triangular part of sub( B ) contains
158*          the lower triangular part of the matrix.
159*
160*          On exit, if INFO <= N, the part of sub( B ) containing the
161*          matrix is overwritten by the triangular factor U or L from
162*          the Cholesky factorization sub( B ) = U**T*U or
163*          sub( B ) = L*L**T.
164*
165*  IB      (global input) INTEGER
166*          The row index in the global array B indicating the first
167*          row of sub( B ).
168*
169*  JB      (global input) INTEGER
170*          The column index in the global array B indicating the
171*          first column of sub( B ).
172*
173*  DESCB   (global and local input) INTEGER array of dimension DLEN_.
174*          The array descriptor for the distributed matrix B.
175*          DESCB( CTXT_ ) must equal DESCA( CTXT_ )
176*
177*  VL      (global input) REAL
178*          If RANGE='V', the lower bound of the interval to be searched
179*          for eigenvalues.  Not referenced if RANGE = 'A' or 'I'.
180*
181*  VU      (global input) REAL
182*          If RANGE='V', the upper bound of the interval to be searched
183*          for eigenvalues.  Not referenced if RANGE = 'A' or 'I'.
184*
185*  IL      (global input) INTEGER
186*          If RANGE='I', the index (from smallest to largest) of the
187*          smallest eigenvalue to be returned.  IL >= 1.
188*          Not referenced if RANGE = 'A' or 'V'.
189*
190*  IU      (global input) INTEGER
191*          If RANGE='I', the index (from smallest to largest) of the
192*          largest eigenvalue to be returned.  min(IL,N) <= IU <= N.
193*          Not referenced if RANGE = 'A' or 'V'.
194*
195*  ABSTOL  (global input) REAL
196*          If JOBZ='V', setting ABSTOL to PSLAMCH( CONTEXT, 'U') yields
197*          the most orthogonal eigenvectors.
198*
199*          The absolute error tolerance for the eigenvalues.
200*          An approximate eigenvalue is accepted as converged
201*          when it is determined to lie in an interval [a,b]
202*          of width less than or equal to
203*
204*                  ABSTOL + EPS *   max( |a|,|b| ) ,
205*
206*          where EPS is the machine precision.  If ABSTOL is less than
207*          or equal to zero, then EPS*norm(T) will be used in its place,
208*          where norm(T) is the 1-norm of the tridiagonal matrix
209*          obtained by reducing A to tridiagonal form.
210*
211*          Eigenvalues will be computed most accurately when ABSTOL is
212*          set to twice the underflow threshold 2*PSLAMCH('S') not zero.
213*          If this routine returns with ((MOD(INFO,2).NE.0) .OR.
214*          (MOD(INFO/8,2).NE.0)), indicating that some eigenvalues or
215*          eigenvectors did not converge, try setting ABSTOL to
216*          2*PSLAMCH('S').
217*
218*          See "Computing Small Singular Values of Bidiagonal Matrices
219*          with Guaranteed High Relative Accuracy," by Demmel and
220*          Kahan, LAPACK Working Note #3.
221*
222*          See "On the correctness of Parallel Bisection in Floating
223*          Point" by Demmel, Dhillon and Ren, LAPACK Working Note #70
224*
225*  M       (global output) INTEGER
226*          Total number of eigenvalues found.  0 <= M <= N.
227*
228*  NZ      (global output) INTEGER
229*          Total number of eigenvectors computed.  0 <= NZ <= M.
230*          The number of columns of Z that are filled.
231*          If JOBZ .NE. 'V', NZ is not referenced.
232*          If JOBZ .EQ. 'V', NZ = M unless the user supplies
233*          insufficient space and PSSYGVX is not able to detect this
234*          before beginning computation.  To get all the eigenvectors
235*          requested, the user must supply both sufficient
236*          space to hold the eigenvectors in Z (M .LE. DESCZ(N_))
237*          and sufficient workspace to compute them.  (See LWORK below.)
238*          PSSYGVX is always able to detect insufficient space without
239*          computation unless RANGE .EQ. 'V'.
240*
241*  W       (global output) REAL array, dimension (N)
242*          On normal exit, the first M entries contain the selected
243*          eigenvalues in ascending order.
244*
245*  ORFAC   (global input) REAL
246*          Specifies which eigenvectors should be reorthogonalized.
247*          Eigenvectors that correspond to eigenvalues which are within
248*          tol=ORFAC*norm(A) of each other are to be reorthogonalized.
249*          However, if the workspace is insufficient (see LWORK),
250*          tol may be decreased until all eigenvectors to be
251*          reorthogonalized can be stored in one process.
252*          No reorthogonalization will be done if ORFAC equals zero.
253*          A default value of 10^-3 is used if ORFAC is negative.
254*          ORFAC should be identical on all processes.
255*
256*  Z       (local output) REAL array,
257*          global dimension (N, N),
258*          local dimension ( LLD_Z, LOCc(JZ+N-1) )
259*          If JOBZ = 'V', then on normal exit the first M columns of Z
260*          contain the orthonormal eigenvectors of the matrix
261*          corresponding to the selected eigenvalues.  If an eigenvector
262*          fails to converge, then that column of Z contains the latest
263*          approximation to the eigenvector, and the index of the
264*          eigenvector is returned in IFAIL.
265*          If JOBZ = 'N', then Z is not referenced.
266*
267*  IZ      (global input) INTEGER
268*          The row index in the global array Z indicating the first
269*          row of sub( Z ).
270*
271*  JZ      (global input) INTEGER
272*          The column index in the global array Z indicating the
273*          first column of sub( Z ).
274*
275*  DESCZ   (global and local input) INTEGER array of dimension DLEN_.
276*          The array descriptor for the distributed matrix Z.
277*          DESCZ( CTXT_ ) must equal DESCA( CTXT_ )
278*
279*  WORK    (local workspace/output) REAL array,
280*             dimension max(3,LWORK)
281*          if JOBZ='N' WORK(1) = optimal amount of workspace
282*             required to compute eigenvalues efficiently
283*          if JOBZ='V' WORK(1) = optimal amount of workspace
284*             required to compute eigenvalues and eigenvectors
285*             efficiently with no guarantee on orthogonality.
286*             If RANGE='V', it is assumed that all eigenvectors
287*             may be required.
288*
289*  LWORK   (local input) INTEGER
290*          See below for definitions of variables used to define LWORK.
291*          If no eigenvectors are requested (JOBZ = 'N') then
292*             LWORK >= 5 * N + MAX( 5 * NN, NB * ( NP0 + 1 ) )
293*          If eigenvectors are requested (JOBZ = 'V' ) then
294*             the amount of workspace required to guarantee that all
295*             eigenvectors are computed is:
296*             LWORK >= 5 * N + MAX( 5*NN, NP0 * MQ0 + 2 * NB * NB ) +
297*               ICEIL( NEIG, NPROW*NPCOL)*NN
298*
299*             The computed eigenvectors may not be orthogonal if the
300*             minimal workspace is supplied and ORFAC is too small.
301*             If you want to guarantee orthogonality (at the cost
302*             of potentially poor performance) you should add
303*             the following to LWORK:
304*                (CLUSTERSIZE-1)*N
305*             where CLUSTERSIZE is the number of eigenvalues in the
306*             largest cluster, where a cluster is defined as a set of
307*             close eigenvalues: { W(K),...,W(K+CLUSTERSIZE-1) |
308*                                  W(J+1) <= W(J) + ORFAC*2*norm(A) }
309*          Variable definitions:
310*             NEIG = number of eigenvectors requested
311*             NB = DESCA( MB_ ) = DESCA( NB_ ) = DESCZ( MB_ ) =
312*                  DESCZ( NB_ )
313*             NN = MAX( N, NB, 2 )
314*             DESCA( RSRC_ ) = DESCA( NB_ ) = DESCZ( RSRC_ ) =
315*                              DESCZ( CSRC_ ) = 0
316*             NP0 = NUMROC( NN, NB, 0, 0, NPROW )
317*             MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL )
318*             ICEIL( X, Y ) is a ScaLAPACK function returning
319*             ceiling(X/Y)
320*
321*          When LWORK is too small:
322*             If LWORK is too small to guarantee orthogonality,
323*             PSSYGVX attempts to maintain orthogonality in
324*             the clusters with the smallest
325*             spacing between the eigenvalues.
326*             If LWORK is too small to compute all the eigenvectors
327*             requested, no computation is performed and INFO=-23
328*             is returned.  Note that when RANGE='V', PSSYGVX does
329*             not know how many eigenvectors are requested until
330*             the eigenvalues are computed.  Therefore, when RANGE='V'
331*             and as long as LWORK is large enough to allow PSSYGVX to
332*             compute the eigenvalues, PSSYGVX will compute the
333*             eigenvalues and as many eigenvectors as it can.
334*
335*          Relationship between workspace, orthogonality & performance:
336*             Greater performance can be achieved if adequate workspace
337*             is provided.  On the other hand, in some situations,
338*             performance can decrease as the workspace provided
339*             increases above the workspace amount shown below:
340*
341*             For optimal performance, greater workspace may be
342*             needed, i.e.
343*                LWORK >=  MAX( LWORK, 5 * N + NSYTRD_LWOPT,
344*                  NSYGST_LWOPT )
345*                Where:
346*                  LWORK, as defined previously, depends upon the number
347*                     of eigenvectors requested, and
348*                  NSYTRD_LWOPT = N + 2*( ANB+1 )*( 4*NPS+2 ) +
349*                    ( NPS + 3 ) *  NPS
350*                  NSYGST_LWOPT =  2*NP0*NB + NQ0*NB + NB*NB
351*
352*                  ANB = PJLAENV( DESCA( CTXT_), 3, 'PSSYTTRD', 'L',
353*                     0, 0, 0, 0)
354*                  SQNPC = INT( SQRT( DBLE( NPROW * NPCOL ) ) )
355*                  NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB )
356*                  NB = DESCA( MB_ )
357*                  NP0 = NUMROC( N, NB, 0, 0, NPROW )
358*                  NQ0 = NUMROC( N, NB, 0, 0, NPCOL )
359*
360*                  NUMROC is a ScaLAPACK tool functions;
361*                  PJLAENV is a ScaLAPACK envionmental inquiry function
362*                  MYROW, MYCOL, NPROW and NPCOL can be determined by
363*                    calling the subroutine BLACS_GRIDINFO.
364*
365*                For large N, no extra workspace is needed, however the
366*                biggest boost in performance comes for small N, so it
367*                is wise to provide the extra workspace (typically less
368*                than a Megabyte per process).
369*
370*             If CLUSTERSIZE >= N/SQRT(NPROW*NPCOL), then providing
371*             enough space to compute all the eigenvectors
372*             orthogonally will cause serious degradation in
373*             performance. In the limit (i.e. CLUSTERSIZE = N-1)
374*             PSSTEIN will perform no better than SSTEIN on 1 processor.
375*             For CLUSTERSIZE = N/SQRT(NPROW*NPCOL) reorthogonalizing
376*             all eigenvectors will increase the total execution time
377*             by a factor of 2 or more.
378*             For CLUSTERSIZE > N/SQRT(NPROW*NPCOL) execution time will
379*             grow as the square of the cluster size, all other factors
380*             remaining equal and assuming enough workspace.  Less
381*             workspace means less reorthogonalization but faster
382*             execution.
383*
384*          If LWORK = -1, then LWORK is global input and a workspace
385*          query is assumed; the routine only calculates the size
386*          required for optimal performance on all work arrays.
387*          Each of these values is returned in the first entry of the
388*          corresponding work array, and no error message is issued by
389*          PXERBLA.
390*
391*
392*  IWORK   (local workspace) INTEGER array
393*          On return, IWORK(1) contains the amount of integer workspace
394*          required.
395*
396*  LIWORK  (local input) INTEGER
397*          size of IWORK
398*          LIWORK >= 6 * NNP
399*          Where:
400*            NNP = MAX( N, NPROW*NPCOL + 1, 4 )
401*
402*          If LIWORK = -1, then LIWORK is global input and a workspace
403*          query is assumed; the routine only calculates the minimum
404*          and optimal size for all work arrays. Each of these
405*          values is returned in the first entry of the corresponding
406*          work array, and no error message is issued by PXERBLA.
407*
408*  IFAIL   (output) INTEGER array, dimension (N)
409*          IFAIL provides additional information when INFO .NE. 0
410*          If (MOD(INFO/16,2).NE.0) then IFAIL(1) indicates the order of
411*          the smallest minor which is not positive definite.
412*          If (MOD(INFO,2).NE.0) on exit, then IFAIL contains the
413*          indices of the eigenvectors that failed to converge.
414*
415*          If neither of the above error conditions hold and JOBZ = 'V',
416*          then the first M elements of IFAIL are set to zero.
417*
418*  ICLUSTR (global output) integer array, dimension (2*NPROW*NPCOL)
419*          This array contains indices of eigenvectors corresponding to
420*          a cluster of eigenvalues that could not be reorthogonalized
421*          due to insufficient workspace (see LWORK, ORFAC and INFO).
422*          Eigenvectors corresponding to clusters of eigenvalues indexed
423*          ICLUSTR(2*I-1) to ICLUSTR(2*I), could not be
424*          reorthogonalized due to lack of workspace. Hence the
425*          eigenvectors corresponding to these clusters may not be
426*          orthogonal.  ICLUSTR() is a zero terminated array.
427*          (ICLUSTR(2*K).NE.0 .AND. ICLUSTR(2*K+1).EQ.0) if and only if
428*          K is the number of clusters
429*          ICLUSTR is not referenced if JOBZ = 'N'
430*
431*  GAP     (global output) REAL array,
432*             dimension (NPROW*NPCOL)
433*          This array contains the gap between eigenvalues whose
434*          eigenvectors could not be reorthogonalized. The output
435*          values in this array correspond to the clusters indicated
436*          by the array ICLUSTR. As a result, the dot product between
437*          eigenvectors correspoding to the I^th cluster may be as high
438*          as ( C * n ) / GAP(I) where C is a small constant.
439*
440*  INFO    (global output) INTEGER
441*          = 0:  successful exit
442*          < 0:  If the i-th argument is an array and the j-entry had
443*                an illegal value, then INFO = -(i*100+j), if the i-th
444*                argument is a scalar and had an illegal value, then
445*                INFO = -i.
446*          > 0:  if (MOD(INFO,2).NE.0), then one or more eigenvectors
447*                  failed to converge.  Their indices are stored
448*                  in IFAIL.  Send e-mail to scalapack@cs.utk.edu
449*                if (MOD(INFO/2,2).NE.0),then eigenvectors corresponding
450*                  to one or more clusters of eigenvalues could not be
451*                  reorthogonalized because of insufficient workspace.
452*                  The indices of the clusters are stored in the array
453*                  ICLUSTR.
454*                if (MOD(INFO/4,2).NE.0), then space limit prevented
455*                  PSSYGVX from computing all of the eigenvectors
456*                  between VL and VU.  The number of eigenvectors
457*                  computed is returned in NZ.
458*                if (MOD(INFO/8,2).NE.0), then PSSTEBZ failed to
459*                  compute eigenvalues.
460*                  Send e-mail to scalapack@cs.utk.edu
461*                if (MOD(INFO/16,2).NE.0), then B was not positive
462*                  definite.  IFAIL(1) indicates the order of
463*                  the smallest minor which is not positive definite.
464*
465*  Alignment requirements
466*  ======================
467*
468*  The distributed submatrices A(IA:*, JA:*), C(IC:IC+M-1,JC:JC+N-1),
469*  and B( IB:IB+N-1, JB:JB+N-1 ) must verify some alignment properties,
470*  namely the following expressions should be true:
471*
472*     DESCA(MB_) = DESCA(NB_)
473*     IA = IB = IZ
474*     JA = IB = JZ
475*     DESCA(M_) = DESCB(M_) =DESCZ(M_)
476*     DESCA(N_) = DESCB(N_)= DESCZ(N_)
477*     DESCA(MB_) = DESCB(MB_) = DESCZ(MB_)
478*     DESCA(NB_) = DESCB(NB_) = DESCZ(NB_)
479*     DESCA(RSRC_) = DESCB(RSRC_) = DESCZ(RSRC_)
480*     DESCA(CSRC_) = DESCB(CSRC_) = DESCZ(CSRC_)
481*     MOD( IA-1, DESCA( MB_ ) ) = 0
482*     MOD( JA-1, DESCA( NB_ ) ) = 0
483*     MOD( IB-1, DESCB( MB_ ) ) = 0
484*     MOD( JB-1, DESCB( NB_ ) ) = 0
485*
486*  =====================================================================
487*
488*     .. Parameters ..
489      INTEGER            BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
490     $                   MB_, NB_, RSRC_, CSRC_, LLD_
491      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
492     $                   CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
493     $                   RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
494      REAL               ONE
495      PARAMETER          ( ONE = 1.0E+0 )
496      REAL               FIVE, ZERO
497      PARAMETER          ( FIVE = 5.0E+0, ZERO = 0.0E+0 )
498      INTEGER            IERRNPD
499      PARAMETER          ( IERRNPD = 16 )
500*     ..
501*     .. Local Scalars ..
502      LOGICAL            ALLEIG, INDEIG, LQUERY, UPPER, VALEIG, WANTZ
503      CHARACTER          TRANS
504      INTEGER            ANB, IACOL, IAROW, IBCOL, IBROW, ICOFFA,
505     $                   ICOFFB, ICTXT, IROFFA, IROFFB, LIWMIN, LWMIN,
506     $                   LWOPT, MQ0, MYCOL, MYROW, NB, NEIG, NN, NP0,
507     $                   NPCOL, NPROW, NPS, NQ0, NSYGST_LWOPT,
508     $                   NSYTRD_LWOPT, SQNPC
509      REAL               EPS, SCALE
510*     ..
511*     .. Local Arrays ..
512      INTEGER            IDUM1( 5 ), IDUM2( 5 )
513*     ..
514*     .. External Functions ..
515      LOGICAL            LSAME
516      INTEGER            ICEIL, INDXG2P, NUMROC, PJLAENV
517      REAL               PSLAMCH
518      EXTERNAL           LSAME, ICEIL, INDXG2P, NUMROC, PJLAENV, PSLAMCH
519*     ..
520*     .. External Subroutines ..
521      EXTERNAL           BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PCHK2MAT,
522     $                   PSPOTRF, PSSYEVX, PSSYNGST, PSTRMM, PSTRSM,
523     $                   PXERBLA, SGEBR2D, SGEBS2D, SSCAL
524*     ..
525*     .. Intrinsic Functions ..
526      INTRINSIC          ABS, DBLE, ICHAR, INT, MAX, MIN, MOD, REAL,
527     $                   SQRT
528*     ..
529*     .. Executable Statements ..
530*       This is just to keep ftnchek and toolpack/1 happy
531      IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_*
532     $    RSRC_.LT.0 )RETURN
533*
534*     Get grid parameters
535*
536      ICTXT = DESCA( CTXT_ )
537      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
538*
539*     Test the input parameters
540*
541      INFO = 0
542      IF( NPROW.EQ.-1 ) THEN
543         INFO = -( 900+CTXT_ )
544      ELSE IF( DESCA( CTXT_ ).NE.DESCB( CTXT_ ) ) THEN
545         INFO = -( 1300+CTXT_ )
546      ELSE IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN
547         INFO = -( 2600+CTXT_ )
548      ELSE
549*
550*     Get machine constants.
551*
552         EPS = PSLAMCH( DESCA( CTXT_ ), 'Precision' )
553*
554         WANTZ = LSAME( JOBZ, 'V' )
555         UPPER = LSAME( UPLO, 'U' )
556         ALLEIG = LSAME( RANGE, 'A' )
557         VALEIG = LSAME( RANGE, 'V' )
558         INDEIG = LSAME( RANGE, 'I' )
559         CALL CHK1MAT( N, 4, N, 4, IA, JA, DESCA, 9, INFO )
560         CALL CHK1MAT( N, 4, N, 4, IB, JB, DESCB, 13, INFO )
561         CALL CHK1MAT( N, 4, N, 4, IZ, JZ, DESCZ, 26, INFO )
562         IF( INFO.EQ.0 ) THEN
563            IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN
564               WORK( 1 ) = ABSTOL
565               IF( VALEIG ) THEN
566                  WORK( 2 ) = VL
567                  WORK( 3 ) = VU
568               ELSE
569                  WORK( 2 ) = ZERO
570                  WORK( 3 ) = ZERO
571               END IF
572               CALL SGEBS2D( DESCA( CTXT_ ), 'ALL', ' ', 3, 1, WORK, 3 )
573            ELSE
574               CALL SGEBR2D( DESCA( CTXT_ ), 'ALL', ' ', 3, 1, WORK, 3,
575     $                       0, 0 )
576            END IF
577            IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ),
578     $              NPROW )
579            IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ),
580     $              NPROW )
581            IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ),
582     $              NPCOL )
583            IBCOL = INDXG2P( JB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ),
584     $              NPCOL )
585            IROFFA = MOD( IA-1, DESCA( MB_ ) )
586            ICOFFA = MOD( JA-1, DESCA( NB_ ) )
587            IROFFB = MOD( IB-1, DESCB( MB_ ) )
588            ICOFFB = MOD( JB-1, DESCB( NB_ ) )
589*
590*     Compute the total amount of space needed
591*
592            LQUERY = .FALSE.
593            IF( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
594     $         LQUERY = .TRUE.
595*
596            LIWMIN = 6*MAX( N, ( NPROW*NPCOL )+1, 4 )
597*
598            NB = DESCA( MB_ )
599            NN = MAX( N, NB, 2 )
600            NP0 = NUMROC( NN, NB, 0, 0, NPROW )
601*
602            IF( ( .NOT.WANTZ ) .OR. ( VALEIG .AND. ( .NOT.LQUERY ) ) )
603     $           THEN
604               LWMIN = 5*N + MAX( 5*NN, NB*( NP0+1 ) )
605               IF( WANTZ ) THEN
606                  MQ0 = NUMROC( MAX( N, NB, 2 ), NB, 0, 0, NPCOL )
607                  LWOPT = 5*N + MAX( 5*NN, NP0*MQ0+2*NB*NB )
608               ELSE
609                  LWOPT = LWMIN
610               END IF
611               NEIG = 0
612            ELSE
613               IF( ALLEIG .OR. VALEIG ) THEN
614                  NEIG = N
615               ELSE IF( INDEIG ) THEN
616                  NEIG = IU - IL + 1
617               END IF
618               MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL )
619               LWMIN = 5*N + MAX( 5*NN, NP0*MQ0+2*NB*NB ) +
620     $                 ICEIL( NEIG, NPROW*NPCOL )*NN
621               LWOPT = LWMIN
622*
623            END IF
624*
625*           Compute how much workspace is needed to use the
626*           new TRD and GST algorithms
627*
628            ANB = PJLAENV( ICTXT, 3, 'PSSYTTRD', 'L', 0, 0, 0, 0 )
629            SQNPC = INT( SQRT( DBLE( NPROW*NPCOL ) ) )
630            NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB )
631            NSYTRD_LWOPT = 2*( ANB+1 )*( 4*NPS+2 ) + ( NPS+4 )*NPS
632            NB = DESCA( MB_ )
633            NP0 = NUMROC( N, NB, 0, 0, NPROW )
634            NQ0 = NUMROC( N, NB, 0, 0, NPCOL )
635            NSYGST_LWOPT = 2*NP0*NB + NQ0*NB + NB*NB
636            LWOPT = MAX( LWOPT, N+NSYTRD_LWOPT, NSYGST_LWOPT )
637*
638*       Version 1.0 Limitations
639*
640            IF( IBTYPE.LT.1 .OR. IBTYPE.GT.3 ) THEN
641               INFO = -1
642            ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
643               INFO = -2
644            ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
645               INFO = -3
646            ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
647               INFO = -4
648            ELSE IF( N.LT.0 ) THEN
649               INFO = -5
650            ELSE IF( IROFFA.NE.0 ) THEN
651               INFO = -7
652            ELSE IF( ICOFFA.NE.0 ) THEN
653               INFO = -8
654            ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN
655               INFO = -( 900+NB_ )
656            ELSE IF( DESCA( M_ ).NE.DESCB( M_ ) ) THEN
657               INFO = -( 1300+M_ )
658            ELSE IF( DESCA( N_ ).NE.DESCB( N_ ) ) THEN
659               INFO = -( 1300+N_ )
660            ELSE IF( DESCA( MB_ ).NE.DESCB( MB_ ) ) THEN
661               INFO = -( 1300+MB_ )
662            ELSE IF( DESCA( NB_ ).NE.DESCB( NB_ ) ) THEN
663               INFO = -( 1300+NB_ )
664            ELSE IF( DESCA( RSRC_ ).NE.DESCB( RSRC_ ) ) THEN
665               INFO = -( 1300+RSRC_ )
666            ELSE IF( DESCA( CSRC_ ).NE.DESCB( CSRC_ ) ) THEN
667               INFO = -( 1300+CSRC_ )
668            ELSE IF( DESCA( CTXT_ ).NE.DESCB( CTXT_ ) ) THEN
669               INFO = -( 1300+CTXT_ )
670            ELSE IF( DESCA( M_ ).NE.DESCZ( M_ ) ) THEN
671               INFO = -( 2200+M_ )
672            ELSE IF( DESCA( N_ ).NE.DESCZ( N_ ) ) THEN
673               INFO = -( 2200+N_ )
674            ELSE IF( DESCA( MB_ ).NE.DESCZ( MB_ ) ) THEN
675               INFO = -( 2200+MB_ )
676            ELSE IF( DESCA( NB_ ).NE.DESCZ( NB_ ) ) THEN
677               INFO = -( 2200+NB_ )
678            ELSE IF( DESCA( RSRC_ ).NE.DESCZ( RSRC_ ) ) THEN
679               INFO = -( 2200+RSRC_ )
680            ELSE IF( DESCA( CSRC_ ).NE.DESCZ( CSRC_ ) ) THEN
681               INFO = -( 2200+CSRC_ )
682            ELSE IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN
683               INFO = -( 2200+CTXT_ )
684            ELSE IF( IROFFB.NE.0 .OR. IBROW.NE.IAROW ) THEN
685               INFO = -11
686            ELSE IF( ICOFFB.NE.0 .OR. IBCOL.NE.IACOL ) THEN
687               INFO = -12
688            ELSE IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN
689               INFO = -15
690            ELSE IF( INDEIG .AND. ( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) )
691     $                THEN
692               INFO = -16
693            ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) )
694     $                THEN
695               INFO = -17
696            ELSE IF( VALEIG .AND. ( ABS( WORK( 2 )-VL ).GT.FIVE*EPS*
697     $               ABS( VL ) ) ) THEN
698               INFO = -14
699            ELSE IF( VALEIG .AND. ( ABS( WORK( 3 )-VU ).GT.FIVE*EPS*
700     $               ABS( VU ) ) ) THEN
701               INFO = -15
702            ELSE IF( ABS( WORK( 1 )-ABSTOL ).GT.FIVE*EPS*ABS( ABSTOL ) )
703     $                THEN
704               INFO = -18
705            ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
706               INFO = -28
707            ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
708               INFO = -30
709            END IF
710         END IF
711         IDUM1( 1 ) = IBTYPE
712         IDUM2( 1 ) = 1
713         IF( WANTZ ) THEN
714            IDUM1( 2 ) = ICHAR( 'V' )
715         ELSE
716            IDUM1( 2 ) = ICHAR( 'N' )
717         END IF
718         IDUM2( 2 ) = 2
719         IF( UPPER ) THEN
720            IDUM1( 3 ) = ICHAR( 'U' )
721         ELSE
722            IDUM1( 3 ) = ICHAR( 'L' )
723         END IF
724         IDUM2( 3 ) = 3
725         IF( ALLEIG ) THEN
726            IDUM1( 4 ) = ICHAR( 'A' )
727         ELSE IF( INDEIG ) THEN
728            IDUM1( 4 ) = ICHAR( 'I' )
729         ELSE
730            IDUM1( 4 ) = ICHAR( 'V' )
731         END IF
732         IDUM2( 4 ) = 4
733         IF( LQUERY ) THEN
734            IDUM1( 5 ) = -1
735         ELSE
736            IDUM1( 5 ) = 1
737         END IF
738         IDUM2( 5 ) = 5
739         CALL PCHK2MAT( N, 4, N, 4, IA, JA, DESCA, 9, N, 4, N, 4, IB,
740     $                  JB, DESCB, 13, 5, IDUM1, IDUM2, INFO )
741         CALL PCHK1MAT( N, 4, N, 4, IZ, JZ, DESCZ, 26, 0, IDUM1, IDUM2,
742     $                  INFO )
743      END IF
744*
745      IWORK( 1 ) = LIWMIN
746      WORK( 1 ) = REAL( LWOPT )
747*
748      IF( INFO.NE.0 ) THEN
749         CALL PXERBLA( ICTXT, 'PSSYGVX ', -INFO )
750         RETURN
751      ELSE IF( LQUERY ) THEN
752         RETURN
753      END IF
754*
755*     Form a Cholesky factorization of sub( B ).
756*
757      CALL PSPOTRF( UPLO, N, B, IB, JB, DESCB, INFO )
758      IF( INFO.NE.0 ) THEN
759         IWORK( 1 ) = LIWMIN
760         WORK( 1 ) = REAL( LWOPT )
761         IFAIL( 1 ) = INFO
762         INFO = IERRNPD
763         RETURN
764      END IF
765*
766*     Transform problem to standard eigenvalue problem and solve.
767*
768      CALL PSSYNGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB,
769     $               DESCB, SCALE, WORK, LWORK, INFO )
770      CALL PSSYEVX( JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL, VU, IL,
771     $              IU, ABSTOL, M, NZ, W, ORFAC, Z, IZ, JZ, DESCZ, WORK,
772     $              LWORK, IWORK, LIWORK, IFAIL, ICLUSTR, GAP, INFO )
773*
774      IF( WANTZ ) THEN
775*
776*        Backtransform eigenvectors to the original problem.
777*
778         NEIG = M
779         IF( IBTYPE.EQ.1 .OR. IBTYPE.EQ.2 ) THEN
780*
781*           For sub( A )*x=(lambda)*sub( B )*x and
782*           sub( A )*sub( B )*x=(lambda)*x; backtransform eigenvectors:
783*           x = inv(L)'*y or inv(U)*y
784*
785            IF( UPPER ) THEN
786               TRANS = 'N'
787            ELSE
788               TRANS = 'T'
789            END IF
790*
791            CALL PSTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE,
792     $                   B, IB, JB, DESCB, Z, IZ, JZ, DESCZ )
793*
794         ELSE IF( IBTYPE.EQ.3 ) THEN
795*
796*           For sub( B )*sub( A )*x=(lambda)*x;
797*           backtransform eigenvectors: x = L*y or U'*y
798*
799            IF( UPPER ) THEN
800               TRANS = 'T'
801            ELSE
802               TRANS = 'N'
803            END IF
804*
805            CALL PSTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE,
806     $                   B, IB, JB, DESCB, Z, IZ, JZ, DESCZ )
807         END IF
808      END IF
809*
810      IF( SCALE.NE.ONE ) THEN
811         CALL SSCAL( N, SCALE, W, 1 )
812      END IF
813*
814      IWORK( 1 ) = LIWMIN
815      WORK( 1 ) = REAL( LWOPT )
816      RETURN
817*
818*     End of PSSYGVX
819*
820      END
821