1      SUBROUTINE PZHEGVX( IBTYPE, JOBZ, RANGE, UPLO, N, A, IA, JA,
2     $                    DESCA, B, IB, JB, DESCB, VL, VU, IL, IU,
3     $                    ABSTOL, M, NZ, W, ORFAC, Z, IZ, JZ, DESCZ,
4     $                    WORK, LWORK, RWORK, LRWORK, IWORK, LIWORK,
5     $                    IFAIL, ICLUSTR, GAP, INFO )
6*
7*  -- ScaLAPACK routine (version 1.7) --
8*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
9*     and University of California, Berkeley.
10*     October 15, 1999
11*
12*     .. Scalar Arguments ..
13      CHARACTER          JOBZ, RANGE, UPLO
14      INTEGER            IA, IB, IBTYPE, IL, INFO, IU, IZ, JA, JB, JZ,
15     $                   LIWORK, LRWORK, LWORK, M, N, NZ
16      DOUBLE PRECISION   ABSTOL, ORFAC, VL, VU
17*     ..
18*     .. Array Arguments ..
19*
20      INTEGER            DESCA( * ), DESCB( * ), DESCZ( * ),
21     $                   ICLUSTR( * ), IFAIL( * ), IWORK( * )
22      DOUBLE PRECISION   GAP( * ), RWORK( * ), W( * )
23      COMPLEX*16         A( * ), B( * ), WORK( * ), Z( * )
24*     ..
25*
26*  Purpose
27*
28*  =======
29*
30*  PZHEGVX computes all the eigenvalues, and optionally,
31*  the eigenvectors
32*  of a complex generalized Hermitian-definite eigenproblem, of the form
33*  sub( A )*x=(lambda)*sub( B )*x,  sub( A )*sub( B )x=(lambda)*x,  or
34*  sub( B )*sub( A )*x=(lambda)*x.
35*  Here sub( A ) denoting A( IA:IA+N-1, JA:JA+N-1 ) is assumed to be
36*  Hermitian, and sub( B ) denoting B( IB:IB+N-1, JB:JB+N-1 ) is assumed
37*  to be Hermitian positive definite.
38*
39*  Notes
40*  =====
41*
42*
43*  Each global data object is described by an associated description
44*  vector.  This vector stores the information required to establish
45*  the mapping between an object element and its corresponding process
46*  and memory location.
47*
48*  Let A be a generic term for any 2D block cyclicly distributed array.
49*  Such a global array has an associated description vector DESCA.
50*  In the following comments, the character _ should be read as
51*  "of the global array".
52*
53*  NOTATION        STORED IN      EXPLANATION
54*  --------------- -------------- --------------------------------------
55*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
56*                                 DTYPE_A = 1.
57*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
58*                                 the BLACS process grid A is distribu-
59*                                 ted over. The context itself is glo-
60*                                 bal, but the handle (the integer
61*                                 value) may vary.
62*  M_A    (global) DESCA( M_ )    The number of rows in the global
63*                                 array A.
64*  N_A    (global) DESCA( N_ )    The number of columns in the global
65*                                 array A.
66*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
67*                                 the rows of the array.
68*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
69*                                 the columns of the array.
70*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
71*                                 row of the array A is distributed.
72*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
73*                                 first column of the array A is
74*                                 distributed.
75*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
76*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
77*
78*  Let K be the number of rows or columns of a distributed matrix,
79*  and assume that its process grid has dimension p x q.
80*  LOCr( K ) denotes the number of elements of K that a process
81*  would receive if K were distributed over the p processes of its
82*  process column.
83*  Similarly, LOCc( K ) denotes the number of elements of K that a
84*  process would receive if K were distributed over the q processes of
85*  its process row.
86*  The values of LOCr() and LOCc() may be determined via a call to the
87*  ScaLAPACK tool function, NUMROC:
88*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
89*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
90*  An upper bound for these quantities may be computed by:
91*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
92*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
93*
94*
95*  Arguments
96*  =========
97*
98*  IBTYPE   (global input) INTEGER
99*          Specifies the problem type to be solved:
100*          = 1:  sub( A )*x = (lambda)*sub( B )*x
101*          = 2:  sub( A )*sub( B )*x = (lambda)*x
102*          = 3:  sub( B )*sub( A )*x = (lambda)*x
103*
104*  JOBZ    (global input) CHARACTER*1
105*          = 'N':  Compute eigenvalues only;
106*          = 'V':  Compute eigenvalues and eigenvectors.
107*
108*  RANGE   (global input) CHARACTER*1
109*          = 'A': all eigenvalues will be found.
110*          = 'V': all eigenvalues in the interval [VL,VU] will be found.
111*          = 'I': the IL-th through IU-th eigenvalues will be found.
112*
113*  UPLO    (global input) CHARACTER*1
114*          = 'U':  Upper triangles of sub( A ) and sub( B ) are stored;
115*          = 'L':  Lower triangles of sub( A ) and sub( B ) are stored.
116*
117*  N       (global input) INTEGER
118*          The order of the matrices sub( A ) and sub( B ).  N >= 0.
119*
120*  A       (local input/local output) COMPLEX*16 pointer into the
121*          local memory to an array of dimension (LLD_A, LOCc(JA+N-1)).
122*          On entry, this array contains the local pieces of the
123*          N-by-N Hermitian distributed matrix sub( A ). If UPLO = 'U',
124*          the leading N-by-N upper triangular part of sub( A ) contains
125*          the upper triangular part of the matrix.  If UPLO = 'L', the
126*          leading N-by-N lower triangular part of sub( A ) contains
127*          the lower triangular part of the matrix.
128*
129*          On exit, if JOBZ = 'V', then if INFO = 0, sub( A ) contains
130*          the distributed matrix Z of eigenvectors.  The eigenvectors
131*          are normalized as follows:
132*          if IBTYPE = 1 or 2, Z**H*sub( B )*Z = I;
133*          if IBTYPE = 3, Z**H*inv( sub( B ) )*Z = I.
134*          If JOBZ = 'N', then on exit the upper triangle (if UPLO='U')
135*          or the lower triangle (if UPLO='L') of sub( A ), including
136*          the diagonal, is destroyed.
137*
138*  IA      (global input) INTEGER
139*          The row index in the global array A indicating the first
140*          row of sub( A ).
141*
142*  JA      (global input) INTEGER
143*          The column index in the global array A indicating the
144*          first column of sub( A ).
145*
146*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
147*          The array descriptor for the distributed matrix A.
148*          If DESCA( CTXT_ ) is incorrect, PZHEGVX cannot guarantee
149*          correct error reporting.
150*
151*  B       (local input/local output) COMPLEX*16 pointer into the
152*          local memory to an array of dimension (LLD_B, LOCc(JB+N-1)).
153*          On entry, this array contains the local pieces of the
154*          N-by-N Hermitian distributed matrix sub( B ). If UPLO = 'U',
155*          the leading N-by-N upper triangular part of sub( B ) contains
156*          the upper triangular part of the matrix.  If UPLO = 'L', the
157*          leading N-by-N lower triangular part of sub( B ) contains
158*          the lower triangular part of the matrix.
159*
160*          On exit, if INFO <= N, the part of sub( B ) containing the
161*          matrix is overwritten by the triangular factor U or L from
162*          the Cholesky factorization sub( B ) = U**H*U or
163*          sub( B ) = L*L**H.
164*
165*  IB      (global input) INTEGER
166*          The row index in the global array B indicating the first
167*          row of sub( B ).
168*
169*  JB      (global input) INTEGER
170*          The column index in the global array B indicating the
171*          first column of sub( B ).
172*
173*  DESCB   (global and local input) INTEGER array of dimension DLEN_.
174*          The array descriptor for the distributed matrix B.
175*          DESCB( CTXT_ ) must equal DESCA( CTXT_ )
176*
177*  VL      (global input) DOUBLE PRECISION
178*          If RANGE='V', the lower bound of the interval to be searched
179*          for eigenvalues.  Not referenced if RANGE = 'A' or 'I'.
180*
181*  VU      (global input) DOUBLE PRECISION
182*          If RANGE='V', the upper bound of the interval to be searched
183*          for eigenvalues.  Not referenced if RANGE = 'A' or 'I'.
184*
185*  IL      (global input) INTEGER
186*          If RANGE='I', the index (from smallest to largest) of the
187*          smallest eigenvalue to be returned.  IL >= 1.
188*          Not referenced if RANGE = 'A' or 'V'.
189*
190*  IU      (global input) INTEGER
191*          If RANGE='I', the index (from smallest to largest) of the
192*          largest eigenvalue to be returned.  min(IL,N) <= IU <= N.
193*          Not referenced if RANGE = 'A' or 'V'.
194*
195*  ABSTOL  (global input) DOUBLE PRECISION
196*          If JOBZ='V', setting ABSTOL to PDLAMCH( CONTEXT, 'U') yields
197*          the most orthogonal eigenvectors.
198*
199*          The absolute error tolerance for the eigenvalues.
200*          An approximate eigenvalue is accepted as converged
201*          when it is determined to lie in an interval [a,b]
202*          of width less than or equal to
203*
204*                  ABSTOL + EPS *   max( |a|,|b| ) ,
205*
206*          where EPS is the machine precision.  If ABSTOL is less than
207*          or equal to zero, then EPS*norm(T) will be used in its place,
208*          where norm(T) is the 1-norm of the tridiagonal matrix
209*          obtained by reducing A to tridiagonal form.
210*
211*          Eigenvalues will be computed most accurately when ABSTOL is
212*          set to twice the underflow threshold 2*PDLAMCH('S') not zero.
213*          If this routine returns with ((MOD(INFO,2).NE.0) .OR.
214*          (MOD(INFO/8,2).NE.0)), indicating that some eigenvalues or
215*          eigenvectors did not converge, try setting ABSTOL to
216*          2*PDLAMCH('S').
217*
218*          See "Computing Small Singular Values of Bidiagonal Matrices
219*          with Guaranteed High Relative Accuracy," by Demmel and
220*          Kahan, LAPACK Working Note #3.
221*
222*          See "On the correctness of Parallel Bisection in Floating
223*          Point" by Demmel, Dhillon and Ren, LAPACK Working Note #70
224*
225*  M       (global output) INTEGER
226*          Total number of eigenvalues found.  0 <= M <= N.
227*
228*  NZ      (global output) INTEGER
229*          Total number of eigenvectors computed.  0 <= NZ <= M.
230*          The number of columns of Z that are filled.
231*          If JOBZ .NE. 'V', NZ is not referenced.
232*          If JOBZ .EQ. 'V', NZ = M unless the user supplies
233*          insufficient space and PZHEGVX is not able to detect this
234*          before beginning computation.  To get all the eigenvectors
235*          requested, the user must supply both sufficient
236*          space to hold the eigenvectors in Z (M .LE. DESCZ(N_))
237*          and sufficient workspace to compute them.  (See LWORK below.)
238*          PZHEGVX is always able to detect insufficient space without
239*          computation unless RANGE .EQ. 'V'.
240*
241*  W       (global output) DOUBLE PRECISION array, dimension (N)
242*          On normal exit, the first M entries contain the selected
243*          eigenvalues in ascending order.
244*
245*  ORFAC   (global input) DOUBLE PRECISION
246*          Specifies which eigenvectors should be reorthogonalized.
247*          Eigenvectors that correspond to eigenvalues which are within
248*          tol=ORFAC*norm(A) of each other are to be reorthogonalized.
249*          However, if the workspace is insufficient (see LWORK),
250*          tol may be decreased until all eigenvectors to be
251*          reorthogonalized can be stored in one process.
252*          No reorthogonalization will be done if ORFAC equals zero.
253*          A default value of 10^-3 is used if ORFAC is negative.
254*          ORFAC should be identical on all processes.
255*
256*  Z       (local output) COMPLEX*16 array,
257*          global dimension (N, N),
258*          local dimension ( LLD_Z, LOCc(JZ+N-1) )
259*          If JOBZ = 'V', then on normal exit the first M columns of Z
260*          contain the orthonormal eigenvectors of the matrix
261*          corresponding to the selected eigenvalues.  If an eigenvector
262*          fails to converge, then that column of Z contains the latest
263*          approximation to the eigenvector, and the index of the
264*          eigenvector is returned in IFAIL.
265*          If JOBZ = 'N', then Z is not referenced.
266*
267*  IZ      (global input) INTEGER
268*          The row index in the global array Z indicating the first
269*          row of sub( Z ).
270*
271*  JZ      (global input) INTEGER
272*          The column index in the global array Z indicating the
273*          first column of sub( Z ).
274*
275*  DESCZ   (global and local input) INTEGER array of dimension DLEN_.
276*          The array descriptor for the distributed matrix Z.
277*          DESCZ( CTXT_ ) must equal DESCA( CTXT_ )
278*
279*  WORK    (local workspace/output) COMPLEX*16 array,
280*             dimension (LWORK)
281*          WORK(1) returns the optimal workspace.
282*
283*  LWORK   (local input) INTEGER
284*          Size of WORK array.  If only eigenvalues are requested:
285*            LWORK >= N + MAX( NB * ( NP0 + 1 ), 3 )
286*          If eigenvectors are requested:
287*            LWORK >= N + ( NP0 + MQ0 + NB ) * NB
288*          with NQ0 = NUMROC( NN, NB, 0, 0, NPCOL ).
289*
290*          For optimal performance, greater workspace is needed, i.e.
291*            LWORK >= MAX( LWORK, N + NHETRD_LWOPT,
292*              NHEGST_LWOPT )
293*          Where LWORK is as defined above, and
294*            NHETRD_LWORK = 2*( ANB+1 )*( 4*NPS+2 ) +
295*              ( NPS + 1 ) * NPS
296*            NHEGST_LWOPT =  2*NP0*NB + NQ0*NB + NB*NB
297*
298*            NB = DESCA( MB_ )
299*            NP0 = NUMROC( N, NB, 0, 0, NPROW )
300*            NQ0 = NUMROC( N, NB, 0, 0, NPCOL )
301*            ICTXT = DESCA( CTXT_ )
302*            ANB = PJLAENV( ICTXT, 3, 'PZHETTRD', 'L', 0, 0, 0, 0 )
303*            SQNPC = SQRT( DBLE( NPROW * NPCOL ) )
304*            NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB )
305*
306*            NUMROC is a ScaLAPACK tool functions;
307*            PJLAENV is a ScaLAPACK envionmental inquiry function
308*            MYROW, MYCOL, NPROW and NPCOL can be determined by calling
309*            the subroutine BLACS_GRIDINFO.
310*
311*          If LWORK = -1, then LWORK is global input and a workspace
312*          query is assumed; the routine only calculates the optimal
313*          size for all work arrays. Each of these values is returned
314*          in the first entry of the correspondingwork array, and no
315*          error message is issued by PXERBLA.
316*
317*  RWORK   (local workspace/output) DOUBLE PRECISION array,
318*             dimension max(3,LRWORK)
319*          On return, RWORK(1) contains the amount of workspace
320*             required for optimal efficiency
321*          if JOBZ='N' RWORK(1) = optimal amount of workspace
322*             required to compute eigenvalues efficiently
323*          if JOBZ='V' RWORK(1) = optimal amount of workspace
324*             required to compute eigenvalues and eigenvectors
325*             efficiently with no guarantee on orthogonality.
326*             If RANGE='V', it is assumed that all eigenvectors
327*             may be required when computing optimal workspace.
328*
329*  LRWORK  (local input) INTEGER
330*          Size of RWORK
331*          See below for definitions of variables used to define LRWORK.
332*          If no eigenvectors are requested (JOBZ = 'N') then
333*             LRWORK >= 5 * NN + 4 * N
334*          If eigenvectors are requested (JOBZ = 'V' ) then
335*             the amount of workspace required to guarantee that all
336*             eigenvectors are computed is:
337*             LRWORK >= 4*N + MAX( 5*NN, NP0 * MQ0 ) +
338*               ICEIL( NEIG, NPROW*NPCOL)*NN
339*
340*             The computed eigenvectors may not be orthogonal if the
341*             minimal workspace is supplied and ORFAC is too small.
342*             If you want to guarantee orthogonality (at the cost
343*             of potentially poor performance) you should add
344*             the following to LRWORK:
345*                (CLUSTERSIZE-1)*N
346*             where CLUSTERSIZE is the number of eigenvalues in the
347*             largest cluster, where a cluster is defined as a set of
348*             close eigenvalues: { W(K),...,W(K+CLUSTERSIZE-1) |
349*                                  W(J+1) <= W(J) + ORFAC*2*norm(A) }
350*          Variable definitions:
351*             NEIG = number of eigenvectors requested
352*             NB = DESCA( MB_ ) = DESCA( NB_ ) = DESCZ( MB_ ) =
353*                  DESCZ( NB_ )
354*             NN = MAX( N, NB, 2 )
355*             DESCA( RSRC_ ) = DESCA( NB_ ) = DESCZ( RSRC_ ) =
356*                              DESCZ( CSRC_ ) = 0
357*             NP0 = NUMROC( NN, NB, 0, 0, NPROW )
358*             MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL )
359*             ICEIL( X, Y ) is a ScaLAPACK function returning
360*             ceiling(X/Y)
361*
362*          When LRWORK is too small:
363*             If LRWORK is too small to guarantee orthogonality,
364*             PZHEGVX attempts to maintain orthogonality in
365*             the clusters with the smallest
366*             spacing between the eigenvalues.
367*             If LRWORK is too small to compute all the eigenvectors
368*             requested, no computation is performed and INFO=-25
369*             is returned.  Note that when RANGE='V', PZHEGVX does
370*             not know how many eigenvectors are requested until
371*             the eigenvalues are computed.  Therefore, when RANGE='V'
372*             and as long as LRWORK is large enough to allow PZHEGVX to
373*             compute the eigenvalues, PZHEGVX will compute the
374*             eigenvalues and as many eigenvectors as it can.
375*
376*          Relationship between workspace, orthogonality & performance:
377*             If CLUSTERSIZE >= N/SQRT(NPROW*NPCOL), then providing
378*             enough space to compute all the eigenvectors
379*             orthogonally will cause serious degradation in
380*             performance. In the limit (i.e. CLUSTERSIZE = N-1)
381*             PZSTEIN will perform no better than ZSTEIN on 1 processor.
382*             For CLUSTERSIZE = N/SQRT(NPROW*NPCOL) reorthogonalizing
383*             all eigenvectors will increase the total execution time
384*             by a factor of 2 or more.
385*             For CLUSTERSIZE > N/SQRT(NPROW*NPCOL) execution time will
386*             grow as the square of the cluster size, all other factors
387*             remaining equal and assuming enough workspace.  Less
388*             workspace means less reorthogonalization but faster
389*             execution.
390*
391*          If LRWORK = -1, then LRWORK is global input and a workspace
392*          query is assumed; the routine only calculates the minimum
393*          and optimal size for all work arrays. Each of these
394*          values is returned in the first entry of the corresponding
395*          work array, and no error message is issued by PXERBLA.
396*
397*  IWORK   (local workspace) INTEGER array
398*          On return, IWORK(1) contains the amount of integer workspace
399*          required.
400*
401*  LIWORK  (local input) INTEGER
402*          size of IWORK
403*          LIWORK >= 6 * NNP
404*          Where:
405*            NNP = MAX( N, NPROW*NPCOL + 1, 4 )
406*
407*          If LIWORK = -1, then LIWORK is global input and a workspace
408*          query is assumed; the routine only calculates the minimum
409*          and optimal size for all work arrays. Each of these
410*          values is returned in the first entry of the corresponding
411*          work array, and no error message is issued by PXERBLA.
412*
413*  IFAIL   (output) INTEGER array, dimension (N)
414*          IFAIL provides additional information when INFO .NE. 0
415*          If (MOD(INFO/16,2).NE.0) then IFAIL(1) indicates the order of
416*          the smallest minor which is not positive definite.
417*          If (MOD(INFO,2).NE.0) on exit, then IFAIL contains the
418*          indices of the eigenvectors that failed to converge.
419*
420*          If neither of the above error conditions hold and JOBZ = 'V',
421*          then the first M elements of IFAIL are set to zero.
422*
423*  ICLUSTR (global output) integer array, dimension (2*NPROW*NPCOL)
424*          This array contains indices of eigenvectors corresponding to
425*          a cluster of eigenvalues that could not be reorthogonalized
426*          due to insufficient workspace (see LWORK, ORFAC and INFO).
427*          Eigenvectors corresponding to clusters of eigenvalues indexed
428*          ICLUSTR(2*I-1) to ICLUSTR(2*I), could not be
429*          reorthogonalized due to lack of workspace. Hence the
430*          eigenvectors corresponding to these clusters may not be
431*          orthogonal.  ICLUSTR() is a zero terminated array.
432*          (ICLUSTR(2*K).NE.0 .AND. ICLUSTR(2*K+1).EQ.0) if and only if
433*          K is the number of clusters
434*          ICLUSTR is not referenced if JOBZ = 'N'
435*
436*  GAP     (global output) DOUBLE PRECISION array,
437*             dimension (NPROW*NPCOL)
438*          This array contains the gap between eigenvalues whose
439*          eigenvectors could not be reorthogonalized. The output
440*          values in this array correspond to the clusters indicated
441*          by the array ICLUSTR. As a result, the dot product between
442*          eigenvectors correspoding to the I^th cluster may be as high
443*          as ( C * n ) / GAP(I) where C is a small constant.
444*
445*  INFO    (global output) INTEGER
446*          = 0:  successful exit
447*          < 0:  If the i-th argument is an array and the j-entry had
448*                an illegal value, then INFO = -(i*100+j), if the i-th
449*                argument is a scalar and had an illegal value, then
450*                INFO = -i.
451*          > 0:  if (MOD(INFO,2).NE.0), then one or more eigenvectors
452*                  failed to converge.  Their indices are stored
453*                  in IFAIL.  Send e-mail to scalapack@cs.utk.edu
454*                if (MOD(INFO/2,2).NE.0),then eigenvectors corresponding
455*                  to one or more clusters of eigenvalues could not be
456*                  reorthogonalized because of insufficient workspace.
457*                  The indices of the clusters are stored in the array
458*                  ICLUSTR.
459*                if (MOD(INFO/4,2).NE.0), then space limit prevented
460*                  PZHEGVX from computing all of the eigenvectors
461*                  between VL and VU.  The number of eigenvectors
462*                  computed is returned in NZ.
463*                if (MOD(INFO/8,2).NE.0), then PZSTEBZ failed to
464*                  compute eigenvalues.
465*                  Send e-mail to scalapack@cs.utk.edu
466*                if (MOD(INFO/16,2).NE.0), then B was not positive
467*                  definite.  IFAIL(1) indicates the order of
468*                  the smallest minor which is not positive definite.
469*
470*  Alignment requirements
471*  ======================
472*
473*  The distributed submatrices A(IA:*, JA:*), C(IC:IC+M-1,JC:JC+N-1),
474*  and B( IB:IB+N-1, JB:JB+N-1 ) must verify some alignment properties,
475*  namely the following expressions should be true:
476*
477*     DESCA(MB_) = DESCA(NB_)
478*     IA = IB = IZ
479*     JA = IB = JZ
480*     DESCA(M_) = DESCB(M_) =DESCZ(M_)
481*     DESCA(N_) = DESCB(N_)= DESCZ(N_)
482*     DESCA(MB_) = DESCB(MB_) = DESCZ(MB_)
483*     DESCA(NB_) = DESCB(NB_) = DESCZ(NB_)
484*     DESCA(RSRC_) = DESCB(RSRC_) = DESCZ(RSRC_)
485*     DESCA(CSRC_) = DESCB(CSRC_) = DESCZ(CSRC_)
486*     MOD( IA-1, DESCA( MB_ ) ) = 0
487*     MOD( JA-1, DESCA( NB_ ) ) = 0
488*     MOD( IB-1, DESCB( MB_ ) ) = 0
489*     MOD( JB-1, DESCB( NB_ ) ) = 0
490*
491*  =====================================================================
492*
493*     .. Parameters ..
494      INTEGER            BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
495     $                   MB_, NB_, RSRC_, CSRC_, LLD_
496      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
497     $                   CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
498     $                   RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
499      COMPLEX*16         ONE
500      PARAMETER          ( ONE = 1.0D+0 )
501      DOUBLE PRECISION   FIVE, ZERO
502      PARAMETER          ( FIVE = 5.0D+0, ZERO = 0.0D+0 )
503      INTEGER            IERRNPD
504      PARAMETER          ( IERRNPD = 16 )
505*     ..
506*     .. Local Scalars ..
507      LOGICAL            ALLEIG, INDEIG, LQUERY, UPPER, VALEIG, WANTZ
508      CHARACTER          TRANS
509      INTEGER            ANB, IACOL, IAROW, IBCOL, IBROW, ICOFFA,
510     $                   ICOFFB, ICTXT, IROFFA, IROFFB, LIWMIN, LRWMIN,
511     $                   LRWOPT, LWMIN, LWOPT, MQ0, MYCOL, MYROW, NB,
512     $                   NEIG, NHEGST_LWOPT, NHETRD_LWOPT, NN, NP0,
513     $                   NPCOL, NPROW, NPS, NQ0, SQNPC
514      DOUBLE PRECISION   EPS, SCALE
515*     ..
516*     .. Local Arrays ..
517      INTEGER            IDUM1( 5 ), IDUM2( 5 )
518*     ..
519*     .. External Functions ..
520      LOGICAL            LSAME
521      INTEGER            ICEIL, INDXG2P, NUMROC, PJLAENV
522      DOUBLE PRECISION   PDLAMCH
523      EXTERNAL           LSAME, ICEIL, INDXG2P, NUMROC, PJLAENV, PDLAMCH
524*     ..
525*     .. External Subroutines ..
526      EXTERNAL           BLACS_GRIDINFO, CHK1MAT, DGEBR2D, DGEBS2D,
527     $                   DSCAL, PCHK1MAT, PCHK2MAT, PXERBLA, PZHEEVX,
528     $                   PZHENGST, PZPOTRF, PZTRMM, PZTRSM
529*     ..
530*     .. Intrinsic Functions ..
531      INTRINSIC          ABS, DBLE, DCMPLX, ICHAR, INT, MAX, MIN, MOD,
532     $                   SQRT
533*     ..
534*     .. Executable Statements ..
535*       This is just to keep ftnchek and toolpack/1 happy
536      IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_*
537     $    RSRC_.LT.0 )RETURN
538*
539*     Get grid parameters
540*
541      ICTXT = DESCA( CTXT_ )
542      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
543*
544*     Test the input parameters
545*
546      INFO = 0
547      IF( NPROW.EQ.-1 ) THEN
548         INFO = -( 900+CTXT_ )
549      ELSE IF( DESCA( CTXT_ ).NE.DESCB( CTXT_ ) ) THEN
550         INFO = -( 1300+CTXT_ )
551      ELSE IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN
552         INFO = -( 2600+CTXT_ )
553      ELSE
554*
555*     Get machine constants.
556*
557         EPS = PDLAMCH( DESCA( CTXT_ ), 'Precision' )
558*
559         WANTZ = LSAME( JOBZ, 'V' )
560         UPPER = LSAME( UPLO, 'U' )
561         ALLEIG = LSAME( RANGE, 'A' )
562         VALEIG = LSAME( RANGE, 'V' )
563         INDEIG = LSAME( RANGE, 'I' )
564         CALL CHK1MAT( N, 4, N, 4, IA, JA, DESCA, 9, INFO )
565         CALL CHK1MAT( N, 4, N, 4, IB, JB, DESCB, 13, INFO )
566         CALL CHK1MAT( N, 4, N, 4, IZ, JZ, DESCZ, 26, INFO )
567         IF( INFO.EQ.0 ) THEN
568            IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN
569               RWORK( 1 ) = ABSTOL
570               IF( VALEIG ) THEN
571                  RWORK( 2 ) = VL
572                  RWORK( 3 ) = VU
573               ELSE
574                  RWORK( 2 ) = ZERO
575                  RWORK( 3 ) = ZERO
576               END IF
577               CALL DGEBS2D( DESCA( CTXT_ ), 'ALL', ' ', 3, 1, RWORK,
578     $                       3 )
579            ELSE
580               CALL DGEBR2D( DESCA( CTXT_ ), 'ALL', ' ', 3, 1, RWORK, 3,
581     $                       0, 0 )
582            END IF
583            IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ),
584     $              NPROW )
585            IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ),
586     $              NPROW )
587            IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ),
588     $              NPCOL )
589            IBCOL = INDXG2P( JB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ),
590     $              NPCOL )
591            IROFFA = MOD( IA-1, DESCA( MB_ ) )
592            ICOFFA = MOD( JA-1, DESCA( NB_ ) )
593            IROFFB = MOD( IB-1, DESCB( MB_ ) )
594            ICOFFB = MOD( JB-1, DESCB( NB_ ) )
595*
596*     Compute the total amount of space needed
597*
598            LQUERY = .FALSE.
599            IF( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 .OR. LRWORK.EQ.-1 )
600     $         LQUERY = .TRUE.
601*
602            LIWMIN = 6*MAX( N, ( NPROW*NPCOL )+1, 4 )
603*
604            NB = DESCA( MB_ )
605            NN = MAX( N, NB, 2 )
606            NP0 = NUMROC( NN, NB, 0, 0, NPROW )
607*
608            IF( ( .NOT.WANTZ ) .OR. ( VALEIG .AND. ( .NOT.LQUERY ) ) )
609     $           THEN
610               LWMIN = N + MAX( NB*( NP0+1 ), 3 )
611               LWOPT = LWMIN
612               LRWMIN = 5*NN + 4*N
613               IF( WANTZ ) THEN
614                  MQ0 = NUMROC( MAX( N, NB, 2 ), NB, 0, 0, NPCOL )
615                  LRWOPT = 4*N + MAX( 5*NN, NP0*MQ0 )
616               ELSE
617                  LRWOPT = LRWMIN
618               END IF
619               NEIG = 0
620            ELSE
621               IF( ALLEIG .OR. VALEIG ) THEN
622                  NEIG = N
623               ELSE IF( INDEIG ) THEN
624                  NEIG = IU - IL + 1
625               END IF
626               MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL )
627               LWMIN = N + ( NP0+MQ0+NB )*NB
628               LWOPT = LWMIN
629               LRWMIN = 4*N + MAX( 5*NN, NP0*MQ0 ) +
630     $                  ICEIL( NEIG, NPROW*NPCOL )*NN
631               LRWOPT = LRWMIN
632*
633            END IF
634*
635*           Compute how much workspace is needed to use the
636*           new TRD and GST algorithms
637*
638            ANB = PJLAENV( ICTXT, 3, 'PZHETTRD', 'L', 0, 0, 0, 0 )
639            SQNPC = INT( SQRT( DBLE( NPROW*NPCOL ) ) )
640            NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB )
641            NHETRD_LWOPT = 2*( ANB+1 )*( 4*NPS+2 ) + ( NPS+4 )*NPS
642            NB = DESCA( MB_ )
643            NP0 = NUMROC( N, NB, 0, 0, NPROW )
644            NQ0 = NUMROC( N, NB, 0, 0, NPCOL )
645            NHEGST_LWOPT = 2*NP0*NB + NQ0*NB + NB*NB
646            LWOPT = MAX( LWOPT, N+NHETRD_LWOPT, NHEGST_LWOPT )
647*
648*       Version 1.0 Limitations
649*
650            IF( IBTYPE.LT.1 .OR. IBTYPE.GT.3 ) THEN
651               INFO = -1
652            ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
653               INFO = -2
654            ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
655               INFO = -3
656            ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
657               INFO = -4
658            ELSE IF( N.LT.0 ) THEN
659               INFO = -5
660            ELSE IF( IROFFA.NE.0 ) THEN
661               INFO = -7
662            ELSE IF( ICOFFA.NE.0 ) THEN
663               INFO = -8
664            ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN
665               INFO = -( 900+NB_ )
666            ELSE IF( DESCA( M_ ).NE.DESCB( M_ ) ) THEN
667               INFO = -( 1300+M_ )
668            ELSE IF( DESCA( N_ ).NE.DESCB( N_ ) ) THEN
669               INFO = -( 1300+N_ )
670            ELSE IF( DESCA( MB_ ).NE.DESCB( MB_ ) ) THEN
671               INFO = -( 1300+MB_ )
672            ELSE IF( DESCA( NB_ ).NE.DESCB( NB_ ) ) THEN
673               INFO = -( 1300+NB_ )
674            ELSE IF( DESCA( RSRC_ ).NE.DESCB( RSRC_ ) ) THEN
675               INFO = -( 1300+RSRC_ )
676            ELSE IF( DESCA( CSRC_ ).NE.DESCB( CSRC_ ) ) THEN
677               INFO = -( 1300+CSRC_ )
678            ELSE IF( DESCA( CTXT_ ).NE.DESCB( CTXT_ ) ) THEN
679               INFO = -( 1300+CTXT_ )
680            ELSE IF( DESCA( M_ ).NE.DESCZ( M_ ) ) THEN
681               INFO = -( 2200+M_ )
682            ELSE IF( DESCA( N_ ).NE.DESCZ( N_ ) ) THEN
683               INFO = -( 2200+N_ )
684            ELSE IF( DESCA( MB_ ).NE.DESCZ( MB_ ) ) THEN
685               INFO = -( 2200+MB_ )
686            ELSE IF( DESCA( NB_ ).NE.DESCZ( NB_ ) ) THEN
687               INFO = -( 2200+NB_ )
688            ELSE IF( DESCA( RSRC_ ).NE.DESCZ( RSRC_ ) ) THEN
689               INFO = -( 2200+RSRC_ )
690            ELSE IF( DESCA( CSRC_ ).NE.DESCZ( CSRC_ ) ) THEN
691               INFO = -( 2200+CSRC_ )
692            ELSE IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN
693               INFO = -( 2200+CTXT_ )
694            ELSE IF( IROFFB.NE.0 .OR. IBROW.NE.IAROW ) THEN
695               INFO = -11
696            ELSE IF( ICOFFB.NE.0 .OR. IBCOL.NE.IACOL ) THEN
697               INFO = -12
698            ELSE IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN
699               INFO = -15
700            ELSE IF( INDEIG .AND. ( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) )
701     $                THEN
702               INFO = -16
703            ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) )
704     $                THEN
705               INFO = -17
706            ELSE IF( VALEIG .AND. ( ABS( RWORK( 2 )-VL ).GT.FIVE*EPS*
707     $               ABS( VL ) ) ) THEN
708               INFO = -14
709            ELSE IF( VALEIG .AND. ( ABS( RWORK( 3 )-VU ).GT.FIVE*EPS*
710     $               ABS( VU ) ) ) THEN
711               INFO = -15
712            ELSE IF( ABS( RWORK( 1 )-ABSTOL ).GT.FIVE*EPS*
713     $               ABS( ABSTOL ) ) THEN
714               INFO = -18
715            ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
716               INFO = -28
717            ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN
718               INFO = -30
719            ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
720               INFO = -32
721            END IF
722         END IF
723         IDUM1( 1 ) = IBTYPE
724         IDUM2( 1 ) = 1
725         IF( WANTZ ) THEN
726            IDUM1( 2 ) = ICHAR( 'V' )
727         ELSE
728            IDUM1( 2 ) = ICHAR( 'N' )
729         END IF
730         IDUM2( 2 ) = 2
731         IF( UPPER ) THEN
732            IDUM1( 3 ) = ICHAR( 'U' )
733         ELSE
734            IDUM1( 3 ) = ICHAR( 'L' )
735         END IF
736         IDUM2( 3 ) = 3
737         IF( ALLEIG ) THEN
738            IDUM1( 4 ) = ICHAR( 'A' )
739         ELSE IF( INDEIG ) THEN
740            IDUM1( 4 ) = ICHAR( 'I' )
741         ELSE
742            IDUM1( 4 ) = ICHAR( 'V' )
743         END IF
744         IDUM2( 4 ) = 4
745         IF( LQUERY ) THEN
746            IDUM1( 5 ) = -1
747         ELSE
748            IDUM1( 5 ) = 1
749         END IF
750         IDUM2( 5 ) = 5
751         CALL PCHK2MAT( N, 4, N, 4, IA, JA, DESCA, 9, N, 4, N, 4, IB,
752     $                  JB, DESCB, 13, 5, IDUM1, IDUM2, INFO )
753         CALL PCHK1MAT( N, 4, N, 4, IZ, JZ, DESCZ, 26, 0, IDUM1, IDUM2,
754     $                  INFO )
755      END IF
756*
757      IWORK( 1 ) = LIWMIN
758      WORK( 1 ) = DCMPLX( DBLE( LWOPT ) )
759      RWORK( 1 ) = DBLE( LRWOPT )
760*
761      IF( INFO.NE.0 ) THEN
762         CALL PXERBLA( ICTXT, 'PZHEGVX ', -INFO )
763         RETURN
764      ELSE IF( LQUERY ) THEN
765         RETURN
766      END IF
767*
768*     Form a Cholesky factorization of sub( B ).
769*
770      CALL PZPOTRF( UPLO, N, B, IB, JB, DESCB, INFO )
771      IF( INFO.NE.0 ) THEN
772         IWORK( 1 ) = LIWMIN
773         WORK( 1 ) = DCMPLX( DBLE( LWOPT ) )
774         RWORK( 1 ) = DBLE( LRWOPT )
775         IFAIL( 1 ) = INFO
776         INFO = IERRNPD
777         RETURN
778      END IF
779*
780*     Transform problem to standard eigenvalue problem and solve.
781*
782      CALL PZHENGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB,
783     $               DESCB, SCALE, WORK, LWORK, INFO )
784      CALL PZHEEVX( JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL, VU, IL,
785     $              IU, ABSTOL, M, NZ, W, ORFAC, Z, IZ, JZ, DESCZ, WORK,
786     $              LWORK, RWORK, LRWORK, IWORK, LIWORK, IFAIL, ICLUSTR,
787     $              GAP, INFO )
788*
789      IF( WANTZ ) THEN
790*
791*        Backtransform eigenvectors to the original problem.
792*
793         NEIG = M
794         IF( IBTYPE.EQ.1 .OR. IBTYPE.EQ.2 ) THEN
795*
796*           For sub( A )*x=(lambda)*sub( B )*x and
797*           sub( A )*sub( B )*x=(lambda)*x; backtransform eigenvectors:
798*           x = inv(L)'*y or inv(U)*y
799*
800            IF( UPPER ) THEN
801               TRANS = 'N'
802            ELSE
803               TRANS = 'C'
804            END IF
805*
806            CALL PZTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE,
807     $                   B, IB, JB, DESCB, Z, IZ, JZ, DESCZ )
808*
809         ELSE IF( IBTYPE.EQ.3 ) THEN
810*
811*           For sub( B )*sub( A )*x=(lambda)*x;
812*           backtransform eigenvectors: x = L*y or U'*y
813*
814            IF( UPPER ) THEN
815               TRANS = 'C'
816            ELSE
817               TRANS = 'N'
818            END IF
819*
820            CALL PZTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE,
821     $                   B, IB, JB, DESCB, Z, IZ, JZ, DESCZ )
822         END IF
823      END IF
824*
825      IF( SCALE.NE.ONE ) THEN
826         CALL DSCAL( N, SCALE, W, 1 )
827      END IF
828*
829      IWORK( 1 ) = LIWMIN
830      WORK( 1 ) = DCMPLX( DBLE( LWOPT ) )
831      RWORK( 1 ) = DBLE( LRWOPT )
832      RETURN
833*
834*     End of PZHEGVX
835*
836      END
837