1      SUBROUTINE PSSTEIN( N, D, E, M, W, IBLOCK, ISPLIT, ORFAC, Z, IZ,
2     $                    JZ, DESCZ, WORK, LWORK, IWORK, LIWORK, IFAIL,
3     $                    ICLUSTR, GAP, INFO )
4*
5*  -- ScaLAPACK routine (version 1.7) --
6*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
7*     and University of California, Berkeley.
8*     November 15, 1997
9*
10*     .. Scalar Arguments ..
11      INTEGER            INFO, IZ, JZ, LIWORK, LWORK, M, N
12      REAL               ORFAC
13*     ..
14*     .. Array Arguments ..
15      INTEGER            DESCZ( * ), IBLOCK( * ), ICLUSTR( * ),
16     $                   IFAIL( * ), ISPLIT( * ), IWORK( * )
17      REAL               D( * ), E( * ), GAP( * ), W( * ), WORK( * ),
18     $                   Z( * )
19*     ..
20*
21*  Purpose
22*  =======
23*
24*  PSSTEIN computes the eigenvectors of a symmetric tridiagonal matrix
25*  in parallel, using inverse iteration. The eigenvectors found
26*  correspond to user specified eigenvalues. PSSTEIN does not
27*  orthogonalize vectors that are on different processes. The extent
28*  of orthogonalization is controlled by the input parameter LWORK.
29*  Eigenvectors that are to be orthogonalized are computed by the same
30*  process. PSSTEIN decides on the allocation of work among the
31*  processes and then calls SSTEIN2 (modified LAPACK routine) on each
32*  individual process. If insufficient workspace is allocated, the
33*  expected orthogonalization may not be done.
34*
35*  Note : If the eigenvectors obtained are not orthogonal, increase
36*         LWORK and run the code again.
37*
38*  Notes
39*  =====
40*
41*
42*  Each global data object is described by an associated description
43*  vector.  This vector stores the information required to establish
44*  the mapping between an object element and its corresponding process
45*  and memory location.
46*
47*  Let A be a generic term for any 2D block cyclicly distributed array.
48*  Such a global array has an associated description vector DESCA.
49*  In the following comments, the character _ should be read as
50*  "of the global array".
51*
52*  NOTATION        STORED IN      EXPLANATION
53*  --------------- -------------- --------------------------------------
54*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
55*                                 DTYPE_A = 1.
56*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
57*                                 the BLACS process grid A is distribu-
58*                                 ted over. The context itself is glo-
59*                                 bal, but the handle (the integer
60*                                 value) may vary.
61*  M_A    (global) DESCA( M_ )    The number of rows in the global
62*                                 array A.
63*  N_A    (global) DESCA( N_ )    The number of columns in the global
64*                                 array A.
65*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
66*                                 the rows of the array.
67*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
68*                                 the columns of the array.
69*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
70*                                 row of the array A is distributed.
71*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
72*                                 first column of the array A is
73*                                 distributed.
74*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
75*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
76*
77*  Let K be the number of rows or columns of a distributed matrix,
78*  and assume that its process grid has dimension r x c.
79*  LOCr( K ) denotes the number of elements of K that a process
80*  would receive if K were distributed over the r processes of its
81*  process column.
82*  Similarly, LOCc( K ) denotes the number of elements of K that a
83*  process would receive if K were distributed over the c processes of
84*  its process row.
85*  The values of LOCr() and LOCc() may be determined via a call to the
86*  ScaLAPACK tool function, NUMROC:
87*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
88*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
89*  An upper bound for these quantities may be computed by:
90*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
91*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
92*
93*
94*  Arguments
95*  =========
96*
97*          P = NPROW * NPCOL is the total number of processes
98*
99*  N       (global input) INTEGER
100*          The order of the tridiagonal matrix T.  N >= 0.
101*
102*  D       (global input) REAL array, dimension (N)
103*          The n diagonal elements of the tridiagonal matrix T.
104*
105*  E       (global input) REAL array, dimension (N-1)
106*          The (n-1) off-diagonal elements of the tridiagonal matrix T.
107*
108*  M       (global input) INTEGER
109*          The total number of eigenvectors to be found. 0 <= M <= N.
110*
111*  W       (global input/global output) REAL array, dim (M)
112*          On input, the first M elements of W contain all the
113*          eigenvalues for which eigenvectors are to be computed. The
114*          eigenvalues should be grouped by split-off block and ordered
115*          from smallest to largest within the block (The output array
116*          W from PSSTEBZ with ORDER='b' is expected here). This
117*          array should be replicated on all processes.
118*          On output, the first M elements contain the input
119*          eigenvalues in ascending order.
120*
121*          Note : To obtain orthogonal vectors, it is best if
122*          eigenvalues are computed to highest accuracy ( this can be
123*          done by setting ABSTOL to the underflow threshold =
124*          SLAMCH('U') --- ABSTOL is an input parameter
125*          to PSSTEBZ )
126*
127*  IBLOCK  (global input) INTEGER array, dimension (N)
128*          The submatrix indices associated with the corresponding
129*          eigenvalues in W -- 1 for eigenvalues belonging to the
130*          first submatrix from the top, 2 for those belonging to
131*          the second submatrix, etc. (The output array IBLOCK
132*          from PSSTEBZ is expected here).
133*
134*  ISPLIT  (global input) INTEGER array, dimension (N)
135*          The splitting points, at which T breaks up into submatrices.
136*          The first submatrix consists of rows/columns 1 to ISPLIT(1),
137*          the second of rows/columns ISPLIT(1)+1 through ISPLIT(2),
138*          etc., and the NSPLIT-th consists of rows/columns
139*          ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N (The output array
140*          ISPLIT from PSSTEBZ is expected here.)
141*
142*  ORFAC   (global input) REAL
143*          ORFAC specifies which eigenvectors should be orthogonalized.
144*          Eigenvectors that correspond to eigenvalues which are within
145*          ORFAC*||T|| of each other are to be orthogonalized.
146*          However, if the workspace is insufficient (see LWORK), this
147*          tolerance may be decreased until all eigenvectors to be
148*          orthogonalized can be stored in one process.
149*          No orthogonalization will be done if ORFAC equals zero.
150*          A default value of 10^-3 is used if ORFAC is negative.
151*          ORFAC should be identical on all processes.
152*
153*  Z       (local output) REAL array,
154*          dimension (DESCZ(DLEN_), N/npcol + NB)
155*          Z contains the computed eigenvectors associated with the
156*          specified eigenvalues. Any vector which fails to converge is
157*          set to its current iterate after MAXITS iterations ( See
158*          SSTEIN2 ).
159*          On output, Z is distributed across the P processes in block
160*          cyclic format.
161*
162*  IZ      (global input) INTEGER
163*          Z's global row index, which points to the beginning of the
164*          submatrix which is to be operated on.
165*
166*  JZ      (global input) INTEGER
167*          Z's global column index, which points to the beginning of
168*          the submatrix which is to be operated on.
169*
170*  DESCZ   (global and local input) INTEGER array of dimension DLEN_.
171*          The array descriptor for the distributed matrix Z.
172*
173*  WORK    (local workspace/global output) REAL array,
174*          dimension ( LWORK )
175*          On output, WORK(1) gives a lower bound on the
176*          workspace ( LWORK ) that guarantees the user desired
177*          orthogonalization (see ORFAC).
178*          Note that this may overestimate the minimum workspace needed.
179*
180*  LWORK   (local input) integer
181*          LWORK  controls the extent of orthogonalization which can be
182*          done. The number of eigenvectors for which storage is
183*          allocated on each process is
184*                NVEC = floor(( LWORK- max(5*N,NP00*MQ00) )/N).
185*          Eigenvectors corresponding to eigenvalue clusters of size
186*          NVEC - ceil(M/P) + 1 are guaranteed to be orthogonal ( the
187*          orthogonality is similar to that obtained from SSTEIN2).
188*          Note : LWORK  must be no smaller than:
189*               max(5*N,NP00*MQ00) + ceil(M/P)*N,
190*          and should have the same input value on all processes.
191*          It is the minimum value of LWORK input on different processes
192*          that is significant.
193*
194*          If LWORK = -1, then LWORK is global input and a workspace
195*          query is assumed; the routine only calculates the minimum
196*          and optimal size for all work arrays. Each of these
197*          values is returned in the first entry of the corresponding
198*          work array, and no error message is issued by PXERBLA.
199*
200*  IWORK   (local workspace/global output) INTEGER array,
201*          dimension ( 3*N+P+1 )
202*          On return, IWORK(1) contains the amount of integer workspace
203*          required.
204*          On return, the IWORK(2) through IWORK(P+2) indicate
205*          the eigenvectors computed by each process. Process I computes
206*          eigenvectors indexed IWORK(I+2)+1 thru' IWORK(I+3).
207*
208*  LIWORK  (local input) INTEGER
209*          Size of array IWORK.  Must be >= 3*N + P + 1
210*
211*          If LIWORK = -1, then LIWORK is global input and a workspace
212*          query is assumed; the routine only calculates the minimum
213*          and optimal size for all work arrays. Each of these
214*          values is returned in the first entry of the corresponding
215*          work array, and no error message is issued by PXERBLA.
216*
217*  IFAIL   (global output) integer array, dimension (M)
218*          On normal exit, all elements of IFAIL are zero.
219*          If one or more eigenvectors fail to converge after MAXITS
220*          iterations (as in SSTEIN), then INFO > 0 is returned.
221*          If mod(INFO,M+1)>0, then
222*          for I=1 to mod(INFO,M+1), the eigenvector
223*          corresponding to the eigenvalue W(IFAIL(I)) failed to
224*          converge ( W refers to the array of eigenvalues on output ).
225*
226*  ICLUSTR (global output) integer array, dimension (2*P)
227*          This output array contains indices of eigenvectors
228*          corresponding to a cluster of eigenvalues that could not be
229*          orthogonalized due to insufficient workspace (see LWORK,
230*          ORFAC and INFO). Eigenvectors corresponding to clusters of
231*          eigenvalues indexed ICLUSTR(2*I-1) to ICLUSTR(2*I), I = 1 to
232*          INFO/(M+1), could not be orthogonalized due to lack of
233*          workspace. Hence the eigenvectors corresponding to these
234*          clusters may not be orthogonal. ICLUSTR is a zero terminated
235*          array --- ( ICLUSTR(2*K).NE.0 .AND. ICLUSTR(2*K+1).EQ.0 )
236*          if and only if K is the number of clusters.
237*
238*  GAP     (global output) REAL array, dimension (P)
239*          This output array contains the gap between eigenvalues whose
240*          eigenvectors could not be orthogonalized. The INFO/M output
241*          values in this array correspond to the INFO/(M+1) clusters
242*          indicated by the array ICLUSTR. As a result, the dot product
243*          between eigenvectors corresponding to the I^th cluster may be
244*          as high as ( O(n)*macheps ) / GAP(I).
245*
246*  INFO    (global output) INTEGER
247*          = 0:  successful exit
248*          < 0:  If the i-th argument is an array and the j-entry had
249*                an illegal value, then INFO = -(i*100+j), if the i-th
250*                argument is a scalar and had an illegal value, then
251*                INFO = -i.
252*          < 0 :  if INFO = -I, the I-th argument had an illegal value
253*          > 0 :  if mod(INFO,M+1) = I, then I eigenvectors failed to
254*                 converge in MAXITS iterations. Their indices are
255*                 stored in the array IFAIL.
256*                 if INFO/(M+1) = I, then eigenvectors corresponding to
257*                 I clusters of eigenvalues could not be orthogonalized
258*                 due to insufficient workspace. The indices of the
259*                 clusters are stored in the array ICLUSTR.
260*
261*  =====================================================================
262*
263*     .. Intrinsic Functions ..
264      INTRINSIC          ABS, MAX, MIN, MOD, REAL
265*     ..
266*     .. External Functions ..
267      INTEGER            ICEIL, NUMROC
268      EXTERNAL           ICEIL, NUMROC
269*     ..
270*     .. External Subroutines ..
271      EXTERNAL           BLACS_GRIDINFO, CHK1MAT, IGAMN2D, IGEBR2D,
272     $                   IGEBS2D, PCHK1MAT, PSLAEVSWP, PXERBLA, SGEBR2D,
273     $                   SGEBS2D, SLASRT2, SSTEIN2
274*     ..
275*     .. Parameters ..
276      INTEGER            BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
277     $                   MB_, NB_, RSRC_, CSRC_, LLD_
278      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
279     $                   CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
280     $                   RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
281      REAL               ZERO, NEGONE, ODM1, FIVE, ODM3, ODM18
282      PARAMETER          ( ZERO = 0.0E+0, NEGONE = -1.0E+0,
283     $                   ODM1 = 1.0E-1, FIVE = 5.0E+0, ODM3 = 1.0E-3,
284     $                   ODM18 = 1.0E-18 )
285*     ..
286*     .. Local Scalars ..
287      LOGICAL            LQUERY, SORTED
288      INTEGER            B1, BN, BNDRY, CLSIZ, COL, I, IFIRST, IINFO,
289     $                   ILAST, IM, INDRW, ITMP, J, K, LGCLSIZ, LLWORK,
290     $                   LOAD, LOCINFO, MAXVEC, MQ00, MYCOL, MYROW,
291     $                   NBLK, NERR, NEXT, NP00, NPCOL, NPROW, NVS,
292     $                   OLNBLK, P, ROW, SELF, TILL, TOTERR
293      REAL               DIFF, MINGAP, ONENRM, ORGFAC, ORTOL, TMPFAC
294*     ..
295*     .. Local Arrays ..
296      INTEGER            IDUM1( 1 ), IDUM2( 1 )
297*     ..
298*     .. Executable Statements ..
299*       This is just to keep ftnchek happy
300      IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_*
301     $    RSRC_.LT.0 )RETURN
302*
303      CALL BLACS_GRIDINFO( DESCZ( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL )
304      SELF = MYROW*NPCOL + MYCOL
305*
306*     Make sure that we belong to this context (before calling PCHK1MAT)
307*
308      INFO = 0
309      IF( NPROW.EQ.-1 ) THEN
310         INFO = -( 1200+CTXT_ )
311      ELSE
312*
313*        Make sure that NPROW>0 and NPCOL>0 before calling NUMROC
314*
315         CALL CHK1MAT( N, 1, N, 1, IZ, JZ, DESCZ, 12, INFO )
316         IF( INFO.EQ.0 ) THEN
317*
318*           Now we know that our context is good enough to
319*           perform the rest of the checks
320*
321            NP00 = NUMROC( N, DESCZ( MB_ ), 0, 0, NPROW )
322            MQ00 = NUMROC( M, DESCZ( NB_ ), 0, 0, NPCOL )
323            P = NPROW*NPCOL
324*
325*           Compute the maximum number of vectors per process
326*
327            LLWORK = LWORK
328            CALL IGAMN2D( DESCZ( CTXT_ ), 'A', ' ', 1, 1, LLWORK, 1, 1,
329     $                    1, -1, -1, -1 )
330            INDRW = MAX( 5*N, NP00*MQ00 )
331            IF( N.NE.0 )
332     $         MAXVEC = ( LLWORK-INDRW ) / N
333            LOAD = ICEIL( M, P )
334            IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN
335               TMPFAC = ORFAC
336               CALL SGEBS2D( DESCZ( CTXT_ ), 'ALL', ' ', 1, 1, TMPFAC,
337     $                       1 )
338            ELSE
339               CALL SGEBR2D( DESCZ( CTXT_ ), 'ALL', ' ', 1, 1, TMPFAC,
340     $                       1, 0, 0 )
341            END IF
342*
343            LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
344            IF( M.LT.0 .OR. M.GT.N ) THEN
345               INFO = -4
346            ELSE IF( MAXVEC.LT.LOAD .AND. .NOT.LQUERY ) THEN
347               INFO = -14
348            ELSE IF( LIWORK.LT.3*N+P+1 .AND. .NOT.LQUERY ) THEN
349               INFO = -16
350            ELSE
351               DO 10 I = 2, M
352                  IF( IBLOCK( I ).LT.IBLOCK( I-1 ) ) THEN
353                     INFO = -6
354                     GO TO 20
355                  END IF
356                  IF( IBLOCK( I ).EQ.IBLOCK( I-1 ) .AND. W( I ).LT.
357     $                W( I-1 ) ) THEN
358                     INFO = -5
359                     GO TO 20
360                  END IF
361   10          CONTINUE
362   20          CONTINUE
363               IF( INFO.EQ.0 ) THEN
364                  IF( ABS( TMPFAC-ORFAC ).GT.FIVE*ABS( TMPFAC ) )
365     $               INFO = -8
366               END IF
367            END IF
368*
369         END IF
370         IDUM1( 1 ) = M
371         IDUM2( 1 ) = 4
372         CALL PCHK1MAT( N, 1, N, 1, IZ, JZ, DESCZ, 12, 1, IDUM1, IDUM2,
373     $                  INFO )
374         WORK( 1 ) = REAL( MAX( 5*N, NP00*MQ00 )+ICEIL( M, P )*N )
375         IWORK( 1 ) = 3*N + P + 1
376      END IF
377      IF( INFO.NE.0 ) THEN
378         CALL PXERBLA( DESCZ( CTXT_ ), 'PSSTEIN', -INFO )
379         RETURN
380      ELSE IF( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) THEN
381         RETURN
382      END IF
383*
384      DO 30 I = 1, M
385         IFAIL( I ) = 0
386   30 CONTINUE
387      DO 40 I = 1, P + 1
388         IWORK( I ) = 0
389   40 CONTINUE
390      DO 50 I = 1, P
391         GAP( I ) = NEGONE
392         ICLUSTR( 2*I-1 ) = 0
393         ICLUSTR( 2*I ) = 0
394   50 CONTINUE
395*
396*
397*     Quick return if possible
398*
399      IF( N.EQ.0 .OR. M.EQ.0 )
400     $   RETURN
401*
402      IF( ORFAC.GE.ZERO ) THEN
403         TMPFAC = ORFAC
404      ELSE
405         TMPFAC = ODM3
406      END IF
407      ORGFAC = TMPFAC
408*
409*     Allocate the work among the processes
410*
411      ILAST = M / LOAD
412      IF( MOD( M, LOAD ).EQ.0 )
413     $   ILAST = ILAST - 1
414      OLNBLK = -1
415      NVS = 0
416      NEXT = 1
417      IM = 0
418      ONENRM = ZERO
419      DO 100 I = 0, ILAST - 1
420         NEXT = NEXT + LOAD
421         J = NEXT - 1
422         IF( J.GT.NVS ) THEN
423            NBLK = IBLOCK( NEXT )
424            IF( NBLK.EQ.IBLOCK( NEXT-1 ) .AND. NBLK.NE.OLNBLK ) THEN
425*
426*              Compute orthogonalization criterion
427*
428               IF( NBLK.EQ.1 ) THEN
429                  B1 = 1
430               ELSE
431                  B1 = ISPLIT( NBLK-1 ) + 1
432               END IF
433               BN = ISPLIT( NBLK )
434*
435               ONENRM = ABS( D( B1 ) ) + ABS( E( B1 ) )
436               ONENRM = MAX( ONENRM, ABS( D( BN ) )+ABS( E( BN-1 ) ) )
437               DO 60 J = B1 + 1, BN - 1
438                  ONENRM = MAX( ONENRM, ABS( D( J ) )+ABS( E( J-1 ) )+
439     $                     ABS( E( J ) ) )
440   60          CONTINUE
441               OLNBLK = NBLK
442            END IF
443            TILL = NVS + MAXVEC
444   70       CONTINUE
445            J = NEXT - 1
446            IF( TMPFAC.GT.ODM18 ) THEN
447               ORTOL = TMPFAC*ONENRM
448               DO 80 J = NEXT - 1, MIN( TILL, M-1 )
449                  IF( IBLOCK( J+1 ).NE.IBLOCK( J ) .OR. W( J+1 )-
450     $                W( J ).GE.ORTOL ) THEN
451                     GO TO 90
452                  END IF
453   80          CONTINUE
454               IF( J.EQ.M .AND. TILL.GE.M )
455     $            GO TO 90
456               TMPFAC = TMPFAC*ODM1
457               GO TO 70
458            END IF
459   90       CONTINUE
460            J = MIN( J, TILL )
461         END IF
462         IF( SELF.EQ.I )
463     $      IM = MAX( 0, J-NVS )
464*
465         IWORK( I+1 ) = NVS
466         NVS = MAX( J, NVS )
467  100 CONTINUE
468      IF( SELF.EQ.ILAST )
469     $   IM = M - NVS
470      IWORK( ILAST+1 ) = NVS
471      DO 110 I = ILAST + 2, P + 1
472         IWORK( I ) = M
473  110 CONTINUE
474*
475      CLSIZ = 1
476      LGCLSIZ = 1
477      ILAST = 0
478      NBLK = 0
479      BNDRY = 2
480      K = 1
481      DO 140 I = 1, M
482         IF( IBLOCK( I ).NE.NBLK ) THEN
483            NBLK = IBLOCK( I )
484            IF( NBLK.EQ.1 ) THEN
485               B1 = 1
486            ELSE
487               B1 = ISPLIT( NBLK-1 ) + 1
488            END IF
489            BN = ISPLIT( NBLK )
490*
491            ONENRM = ABS( D( B1 ) ) + ABS( E( B1 ) )
492            ONENRM = MAX( ONENRM, ABS( D( BN ) )+ABS( E( BN-1 ) ) )
493            DO 120 J = B1 + 1, BN - 1
494               ONENRM = MAX( ONENRM, ABS( D( J ) )+ABS( E( J-1 ) )+
495     $                  ABS( E( J ) ) )
496  120       CONTINUE
497*
498         END IF
499         IF( I.GT.1 ) THEN
500            DIFF = W( I ) - W( I-1 )
501            IF( IBLOCK( I ).NE.IBLOCK( I-1 ) .OR. I.EQ.M .OR. DIFF.GT.
502     $          ORGFAC*ONENRM ) THEN
503               IFIRST = ILAST
504               IF( I.EQ.M ) THEN
505                  IF( IBLOCK( M ).NE.IBLOCK( M-1 ) .OR. DIFF.GT.ORGFAC*
506     $                ONENRM ) THEN
507                     ILAST = M - 1
508                  ELSE
509                     ILAST = M
510                  END IF
511               ELSE
512                  ILAST = I - 1
513               END IF
514               CLSIZ = ILAST - IFIRST
515               IF( CLSIZ.GT.1 ) THEN
516                  IF( LGCLSIZ.LT.CLSIZ )
517     $               LGCLSIZ = CLSIZ
518                  MINGAP = ONENRM
519  130             CONTINUE
520                  IF( BNDRY.GT.P+1 )
521     $               GO TO 150
522                  IF( IWORK( BNDRY ).GT.IFIRST .AND. IWORK( BNDRY ).LT.
523     $                ILAST ) THEN
524                     MINGAP = MIN( W( IWORK( BNDRY )+1 )-
525     $                        W( IWORK( BNDRY ) ), MINGAP )
526                  ELSE IF( IWORK( BNDRY ).GE.ILAST ) THEN
527                     IF( MINGAP.LT.ONENRM ) THEN
528                        ICLUSTR( 2*K-1 ) = IFIRST + 1
529                        ICLUSTR( 2*K ) = ILAST
530                        GAP( K ) = MINGAP / ONENRM
531                        K = K + 1
532                     END IF
533                     GO TO 140
534                  END IF
535                  BNDRY = BNDRY + 1
536                  GO TO 130
537               END IF
538            END IF
539         END IF
540  140 CONTINUE
541  150 CONTINUE
542      INFO = ( K-1 )*( M+1 )
543*
544*     Call SSTEIN2 to find the eigenvectors
545*
546      CALL SSTEIN2( N, D, E, IM, W( IWORK( SELF+1 )+1 ),
547     $              IBLOCK( IWORK( SELF+1 )+1 ), ISPLIT, ORGFAC,
548     $              WORK( INDRW+1 ), N, WORK, IWORK( P+2 ),
549     $              IFAIL( IWORK( SELF+1 )+1 ), LOCINFO )
550*
551*     Redistribute the eigenvector matrix to conform with the block
552*     cyclic distribution of the input matrix
553*
554*
555      DO 160 I = 1, M
556         IWORK( P+1+I ) = I
557  160 CONTINUE
558*
559      CALL SLASRT2( 'I', M, W, IWORK( P+2 ), IINFO )
560*
561      DO 170 I = 1, M
562         IWORK( M+P+1+IWORK( P+1+I ) ) = I
563  170 CONTINUE
564*
565*
566      DO 180 I = 1, LOCINFO
567         ITMP = IWORK( SELF+1 ) + I
568         IFAIL( ITMP ) = IFAIL( ITMP ) + ITMP - I
569         IFAIL( ITMP ) = IWORK( M+P+1+IFAIL( ITMP ) )
570  180 CONTINUE
571*
572      DO 190 I = 1, K - 1
573         ICLUSTR( 2*I-1 ) = IWORK( M+P+1+ICLUSTR( 2*I-1 ) )
574         ICLUSTR( 2*I ) = IWORK( M+P+1+ICLUSTR( 2*I ) )
575  190 CONTINUE
576*
577*
578* Still need to apply the above permutation to IFAIL
579*
580*
581      TOTERR = 0
582      DO 210 I = 1, P
583         IF( SELF.EQ.I-1 ) THEN
584            CALL IGEBS2D( DESCZ( CTXT_ ), 'ALL', ' ', 1, 1, LOCINFO, 1 )
585            IF( LOCINFO.NE.0 ) THEN
586               CALL IGEBS2D( DESCZ( CTXT_ ), 'ALL', ' ', LOCINFO, 1,
587     $                       IFAIL( IWORK( I )+1 ), LOCINFO )
588               DO 200 J = 1, LOCINFO
589                  IFAIL( TOTERR+J ) = IFAIL( IWORK( I )+J )
590  200          CONTINUE
591               TOTERR = TOTERR + LOCINFO
592            END IF
593         ELSE
594*
595            ROW = ( I-1 ) / NPCOL
596            COL = MOD( I-1, NPCOL )
597*
598            CALL IGEBR2D( DESCZ( CTXT_ ), 'ALL', ' ', 1, 1, NERR, 1,
599     $                    ROW, COL )
600            IF( NERR.NE.0 ) THEN
601               CALL IGEBR2D( DESCZ( CTXT_ ), 'ALL', ' ', NERR, 1,
602     $                       IFAIL( TOTERR+1 ), NERR, ROW, COL )
603               TOTERR = TOTERR + NERR
604            END IF
605         END IF
606  210 CONTINUE
607      INFO = INFO + TOTERR
608*
609*
610      CALL PSLAEVSWP( N, WORK( INDRW+1 ), N, Z, IZ, JZ, DESCZ, IWORK,
611     $                IWORK( M+P+2 ), WORK, INDRW )
612*
613      DO 220 I = 2, P
614         IWORK( I ) = IWORK( M+P+1+IWORK( I ) )
615  220 CONTINUE
616*
617*
618*     Sort the IWORK array
619*
620*
621  230 CONTINUE
622      SORTED = .TRUE.
623      DO 240 I = 2, P - 1
624         IF( IWORK( I ).GT.IWORK( I+1 ) ) THEN
625            ITMP = IWORK( I+1 )
626            IWORK( I+1 ) = IWORK( I )
627            IWORK( I ) = ITMP
628            SORTED = .FALSE.
629         END IF
630  240 CONTINUE
631      IF( .NOT.SORTED )
632     $   GO TO 230
633*
634      DO 250 I = P + 1, 1, -1
635         IWORK( I+1 ) = IWORK( I )
636  250 CONTINUE
637*
638      WORK( 1 ) = ( LGCLSIZ+LOAD-1 )*N + INDRW
639      IWORK( 1 ) = 3*N + P + 1
640*
641*     End of PSSTEIN
642*
643      END
644