1      RECURSIVE SUBROUTINE PSLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H,
2     $                              DESCH, ILOZ, IHIZ, Z, DESCZ, NS, ND,
3     $                              SR, SI, V, DESCV, NH, T, DESCT, NV,
4     $                              WV, DESCW, WORK, LWORK, IWORK,
5     $                              LIWORK, RECLEVEL )
6*
7*     Contribution from the Department of Computing Science and HPC2N,
8*     Umea University, Sweden
9*
10*  -- ScaLAPACK auxiliary routine (version 2.0.1) --
11*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
12*     Univ. of Colorado Denver and University of California, Berkeley.
13*     January, 2012
14*
15      IMPLICIT NONE
16*
17*     .. Scalar Arguments ..
18      INTEGER            IHIZ, ILOZ, KBOT, KTOP, LWORK, N, ND, NH, NS,
19     $                   NV, NW, LIWORK, RECLEVEL
20      LOGICAL            WANTT, WANTZ
21*     ..
22*     .. Array Arguments ..
23      INTEGER            DESCH( * ), DESCZ( * ), DESCT( * ), DESCV( * ),
24     $                   DESCW( * ), IWORK( * )
25      REAL               H( * ), SI( KBOT ), SR( KBOT ), T( * ),
26     $                   V( * ), WORK( * ), WV( * ),
27     $                   Z( * )
28*     ..
29*
30*  Purpose
31*  =======
32*
33*  Aggressive early deflation:
34*
35*  This subroutine accepts as input an upper Hessenberg matrix H and
36*  performs an orthogonal similarity transformation designed to detect
37*  and deflate fully converged eigenvalues from a trailing principal
38*  submatrix.  On output H has been overwritten by a new Hessenberg
39*  matrix that is a perturbation of an orthogonal similarity
40*  transformation of H.  It is to be hoped that the final version of H
41*  has many zero subdiagonal entries.
42*
43*  Notes
44*  =====
45*
46*  Each global data object is described by an associated description
47*  vector.  This vector stores the information required to establish
48*  the mapping between an object element and its corresponding process
49*  and memory location.
50*
51*  Let A be a generic term for any 2D block cyclicly distributed array.
52*  Such a global array has an associated description vector DESCA.
53*  In the following comments, the character _ should be read as
54*  "of the global array".
55*
56*  NOTATION        STORED IN      EXPLANATION
57*  --------------- -------------- --------------------------------------
58*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
59*                                 DTYPE_A = 1.
60*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
61*                                 the BLACS process grid A is distribu-
62*                                 ted over. The context itself is glo-
63*                                 bal, but the handle (the integer
64*                                 value) may vary.
65*  M_A    (global) DESCA( M_ )    The number of rows in the global
66*                                 array A.
67*  N_A    (global) DESCA( N_ )    The number of columns in the global
68*                                 array A.
69*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
70*                                 the rows of the array.
71*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
72*                                 the columns of the array.
73*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
74*                                 row of the array A is distributed.
75*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
76*                                 first column of the array A is
77*                                 distributed.
78*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
79*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
80*
81*  Let K be the number of rows or columns of a distributed matrix,
82*  and assume that its process grid has dimension p x q.
83*  LOCr( K ) denotes the number of elements of K that a process
84*  would receive if K were distributed over the p processes of its
85*  process column.
86*  Similarly, LOCc( K ) denotes the number of elements of K that a
87*  process would receive if K were distributed over the q processes of
88*  its process row.
89*  The values of LOCr() and LOCc() may be determined via a call to the
90*  ScaLAPACK tool function, NUMROC:
91*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
92*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
93*  An upper bound for these quantities may be computed by:
94*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
95*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
96*
97*  Arguments
98*  =========
99*
100*  WANTT   (global input) LOGICAL
101*          If .TRUE., then the Hessenberg matrix H is fully updated
102*          so that the quasi-triangular Schur factor may be
103*          computed (in cooperation with the calling subroutine).
104*          If .FALSE., then only enough of H is updated to preserve
105*          the eigenvalues.
106*
107*  WANTZ   (global input) LOGICAL
108*          If .TRUE., then the orthogonal matrix Z is updated so
109*          so that the orthogonal Schur factor may be computed
110*          (in cooperation with the calling subroutine).
111*          If .FALSE., then Z is not referenced.
112*
113*  N       (global input) INTEGER
114*          The order of the matrix H and (if WANTZ is .TRUE.) the
115*          order of the orthogonal matrix Z.
116*
117*  KTOP    (global input) INTEGER
118*          It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.
119*          KBOT and KTOP together determine an isolated block
120*          along the diagonal of the Hessenberg matrix.
121*
122*  KBOT    (global input) INTEGER
123*          It is assumed without a check that either
124*          KBOT = N or H(KBOT+1,KBOT)=0.  KBOT and KTOP together
125*          determine an isolated block along the diagonal of the
126*          Hessenberg matrix.
127*
128*  NW      (global input) INTEGER
129*          Deflation window size.  1 .LE. NW .LE. (KBOT-KTOP+1).
130*
131*  H       (local input/output) REAL array, dimension
132*             (DESCH(LLD_),*)
133*          On input the initial N-by-N section of H stores the
134*          Hessenberg matrix undergoing aggressive early deflation.
135*          On output H has been transformed by an orthogonal
136*          similarity transformation, perturbed, and the returned
137*          to Hessenberg form that (it is to be hoped) has some
138*          zero subdiagonal entries.
139*
140*  DESCH   (global and local input) INTEGER array of dimension DLEN_.
141*          The array descriptor for the distributed matrix H.
142*
143*  ILOZ    (global input) INTEGER
144*  IHIZ    (global input) INTEGER
145*          Specify the rows of Z to which transformations must be
146*          applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
147*
148*  Z       (input/output) REAL array, dimension
149*             (DESCH(LLD_),*)
150*          IF WANTZ is .TRUE., then on output, the orthogonal
151*          similarity transformation mentioned above has been
152*          accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.
153*          If WANTZ is .FALSE., then Z is unreferenced.
154*
155*  DESCZ   (global and local input) INTEGER array of dimension DLEN_.
156*          The array descriptor for the distributed matrix Z.
157*
158*  NS      (global output) INTEGER
159*          The number of unconverged (ie approximate) eigenvalues
160*          returned in SR and SI that may be used as shifts by the
161*          calling subroutine.
162*
163*  ND      (global output) INTEGER
164*          The number of converged eigenvalues uncovered by this
165*          subroutine.
166*
167*  SR      (global output) REAL array, dimension KBOT
168*  SI      (global output) REAL array, dimension KBOT
169*          On output, the real and imaginary parts of approximate
170*          eigenvalues that may be used for shifts are stored in
171*          SR(KBOT-ND-NS+1) through SR(KBOT-ND) and
172*          SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively.
173*          The real and imaginary parts of converged eigenvalues
174*          are stored in SR(KBOT-ND+1) through SR(KBOT) and
175*          SI(KBOT-ND+1) through SI(KBOT), respectively.
176*
177*  V       (global workspace) REAL array, dimension
178*             (DESCV(LLD_),*)
179*          An NW-by-NW distributed work array.
180*
181*  DESCV   (global and local input) INTEGER array of dimension DLEN_.
182*          The array descriptor for the distributed matrix V.
183*
184*  NH      (input) INTEGER scalar
185*          The number of columns of T.  NH.GE.NW.
186*
187*  T       (global workspace) REAL array, dimension
188*             (DESCV(LLD_),*)
189*
190*  DESCT   (global and local input) INTEGER array of dimension DLEN_.
191*          The array descriptor for the distributed matrix T.
192*
193*  NV      (global input) INTEGER
194*          The number of rows of work array WV available for
195*          workspace.  NV.GE.NW.
196*
197*  WV      (global workspace) REAL array, dimension
198*             (DESCW(LLD_),*)
199*
200*  DESCW   (global and local input) INTEGER array of dimension DLEN_.
201*          The array descriptor for the distributed matrix WV.
202*
203*  WORK    (local workspace) REAL array, dimension LWORK.
204*          On exit, WORK(1) is set to an estimate of the optimal value
205*          of LWORK for the given values of N, NW, KTOP and KBOT.
206*
207*  LWORK   (local input) INTEGER
208*          The dimension of the work array WORK.  LWORK = 2*NW
209*          suffices, but greater efficiency may result from larger
210*          values of LWORK.
211*
212*          If LWORK = -1, then a workspace query is assumed; PSLAQR3
213*          only estimates the optimal workspace size for the given
214*          values of N, NW, KTOP and KBOT.  The estimate is returned
215*          in WORK(1).  No error message related to LWORK is issued
216*          by XERBLA.  Neither H nor Z are accessed.
217*
218*  IWORK   (local workspace) INTEGER array, dimension (LIWORK)
219*
220*  LIWORK  (local input) INTEGER
221*          The length of the workspace array IWORK
222*
223*  ================================================================
224*  Based on contributions by
225*        Robert Granat and Meiyue Shao,
226*        Department of Computing Science and HPC2N,
227*        Umea University, Sweden
228*
229*  ================================================================
230*     .. Parameters ..
231      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
232     $                   LLD_, MB_, M_, NB_, N_, RSRC_
233      INTEGER            RECMAX
234      LOGICAL            SORTGRAD
235      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
236     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
237     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9, RECMAX = 3,
238     $                     SORTGRAD = .FALSE. )
239      REAL               ZERO, ONE
240      PARAMETER          ( ZERO = 0.0, ONE = 1.0 )
241*     ..
242*     .. Local Scalars ..
243      REAL               AA, BB, BETA, CC, CS, DD, EVI, EVK, FOO, S,
244     $                   SAFMAX, SAFMIN, SMLNUM, SN, TAU, ULP,
245     $                   ELEM, ELEM1, ELEM2, ELEM3, R1, ANORM, RNORM,
246     $                   RESAED
247      INTEGER            I, IFST, ILST, INFO, INFQR, J, JW, K, KCOL,
248     $                   KEND, KLN, KROW, KWTOP, LTOP, LWK1, LWK2, LWK3,
249     $                   LWKOPT, NMIN, LLDH, LLDZ, LLDT, LLDV, LLDWV,
250     $                   ICTXT, NPROW, NMAX, NPCOL, MYROW, MYCOL, NB,
251     $                   IROFFH, M, RCOLS, TAUROWS, RROWS, TAUCOLS,
252     $                   ITAU, IR, IPW, NPROCS, MLOC, IROFFHH,
253     $                   ICOFFHH, HHRSRC, HHCSRC, HHROWS, HHCOLS,
254     $                   IROFFZZ, ICOFFZZ, ZZRSRC, ZZCSRC, ZZROWS,
255     $                   ZZCOLS, IERR, TZROWS0, TZCOLS0, IERR0, IPT0,
256     $                   IPZ0, IPW0, NB2, ROUND, LILST, KK, LILST0,
257     $                   IWRK1, RSRC, CSRC, LWK4, LWK5, IWRK2, LWK6,
258     $                   LWK7, LWK8, ILWKOPT, TZROWS, TZCOLS, NSEL,
259     $                   NPMIN, ICTXT_NEW, MYROW_NEW, MYCOL_NEW
260      LOGICAL            BULGE, SORTED, LQUERY
261*     ..
262*     .. Local Arrays ..
263      INTEGER            PAR( 6 ), DESCR( DLEN_ ),
264     $                   DESCTAU( DLEN_ ), DESCHH( DLEN_ ),
265     $                   DESCZZ( DLEN_ ), DESCTZ0( DLEN_ ),
266     $                   PMAP( 64*64 )
267      REAL               DDUM( 1 )
268*     ..
269*     .. External Functions ..
270      REAL               SLAMCH, PSLANGE
271      INTEGER            PILAENVX, NUMROC, INDXG2P, ICEIL, BLACS_PNUM
272      EXTERNAL           SLAMCH, PILAENVX, NUMROC, INDXG2P, PSLANGE,
273     $                   ICEIL, BLACS_PNUM
274*     ..
275*     .. External Subroutines ..
276      EXTERNAL           PSCOPY, PSGEHRD, PSGEMM, SLABAD, PSLACPY,
277     $                   PSLAQR1, SLANV2, PSLAQR0, PSLARF, PSLARFG,
278     $                   PSLASET, PSTRORD, PSELGET, PSELSET,
279     $                   PSLAMVE, BLACS_GRIDINFO, BLACS_GRIDMAP,
280     $                   BLACS_GRIDEXIT, PSGEMR2D
281*     ..
282*     .. Intrinsic Functions ..
283      INTRINSIC          ABS, FLOAT, INT, MAX, MIN, SQRT
284*     ..
285*     .. Executable Statements ..
286      ICTXT = DESCH( CTXT_ )
287      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
288      NPROCS = NPROW*NPCOL
289*
290*     Extract local leading dimensions, blockfactors, offset for
291*     keeping the alignment requirements and size of deflation window.
292*
293      LLDH  = DESCH( LLD_ )
294      LLDZ  = DESCZ( LLD_ )
295      LLDT  = DESCT( LLD_ )
296      LLDV  = DESCV( LLD_ )
297      LLDWV = DESCW( LLD_ )
298      NB = DESCH( MB_ )
299      IROFFH = MOD( KTOP - 1, NB )
300      JW = MIN( NW, KBOT-KTOP+1 )
301      NSEL = NB+JW
302*
303*     Extract environment variables for parallel eigenvalue reordering.
304*
305      PAR(1) = PILAENVX(ICTXT, 17, 'PSLAQR3', 'SV', JW, NB, -1, -1)
306      PAR(2) = PILAENVX(ICTXT, 18, 'PSLAQR3', 'SV', JW, NB, -1, -1)
307      PAR(3) = PILAENVX(ICTXT, 19, 'PSLAQR3', 'SV', JW, NB, -1, -1)
308      PAR(4) = PILAENVX(ICTXT, 20, 'PSLAQR3', 'SV', JW, NB, -1, -1)
309      PAR(5) = PILAENVX(ICTXT, 21, 'PSLAQR3', 'SV', JW, NB, -1, -1)
310      PAR(6) = PILAENVX(ICTXT, 22, 'PSLAQR3', 'SV', JW, NB, -1, -1)
311*
312*     Check if workspace query.
313*
314      LQUERY = LWORK.EQ.-1 .OR. LIWORK.EQ.-1
315*
316*     Estimate optimal workspace.
317*
318      IF( JW.LE.2 ) THEN
319         LWKOPT = 1
320      ELSE
321*
322*        Workspace query calls to PSGEHRD and PSORMHR.
323*
324         TAUROWS = NUMROC( 1, 1, MYCOL, DESCV(RSRC_), NPROW )
325         TAUCOLS = NUMROC( JW+IROFFH, NB, MYCOL, DESCV(CSRC_),
326     $        NPCOL )
327         CALL PSGEHRD( JW, 1, JW, T, 1, 1, DESCT, WORK, WORK, -1,
328     $        INFO )
329         LWK1 = INT( WORK( 1 ) ) + TAUROWS*TAUCOLS
330*
331*        Workspace query call to PSORMHR.
332*
333         CALL PSORMHR( 'Right', 'No', JW, JW, 1, JW, T, 1, 1, DESCT,
334     $        WORK, V, 1, 1, DESCV, WORK, -1, INFO )
335         LWK2 = INT( WORK( 1 ) )
336*
337*        Workspace query call to PSLAQR0.
338*
339         NMIN = PILAENVX( ICTXT, 12, 'PSLAQR3', 'SV', JW, 1, JW, LWORK )
340         NMAX = ( N-1 ) / 3
341         IF( JW+IROFFH.GT.NMIN .AND. JW+IROFFH.LE.NMAX
342     $        .AND. RECLEVEL.LT.RECMAX ) THEN
343            CALL PSLAQR0( .TRUE., .TRUE., JW+IROFFH, 1+IROFFH,
344     $           JW+IROFFH, T, DESCT, SR, SI, 1, JW, V, DESCV,
345     $           WORK, -1, IWORK, LIWORK-NSEL, INFQR,
346     $           RECLEVEL+1 )
347            LWK3 = INT( WORK( 1 ) )
348            IWRK1 = IWORK( 1 )
349         ELSE
350            RSRC = DESCT( RSRC_ )
351            CSRC = DESCT( CSRC_ )
352            DESCT( RSRC_ ) = 0
353            DESCT( CSRC_ ) = 0
354            CALL PSLAQR1( .TRUE., .TRUE., JW+IROFFH, 1, JW+IROFFH, T,
355     $           DESCT, SR, SI, 1, JW+IROFFH, V, DESCV, WORK, -1,
356     $           IWORK, LIWORK-NSEL, INFQR )
357            DESCT( RSRC_ ) = RSRC
358            DESCT( CSRC_ ) = CSRC
359            LWK3 = INT( WORK( 1 ) )
360            IWRK1 = IWORK( 1 )
361         END IF
362*
363*        Workspace in case of alignment problems.
364*
365         TZROWS0 = NUMROC( JW+IROFFH, NB, MYROW, 0, NPROW )
366         TZCOLS0 = NUMROC( JW+IROFFH, NB, MYCOL, 0, NPCOL )
367         LWK4 = 2 * TZROWS0*TZCOLS0
368*
369*        Workspace check for reordering.
370*
371         CALL PSTRORD( 'Vectors', IWORK, PAR, JW+IROFFH, T, 1, 1,
372     $        DESCT, V, 1, 1, DESCV, DDUM, DDUM, MLOC, WORK, -1,
373     $        IWORK, LIWORK-NSEL, INFO )
374         LWK5 = INT( WORK( 1 ) )
375         IWRK2 = IWORK( 1 )
376*
377*        Extra workspace for reflecting back spike
378*        (workspace for PSLARF approximated for simplicity).
379*
380         RROWS =  NUMROC( N+IROFFH, NB, MYROW, DESCV(RSRC_), NPROW )
381         RCOLS =  NUMROC( 1, 1, MYCOL, DESCV(CSRC_), NPCOL )
382         LWK6 = RROWS*RCOLS + TAUROWS*TAUCOLS +
383     $        2*ICEIL(ICEIL(JW+IROFFH,NB),NPROW)*NB
384     $         *ICEIL(ICEIL(JW+IROFFH,NB),NPCOL)*NB
385*
386*        Extra workspace needed by PBLAS update calls
387*        (also estimated for simplicity).
388*
389         LWK7 = MAX( ICEIL(ICEIL(JW,NB),NPROW)*NB *
390     $               ICEIL(ICEIL(N-KBOT,NB),NPCOL)*NB,
391     $               ICEIL(ICEIL(IHIZ-ILOZ+1,NB),NPROW)*NB *
392     $               ICEIL(ICEIL(JW,NB),NPCOL)*NB,
393     $               ICEIL(ICEIL(KBOT-JW,NB),NPROW)*NB *
394     $               ICEIL(ICEIL(JW,NB),NPCOL)*NB )
395*
396*        Residual check workspace.
397*
398         TZROWS = NUMROC( JW+IROFFH, NB, MYROW, DESCT(RSRC_), NPROW )
399         TZCOLS = NUMROC( JW+IROFFH, NB, MYCOL, DESCT(CSRC_), NPCOL )
400         LWK8 = 2*TZROWS*TZCOLS
401*
402*        Optimal workspace.
403*
404         LWKOPT = MAX( LWK1, LWK2, LWK3+LWK4, LWK5, LWK6, LWK7, LWK8 )
405         ILWKOPT = MAX( IWRK1, IWRK2 )
406      END IF
407*
408*     Quick return in case of workspace query.
409*
410      WORK( 1 ) = FLOAT( LWKOPT )
411*
412*     IWORK(1:NSEL) is used as the array SELECT for PSTRORD.
413*
414      IWORK( 1 ) = ILWKOPT + NSEL
415      IF( LQUERY )
416     $   RETURN
417*
418*     Nothing to do for an empty active block ...
419      NS = 0
420      ND = 0
421      IF( KTOP.GT.KBOT )
422     $   RETURN
423*     ... nor for an empty deflation window.
424*
425      IF( NW.LT.1 )
426     $   RETURN
427*
428*     Machine constants.
429*
430      SAFMIN = SLAMCH( 'SAFE MINIMUM' )
431      SAFMAX = ONE / SAFMIN
432      CALL SLABAD( SAFMIN, SAFMAX )
433      ULP = SLAMCH( 'PRECISION' )
434      SMLNUM = SAFMIN*( FLOAT( N ) / ULP )
435*
436*     Setup deflation window.
437*
438      JW = MIN( NW, KBOT-KTOP+1 )
439      KWTOP = KBOT - JW + 1
440      IF( KWTOP.EQ.KTOP ) THEN
441         S = ZERO
442      ELSE
443         CALL PSELGET( 'All', '1-Tree', S, H, KWTOP, KWTOP-1, DESCH )
444      END IF
445*
446      IF( KBOT.EQ.KWTOP ) THEN
447*
448*        1-by-1 deflation window: not much to do.
449*
450         CALL PSELGET( 'All', '1-Tree', SR( KWTOP ), H, KWTOP, KWTOP,
451     $        DESCH )
452         SI( KWTOP ) = ZERO
453         NS = 1
454         ND = 0
455         IF( ABS( S ).LE.MAX( SMLNUM, ULP*ABS( SR( KWTOP ) ) ) )
456     $        THEN
457            NS = 0
458            ND = 1
459            IF( KWTOP.GT.KTOP )
460     $         CALL PSELSET( H, KWTOP, KWTOP-1 , DESCH, ZERO )
461         END IF
462         RETURN
463      END IF
464*
465      IF( KWTOP.EQ.KTOP .AND. KBOT-KWTOP.EQ.1 ) THEN
466*
467*        2-by-2 deflation window: a little more to do.
468*
469         CALL PSELGET( 'All', '1-Tree', AA, H, KWTOP, KWTOP, DESCH )
470         CALL PSELGET( 'All', '1-Tree', BB, H, KWTOP, KWTOP+1, DESCH )
471         CALL PSELGET( 'All', '1-Tree', CC, H, KWTOP+1, KWTOP, DESCH )
472         CALL PSELGET( 'All', '1-Tree', DD, H, KWTOP+1, KWTOP+1, DESCH )
473         CALL SLANV2( AA, BB, CC, DD, SR(KWTOP), SI(KWTOP),
474     $        SR(KWTOP+1), SI(KWTOP+1), CS, SN )
475         NS = 0
476         ND = 2
477         IF( CC.EQ.ZERO ) THEN
478            I = KWTOP
479            IF( I+2.LE.N .AND. WANTT )
480     $         CALL PSROT( N-I-1, H, I, I+2, DESCH, DESCH(M_), H, I+1,
481     $              I+2, DESCH, DESCH(M_), CS, SN, WORK, LWORK, INFO )
482            IF( I.GT.1 )
483     $         CALL PSROT( I-1, H, 1, I, DESCH, 1, H, 1, I+1, DESCH, 1,
484     $              CS, SN, WORK, LWORK, INFO )
485            IF( WANTZ )
486     $         CALL PSROT( IHIZ-ILOZ+1, Z, ILOZ, I, DESCZ, 1, Z, ILOZ,
487     $              I+1, DESCZ, 1, CS, SN, WORK, LWORK, INFO )
488            CALL PSELSET( H, I, I, DESCH, AA )
489            CALL PSELSET( H, I, I+1, DESCH, BB )
490            CALL PSELSET( H, I+1, I, DESCH, CC )
491            CALL PSELSET( H, I+1, I+1, DESCH, DD )
492         END IF
493         WORK( 1 ) = FLOAT( LWKOPT )
494         RETURN
495      END IF
496*
497*     Calculate new value for IROFFH in case deflation window
498*     was adjusted.
499*
500      IROFFH = MOD( KWTOP - 1, NB )
501*
502*     Adjust number of rows and columns of T matrix descriptor
503*     to prepare for call to PDBTRORD.
504*
505      DESCT( M_ ) = JW+IROFFH
506      DESCT( N_ ) = JW+IROFFH
507*
508*     Convert to spike-triangular form.  (In case of a rare QR failure,
509*     this routine continues to do aggressive early deflation using that
510*     part of the deflation window that converged using INFQR here and
511*     there to keep track.)
512*
513*     Copy the trailing submatrix to the working space.
514*
515      CALL PSLASET( 'All', IROFFH, JW+IROFFH, ZERO, ONE, T, 1, 1,
516     $     DESCT )
517      CALL PSLASET( 'All', JW, IROFFH, ZERO, ZERO, T, 1+IROFFH, 1,
518     $     DESCT )
519      CALL PSLACPY( 'All', 1, JW, H, KWTOP, KWTOP, DESCH, T, 1+IROFFH,
520     $     1+IROFFH, DESCT )
521      CALL PSLACPY( 'Upper', JW-1, JW-1, H, KWTOP+1, KWTOP, DESCH, T,
522     $     1+IROFFH+1, 1+IROFFH, DESCT )
523      IF( JW.GT.2 )
524     $   CALL PSLASET( 'Lower', JW-2, JW-2, ZERO, ZERO, T, 1+IROFFH+2,
525     $        1+IROFFH, DESCT )
526      CALL PSLACPY( 'All', JW-1, 1, H, KWTOP+1, KWTOP+JW-1, DESCH, T,
527     $     1+IROFFH+1, 1+IROFFH+JW-1, DESCT )
528*
529*     Initialize the working orthogonal matrix.
530*
531      CALL PSLASET( 'All', JW+IROFFH, JW+IROFFH, ZERO, ONE, V, 1, 1,
532     $     DESCV )
533*
534*     Compute the Schur form of T.
535*
536      NPMIN = PILAENVX( ICTXT, 23, 'PSLAQR3', 'SV', JW, NB, NPROW,
537     $     NPCOL )
538      NMIN = PILAENVX( ICTXT, 12, 'PSLAQR3', 'SV', JW, 1, JW, LWORK )
539      NMAX = ( N-1 ) / 3
540      IF( MIN(NPROW, NPCOL).LE.NPMIN+1 .OR. RECLEVEL.GE.1 ) THEN
541*
542*        The AED window is large enough.
543*        Compute the Schur decomposition with all processors.
544*
545         IF( JW+IROFFH.GT.NMIN .AND. JW+IROFFH.LE.NMAX
546     $        .AND. RECLEVEL.LT.RECMAX ) THEN
547            CALL PSLAQR0( .TRUE., .TRUE., JW+IROFFH, 1+IROFFH,
548     $           JW+IROFFH, T, DESCT, SR( KWTOP-IROFFH ),
549     $           SI( KWTOP-IROFFH ), 1+IROFFH, JW+IROFFH, V, DESCV,
550     $           WORK, LWORK, IWORK(NSEL+1), LIWORK-NSEL, INFQR,
551     $           RECLEVEL+1 )
552         ELSE
553            IF( DESCT(RSRC_).EQ.0 .AND. DESCT(CSRC_).EQ.0 ) THEN
554               IF( JW+IROFFH.GT.DESCT( MB_ ) ) THEN
555                  CALL PSLAQR1( .TRUE., .TRUE., JW+IROFFH, 1,
556     $                 JW+IROFFH, T, DESCT, SR( KWTOP-IROFFH ),
557     $                 SI( KWTOP-IROFFH ), 1, JW+IROFFH, V,
558     $                 DESCV, WORK, LWORK, IWORK(NSEL+1), LIWORK-NSEL,
559     $                 INFQR )
560               ELSE
561                  IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN
562                     CALL SLAHQR( .TRUE., .TRUE., JW+IROFFH, 1+IROFFH,
563     $                    JW+IROFFH, T, DESCT(LLD_),
564     $                    SR( KWTOP-IROFFH ), SI( KWTOP-IROFFH ),
565     $                    1+IROFFH, JW+IROFFH, V, DESCV(LLD_), INFQR )
566                  ELSE
567                     INFQR = 0
568                  END IF
569                  IF( NPROCS.GT.1 )
570     $               CALL IGAMN2D( ICTXT, 'All', '1-Tree', 1, 1, INFQR,
571     $                    1, -1, -1, -1, -1, -1 )
572               END IF
573            ELSEIF( JW+IROFFH.LE.DESCT( MB_ ) ) THEN
574               IF( MYROW.EQ.DESCT(RSRC_) .AND. MYCOL.EQ.DESCT(CSRC_) )
575     $              THEN
576                  CALL SLAHQR( .TRUE., .TRUE., JW+IROFFH, 1+IROFFH,
577     $                 JW+IROFFH, T, DESCT(LLD_),
578     $                 SR( KWTOP-IROFFH ), SI( KWTOP-IROFFH ),
579     $                 1+IROFFH, JW+IROFFH, V, DESCV(LLD_), INFQR )
580               ELSE
581                  INFQR = 0
582               END IF
583               IF( NPROCS.GT.1 )
584     $         CALL IGAMN2D( ICTXT, 'All', '1-Tree', 1, 1, INFQR,
585     $              1, -1, -1, -1, -1, -1 )
586            ELSE
587               TZROWS0 = NUMROC( JW+IROFFH, NB, MYROW, 0, NPROW )
588               TZCOLS0 = NUMROC( JW+IROFFH, NB, MYCOL, 0, NPCOL )
589               CALL DESCINIT( DESCTZ0, JW+IROFFH, JW+IROFFH, NB, NB, 0,
590     $              0, ICTXT, MAX(1,TZROWS0), IERR0 )
591               IPT0 = 1
592               IPZ0 = IPT0 + MAX(1,TZROWS0)*TZCOLS0
593               IPW0 = IPZ0 + MAX(1,TZROWS0)*TZCOLS0
594               CALL PSLAMVE( 'All', JW+IROFFH, JW+IROFFH, T, 1, 1,
595     $              DESCT, WORK(IPT0), 1, 1, DESCTZ0, WORK(IPW0) )
596               CALL PSLASET( 'All', JW+IROFFH, JW+IROFFH, ZERO, ONE,
597     $              WORK(IPZ0), 1, 1, DESCTZ0 )
598               CALL PSLAQR1( .TRUE., .TRUE., JW+IROFFH, 1,
599     $              JW+IROFFH, WORK(IPT0), DESCTZ0,
600     $              SR( KWTOP-IROFFH ), SI( KWTOP-IROFFH ),
601     $              1, JW+IROFFH, WORK(IPZ0),
602     $              DESCTZ0, WORK(IPW0), LWORK-IPW0+1, IWORK(NSEL+1),
603     $              LIWORK-NSEL, INFQR )
604               CALL PSLAMVE( 'All', JW+IROFFH, JW+IROFFH, WORK(IPT0), 1,
605     $              1, DESCTZ0, T, 1, 1, DESCT, WORK(IPW0) )
606               CALL PSLAMVE( 'All', JW+IROFFH, JW+IROFFH, WORK(IPZ0), 1,
607     $              1, DESCTZ0, V, 1, 1, DESCV, WORK(IPW0) )
608            END IF
609         END IF
610      ELSE
611*
612*        The AED window is too small.
613*        Redistribute the AED window to a subgrid
614*        and do the computation on the subgrid.
615*
616         ICTXT_NEW = ICTXT
617         DO 20 I = 0, NPMIN-1
618            DO 10 J = 0, NPMIN-1
619               PMAP( J+1+I*NPMIN ) = BLACS_PNUM( ICTXT, I, J )
620 10         CONTINUE
621 20      CONTINUE
622         CALL BLACS_GRIDMAP( ICTXT_NEW, PMAP, NPMIN, NPMIN, NPMIN )
623         CALL BLACS_GRIDINFO( ICTXT_NEW, NPMIN, NPMIN, MYROW_NEW,
624     $        MYCOL_NEW )
625         IF( MYROW.GE.NPMIN .OR. MYCOL.GE.NPMIN ) ICTXT_NEW = -1
626         IF( ICTXT_NEW.GE.0 ) THEN
627            TZROWS0 = NUMROC( JW, NB, MYROW_NEW, 0, NPMIN )
628            TZCOLS0 = NUMROC( JW, NB, MYCOL_NEW, 0, NPMIN )
629            CALL DESCINIT( DESCTZ0, JW, JW, NB, NB, 0,
630     $           0, ICTXT_NEW, MAX(1,TZROWS0), IERR0 )
631            IPT0 = 1
632            IPZ0 = IPT0 + MAX(1,TZROWS0)*MAX(1,TZCOLS0)
633            IPW0 = IPZ0 + MAX(1,TZROWS0)*MAX(1,TZCOLS0)
634         ELSE
635            IPT0 = 1
636            IPZ0 = 2
637            IPW0 = 3
638            DESCTZ0( CTXT_ ) = -1
639            INFQR = 0
640         END IF
641         CALL PSGEMR2D( JW, JW, T, 1+IROFFH, 1+IROFFH, DESCT,
642     $        WORK(IPT0), 1, 1, DESCTZ0, ICTXT )
643         IF( ICTXT_NEW.GE.0 ) THEN
644            CALL PSLASET( 'All', JW, JW, ZERO, ONE, WORK(IPZ0), 1, 1,
645     $           DESCTZ0 )
646            NMIN = PILAENVX( ICTXT_NEW, 12, 'PSLAQR3', 'SV', JW, 1, JW,
647     $           LWORK )
648            IF( JW.GT.NMIN .AND. JW.LE.NMAX .AND. RECLEVEL.LT.1 ) THEN
649               CALL PSLAQR0( .TRUE., .TRUE., JW, 1, JW, WORK(IPT0),
650     $              DESCTZ0, SR( KWTOP ), SI( KWTOP ), 1, JW,
651     $              WORK(IPZ0), DESCTZ0, WORK(IPW0), LWORK-IPW0+1,
652     $              IWORK(NSEL+1), LIWORK-NSEL, INFQR,
653     $              RECLEVEL+1 )
654            ELSE
655               CALL PSLAQR1( .TRUE., .TRUE., JW, 1, JW, WORK(IPT0),
656     $              DESCTZ0, SR( KWTOP ), SI( KWTOP ), 1, JW,
657     $              WORK(IPZ0), DESCTZ0, WORK(IPW0), LWORK-IPW0+1,
658     $              IWORK(NSEL+1), LIWORK-NSEL, INFQR )
659            END IF
660         END IF
661         CALL PSGEMR2D( JW, JW, WORK(IPT0), 1, 1, DESCTZ0, T, 1+IROFFH,
662     $        1+IROFFH, DESCT, ICTXT )
663         CALL PSGEMR2D( JW, JW, WORK(IPZ0), 1, 1, DESCTZ0, V, 1+IROFFH,
664     $        1+IROFFH, DESCV, ICTXT )
665         IF( ICTXT_NEW.GE.0 )
666     $      CALL BLACS_GRIDEXIT( ICTXT_NEW )
667         IF( MYROW+MYCOL.GT.0 ) THEN
668            DO 40 J = 0, JW-1
669               SR( KWTOP+J ) = ZERO
670               SI( KWTOP+J ) = ZERO
671 40         CONTINUE
672         END IF
673         CALL IGAMN2D( ICTXT, 'All', '1-Tree', 1, 1, INFQR, 1, -1, -1,
674     $        -1, -1, -1 )
675         CALL SGSUM2D( ICTXT, 'All', ' ', JW, 1, SR(KWTOP), JW, -1, -1 )
676         CALL SGSUM2D( ICTXT, 'All', ' ', JW, 1, SI(KWTOP), JW, -1, -1 )
677      END IF
678*
679*     Adjust INFQR for offset from block border in submatrices.
680*
681      IF( INFQR.NE.0 )
682     $   INFQR = INFQR - IROFFH
683*
684*     PSTRORD needs a clean margin near the diagonal.
685*
686      DO 50 J = 1, JW - 3
687         CALL PSELSET( T, J+2, J, DESCT, ZERO )
688         CALL PSELSET( T, J+3, J, DESCT, ZERO )
689 50   CONTINUE
690      IF( JW.GT.2 )
691     $   CALL PSELSET( T, JW, JW-2, DESCT, ZERO )
692*
693*     Check local residual for AED Schur decomposition.
694*
695      RESAED = 0.0
696*
697*     Clean up the array SELECT for PSTRORD.
698*
699      DO 60 J = 1, NSEL
700         IWORK( J ) = 0
701 60   CONTINUE
702*
703*     Set local M counter to zero.
704*
705      MLOC = 0
706*
707*     Outer deflation detection loop (label 80).
708*     In this loop a bunch of undeflatable eigenvalues
709*     are moved simultaneously.
710*
711      DO 70 J = 1, IROFFH + INFQR
712         IWORK( J ) = 1
713 70   CONTINUE
714*
715      NS = JW
716      ILST = INFQR + 1 + IROFFH
717      IF( ILST.GT.1 ) THEN
718         CALL PSELGET( 'All', '1-Tree', ELEM, T, ILST, ILST-1, DESCT )
719         BULGE = ELEM.NE.ZERO
720         IF( BULGE ) ILST = ILST+1
721      END IF
722*
723 80   CONTINUE
724      IF( ILST.LE.NS+IROFFH ) THEN
725*
726*        Find the top-left corner of the local window.
727*
728         LILST = MAX(ILST,NS+IROFFH-NB+1)
729         IF( LILST.GT.1 ) THEN
730            CALL PSELGET( 'All', '1-Tree', ELEM, T, LILST, LILST-1,
731     $           DESCT )
732            BULGE = ELEM.NE.ZERO
733            IF( BULGE ) LILST = LILST+1
734         END IF
735*
736*        Lock all eigenvalues outside the local window.
737*
738         DO 90 J = IROFFH+1, LILST-1
739            IWORK( J ) = 1
740 90      CONTINUE
741         LILST0 = LILST
742*
743*        Inner deflation detection loop (label 100).
744*        In this loop, the undeflatable eigenvalues are moved to the
745*        top-left corner of the local window.
746*
747 100     CONTINUE
748         IF( LILST.LE.NS+IROFFH ) THEN
749            IF( NS.EQ.1 ) THEN
750               BULGE = .FALSE.
751            ELSE
752               CALL PSELGET( 'All', '1-Tree', ELEM, T, NS+IROFFH,
753     $              NS+IROFFH-1, DESCT )
754               BULGE = ELEM.NE.ZERO
755            END IF
756*
757*           Small spike tip test for deflation.
758*
759            IF( .NOT.BULGE ) THEN
760*
761*              Real eigenvalue.
762*
763               CALL PSELGET( 'All', '1-Tree', ELEM, T, NS+IROFFH,
764     $              NS+IROFFH, DESCT )
765               FOO = ABS( ELEM )
766               IF( FOO.EQ.ZERO )
767     $            FOO = ABS( S )
768               CALL PSELGET( 'All', '1-Tree', ELEM, V, 1+IROFFH,
769     $              NS+IROFFH, DESCV )
770               IF( ABS( S*ELEM ).LE.MAX( SMLNUM, ULP*FOO ) ) THEN
771*
772*                 Deflatable.
773*
774                  NS = NS - 1
775               ELSE
776*
777*                 Undeflatable: move it up out of the way.
778*
779                  IFST = NS
780                  DO 110 J = LILST, JW+IROFFH
781                     IWORK( J ) = 0
782 110              CONTINUE
783                  IWORK( IFST+IROFFH ) = 1
784                  CALL PSTRORD( 'Vectors', IWORK, PAR, JW+IROFFH, T, 1,
785     $                 1, DESCT, V, 1, 1, DESCV, WORK,
786     $                 WORK(JW+IROFFH+1), MLOC,
787     $                 WORK(2*(JW+IROFFH)+1), LWORK-2*(JW+IROFFH),
788     $                 IWORK(NSEL+1), LIWORK-NSEL, INFO )
789*
790*                 Adjust the array SELECT explicitly so that it does not
791*                 rely on the output of PSTRORD.
792*
793                  IWORK( IFST+IROFFH ) = 0
794                  IWORK( LILST ) = 1
795                  LILST = LILST + 1
796*
797*                 In case of a rare exchange failure, adjust the
798*                 pointers ILST and LILST to the current place to avoid
799*                 unexpected behaviors.
800*
801                  IF( INFO.NE.0 ) THEN
802                     LILST = MAX(INFO, LILST)
803                     ILST = MAX(INFO, ILST)
804                  END IF
805               END IF
806            ELSE
807*
808*              Complex conjugate pair.
809*
810               CALL PSELGET( 'All', '1-Tree', ELEM1, T, NS+IROFFH,
811     $              NS+IROFFH, DESCT )
812               CALL PSELGET( 'All', '1-Tree', ELEM2, T, NS+IROFFH,
813     $              NS+IROFFH-1, DESCT )
814               CALL PSELGET( 'All', '1-Tree', ELEM3, T, NS+IROFFH-1,
815     $              NS+IROFFH, DESCT )
816               FOO = ABS( ELEM1 ) + SQRT( ABS( ELEM2 ) )*
817     $              SQRT( ABS( ELEM3 ) )
818               IF( FOO.EQ.ZERO )
819     $            FOO = ABS( S )
820               CALL PSELGET( 'All', '1-Tree', ELEM1, V, 1+IROFFH,
821     $              NS+IROFFH, DESCV )
822               CALL PSELGET( 'All', '1-Tree', ELEM2, V, 1+IROFFH,
823     $              NS+IROFFH-1, DESCV )
824               IF( MAX( ABS( S*ELEM1 ), ABS( S*ELEM2 ) ).LE.
825     $              MAX( SMLNUM, ULP*FOO ) ) THEN
826*
827*                 Deflatable.
828*
829                  NS = NS - 2
830               ELSE
831*
832*                 Undeflatable: move them up out of the way.
833*
834                  IFST = NS
835                  DO 120 J = LILST, JW+IROFFH
836                     IWORK( J ) = 0
837 120              CONTINUE
838                  IWORK( IFST+IROFFH ) = 1
839                  IWORK( IFST+IROFFH-1 ) = 1
840                  CALL PSTRORD( 'Vectors', IWORK, PAR, JW+IROFFH, T, 1,
841     $                 1, DESCT, V, 1, 1, DESCV, WORK,
842     $                 WORK(JW+IROFFH+1), MLOC,
843     $                 WORK(2*(JW+IROFFH)+1), LWORK-2*(JW+IROFFH),
844     $                 IWORK(NSEL+1), LIWORK-NSEL, INFO )
845*
846*                 Adjust the array SELECT explicitly so that it does not
847*                 rely on the output of PSTRORD.
848*
849                  IWORK( IFST+IROFFH ) = 0
850                  IWORK( IFST+IROFFH-1 ) = 0
851                  IWORK( LILST ) = 1
852                  IWORK( LILST+1 ) = 1
853                  LILST = LILST + 2
854*
855*                 In case of a rare exchange failure, adjust the
856*                 pointers ILST and LILST to the current place to avoid
857*                 unexpected behaviors.
858*
859                  IF( INFO.NE.0 ) THEN
860                     LILST = MAX(INFO, LILST)
861                     ILST = MAX(INFO, ILST)
862                  END IF
863               END IF
864            END IF
865*
866*           End of inner deflation detection loop.
867*
868            GO TO 100
869         END IF
870*
871*        Unlock the eigenvalues outside the local window.
872*        Then undeflatable eigenvalues are moved to the proper position.
873*
874         DO 130 J = ILST, LILST0-1
875            IWORK( J ) = 0
876 130     CONTINUE
877         CALL PSTRORD( 'Vectors', IWORK, PAR, JW+IROFFH, T, 1, 1,
878     $        DESCT, V, 1, 1, DESCV, WORK, WORK(JW+IROFFH+1),
879     $        M, WORK(2*(JW+IROFFH)+1), LWORK-2*(JW+IROFFH),
880     $        IWORK(NSEL+1), LIWORK-NSEL, INFO )
881         ILST = M + 1
882*
883*        In case of a rare exchange failure, adjust the pointer ILST to
884*        the current place to avoid unexpected behaviors.
885*
886         IF( INFO.NE.0 )
887     $      ILST = MAX(INFO, ILST)
888*
889*        End of outer deflation detection loop.
890*
891         GO TO 80
892      END IF
893
894*
895*     Post-reordering step: copy output eigenvalues to output.
896*
897      CALL SCOPY( JW, WORK(1+IROFFH), 1, SR( KWTOP ), 1 )
898      CALL SCOPY( JW, WORK(JW+2*IROFFH+1), 1, SI( KWTOP ), 1 )
899*
900*     Check local residual for reordered AED Schur decomposition.
901*
902      RESAED = 0.0
903*
904*     Return to Hessenberg form.
905*
906      IF( NS.EQ.0 )
907     $   S = ZERO
908*
909      IF( NS.LT.JW .AND. SORTGRAD ) THEN
910*
911*        Sorting diagonal blocks of T improves accuracy for
912*        graded matrices.  Bubble sort deals well with exchange
913*        failures. Eigenvalues/shifts from T are also restored.
914*
915         ROUND = 0
916         SORTED = .FALSE.
917         I = NS + 1 + IROFFH
918 140     CONTINUE
919         IF( SORTED )
920     $      GO TO 180
921         SORTED = .TRUE.
922         ROUND = ROUND + 1
923*
924         KEND = I - 1
925         I = INFQR + 1 + IROFFH
926         IF( I.EQ.NS+IROFFH ) THEN
927            K = I + 1
928         ELSE IF( SI( KWTOP-IROFFH + I-1 ).EQ.ZERO ) THEN
929            K = I + 1
930         ELSE
931            K = I + 2
932         END IF
933 150     CONTINUE
934         IF( K.LE.KEND ) THEN
935            IF( K.EQ.I+1 ) THEN
936               EVI = ABS( SR( KWTOP-IROFFH+I-1 ) )
937            ELSE
938               EVI = ABS( SR( KWTOP-IROFFH+I-1 ) ) +
939     $              ABS( SI( KWTOP-IROFFH+I-1 ) )
940            END IF
941*
942            IF( K.EQ.KEND ) THEN
943               EVK = ABS( SR( KWTOP-IROFFH+K-1 ) )
944            ELSEIF( SI( KWTOP-IROFFH+K-1 ).EQ.ZERO ) THEN
945               EVK = ABS( SR( KWTOP-IROFFH+K-1 ) )
946            ELSE
947               EVK = ABS( SR( KWTOP-IROFFH+K-1 ) ) +
948     $              ABS( SI( KWTOP-IROFFH+K-1 ) )
949            END IF
950*
951            IF( EVI.GE.EVK ) THEN
952               I = K
953            ELSE
954               MLOC = 0
955               SORTED = .FALSE.
956               IFST = I
957               ILST = K
958               DO 160 J = 1, I-1
959                  IWORK( J ) = 1
960                  MLOC = MLOC + 1
961 160           CONTINUE
962               IF( K.EQ.I+2 ) THEN
963                  IWORK( I ) = 0
964                  IWORK(I+1) = 0
965               ELSE
966                  IWORK( I ) = 0
967               END IF
968               IF( K.NE.KEND .AND. SI( KWTOP-IROFFH+K-1 ).NE.ZERO ) THEN
969                  IWORK( K ) = 1
970                  IWORK(K+1) = 1
971                  MLOC = MLOC + 2
972               ELSE
973                  IWORK( K ) = 1
974                  IF( K.LT.KEND ) IWORK(K+1) = 0
975                  MLOC = MLOC + 1
976               END IF
977               DO 170 J = K+2, JW+IROFFH
978                  IWORK( J ) = 0
979 170           CONTINUE
980               CALL PSTRORD( 'Vectors', IWORK, PAR, JW+IROFFH, T, 1, 1,
981     $              DESCT, V, 1, 1, DESCV, WORK, WORK(JW+IROFFH+1), M,
982     $              WORK(2*(JW+IROFFH)+1), LWORK-2*(JW+IROFFH),
983     $              IWORK(NSEL+1), LIWORK-NSEL, IERR )
984               CALL SCOPY( JW, WORK(1+IROFFH), 1, SR( KWTOP ), 1 )
985               CALL SCOPY( JW, WORK(JW+2*IROFFH+1), 1, SI( KWTOP ), 1 )
986               IF( IERR.EQ.0 ) THEN
987                  I = ILST
988               ELSE
989                  I = K
990               END IF
991            END IF
992            IF( I.EQ.KEND ) THEN
993               K = I + 1
994            ELSE IF( SI( KWTOP-IROFFH+I-1 ).EQ.ZERO ) THEN
995               K = I + 1
996            ELSE
997               K = I + 2
998            END IF
999            GO TO 150
1000         END IF
1001         GO TO 140
1002 180     CONTINUE
1003      END IF
1004*
1005*     Restore number of rows and columns of T matrix descriptor.
1006*
1007      DESCT( M_ ) = NW+IROFFH
1008      DESCT( N_ ) = NH+IROFFH
1009*
1010      IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN
1011         IF( NS.GT.1 .AND. S.NE.ZERO ) THEN
1012*
1013*           Reflect spike back into lower triangle.
1014*
1015            RROWS = NUMROC( NS+IROFFH, NB, MYROW, DESCV(RSRC_), NPROW )
1016            RCOLS = NUMROC( 1, 1, MYCOL, DESCV(CSRC_), NPCOL )
1017            CALL DESCINIT( DESCR, NS+IROFFH, 1, NB, 1, DESCV(RSRC_),
1018     $           DESCV(CSRC_), ICTXT, MAX(1, RROWS), INFO )
1019            TAUROWS = NUMROC( 1, 1, MYCOL, DESCV(RSRC_), NPROW )
1020            TAUCOLS = NUMROC( JW+IROFFH, NB, MYCOL, DESCV(CSRC_),
1021     $           NPCOL )
1022            CALL DESCINIT( DESCTAU, 1, JW+IROFFH, 1, NB, DESCV(RSRC_),
1023     $           DESCV(CSRC_), ICTXT, MAX(1, TAUROWS), INFO )
1024*
1025            IR = 1
1026            ITAU = IR + DESCR( LLD_ ) * RCOLS
1027            IPW  = ITAU + DESCTAU( LLD_ ) * TAUCOLS
1028*
1029            CALL PSLASET( 'All', NS+IROFFH, 1, ZERO, ZERO, WORK(ITAU),
1030     $           1, 1, DESCTAU )
1031*
1032            CALL PSCOPY( NS, V, 1+IROFFH, 1+IROFFH, DESCV, DESCV(M_),
1033     $           WORK(IR), 1+IROFFH, 1, DESCR, 1 )
1034            CALL PSLARFG( NS, BETA, 1+IROFFH, 1, WORK(IR), 2+IROFFH, 1,
1035     $           DESCR, 1, WORK(ITAU+IROFFH) )
1036            CALL PSELSET( WORK(IR), 1+IROFFH, 1, DESCR, ONE )
1037*
1038            CALL PSLASET( 'Lower', JW-2, JW-2, ZERO, ZERO, T, 3+IROFFH,
1039     $           1+IROFFH, DESCT )
1040*
1041            CALL PSLARF( 'Left', NS, JW, WORK(IR), 1+IROFFH, 1, DESCR,
1042     $           1, WORK(ITAU+IROFFH), T, 1+IROFFH, 1+IROFFH,
1043     $           DESCT, WORK( IPW ) )
1044            CALL PSLARF( 'Right', NS, NS, WORK(IR), 1+IROFFH, 1, DESCR,
1045     $           1, WORK(ITAU+IROFFH), T, 1+IROFFH, 1+IROFFH,
1046     $           DESCT, WORK( IPW ) )
1047            CALL PSLARF( 'Right', JW, NS, WORK(IR), 1+IROFFH, 1, DESCR,
1048     $           1, WORK(ITAU+IROFFH), V, 1+IROFFH, 1+IROFFH,
1049     $           DESCV, WORK( IPW ) )
1050*
1051            ITAU = 1
1052            IPW = ITAU + DESCTAU( LLD_ ) * TAUCOLS
1053            CALL PSGEHRD( JW+IROFFH, 1+IROFFH, NS+IROFFH, T, 1, 1,
1054     $           DESCT, WORK(ITAU), WORK( IPW ), LWORK-IPW+1, INFO )
1055         END IF
1056*
1057*        Copy updated reduced window into place.
1058*
1059         IF( KWTOP.GT.1 ) THEN
1060            CALL PSELGET( 'All', '1-Tree', ELEM, V, 1+IROFFH,
1061     $           1+IROFFH, DESCV )
1062            CALL PSELSET( H, KWTOP, KWTOP-1, DESCH, S*ELEM )
1063         END IF
1064         CALL PSLACPY( 'Upper', JW-1, JW-1, T, 1+IROFFH+1, 1+IROFFH,
1065     $        DESCT, H, KWTOP+1, KWTOP, DESCH )
1066         CALL PSLACPY( 'All', 1, JW, T, 1+IROFFH, 1+IROFFH, DESCT, H,
1067     $        KWTOP, KWTOP, DESCH )
1068         CALL PSLACPY( 'All', JW-1, 1, T, 1+IROFFH+1, 1+IROFFH+JW-1,
1069     $        DESCT, H, KWTOP+1, KWTOP+JW-1, DESCH )
1070*
1071*        Accumulate orthogonal matrix in order to update
1072*        H and Z, if requested.
1073*
1074         IF( NS.GT.1 .AND. S.NE.ZERO ) THEN
1075            CALL PSORMHR( 'Right', 'No', JW+IROFFH, NS+IROFFH, 1+IROFFH,
1076     $           NS+IROFFH, T, 1, 1, DESCT, WORK(ITAU), V, 1,
1077     $           1, DESCV, WORK( IPW ), LWORK-IPW+1, INFO )
1078         END IF
1079*
1080*        Update vertical slab in H.
1081*
1082         IF( WANTT ) THEN
1083            LTOP = 1
1084         ELSE
1085            LTOP = KTOP
1086         END IF
1087         KLN = MAX( 0, KWTOP-LTOP )
1088         IROFFHH = MOD( LTOP-1, NB )
1089         ICOFFHH = MOD( KWTOP-1, NB )
1090         HHRSRC = INDXG2P( LTOP, NB, MYROW, DESCH(RSRC_), NPROW )
1091         HHCSRC = INDXG2P( KWTOP, NB, MYCOL, DESCH(CSRC_), NPCOL )
1092         HHROWS = NUMROC( KLN+IROFFHH, NB, MYROW, HHRSRC, NPROW )
1093         HHCOLS = NUMROC( JW+ICOFFHH, NB, MYCOL, HHCSRC, NPCOL )
1094         CALL DESCINIT( DESCHH, KLN+IROFFHH, JW+ICOFFHH, NB, NB,
1095     $        HHRSRC, HHCSRC, ICTXT, MAX(1, HHROWS), IERR )
1096         CALL PSGEMM( 'No', 'No', KLN, JW, JW, ONE, H, LTOP,
1097     $        KWTOP, DESCH, V, 1+IROFFH, 1+IROFFH, DESCV, ZERO,
1098     $        WORK, 1+IROFFHH, 1+ICOFFHH, DESCHH )
1099         CALL PSLACPY( 'All', KLN, JW, WORK, 1+IROFFHH, 1+ICOFFHH,
1100     $        DESCHH, H, LTOP, KWTOP, DESCH )
1101*
1102*        Update horizontal slab in H.
1103*
1104         IF( WANTT ) THEN
1105            KLN = N-KBOT
1106            IROFFHH = MOD( KWTOP-1, NB )
1107            ICOFFHH = MOD( KBOT, NB )
1108            HHRSRC = INDXG2P( KWTOP, NB, MYROW, DESCH(RSRC_), NPROW )
1109            HHCSRC = INDXG2P( KBOT+1, NB, MYCOL, DESCH(CSRC_), NPCOL )
1110            HHROWS = NUMROC( JW+IROFFHH, NB, MYROW, HHRSRC, NPROW )
1111            HHCOLS = NUMROC( KLN+ICOFFHH, NB, MYCOL, HHCSRC, NPCOL )
1112            CALL DESCINIT( DESCHH, JW+IROFFHH, KLN+ICOFFHH, NB, NB,
1113     $           HHRSRC, HHCSRC, ICTXT, MAX(1, HHROWS), IERR )
1114            CALL PSGEMM( 'Tr', 'No', JW, KLN, JW, ONE, V,
1115     $           1+IROFFH, 1+IROFFH, DESCV, H, KWTOP, KBOT+1,
1116     $           DESCH, ZERO, WORK, 1+IROFFHH, 1+ICOFFHH, DESCHH )
1117            CALL PSLACPY( 'All', JW, KLN, WORK, 1+IROFFHH, 1+ICOFFHH,
1118     $           DESCHH, H, KWTOP, KBOT+1, DESCH )
1119         END IF
1120*
1121*        Update vertical slab in Z.
1122*
1123         IF( WANTZ ) THEN
1124            KLN = IHIZ-ILOZ+1
1125            IROFFZZ = MOD( ILOZ-1, NB )
1126            ICOFFZZ = MOD( KWTOP-1, NB )
1127            ZZRSRC = INDXG2P( ILOZ, NB, MYROW, DESCZ(RSRC_), NPROW )
1128            ZZCSRC = INDXG2P( KWTOP, NB, MYCOL, DESCZ(CSRC_), NPCOL )
1129            ZZROWS = NUMROC( KLN+IROFFZZ, NB, MYROW, ZZRSRC, NPROW )
1130            ZZCOLS = NUMROC( JW+ICOFFZZ, NB, MYCOL, ZZCSRC, NPCOL )
1131            CALL DESCINIT( DESCZZ, KLN+IROFFZZ, JW+ICOFFZZ, NB, NB,
1132     $           ZZRSRC, ZZCSRC, ICTXT, MAX(1, ZZROWS), IERR )
1133            CALL PSGEMM( 'No', 'No', KLN, JW, JW, ONE, Z, ILOZ,
1134     $           KWTOP, DESCZ, V, 1+IROFFH, 1+IROFFH, DESCV,
1135     $           ZERO, WORK, 1+IROFFZZ, 1+ICOFFZZ, DESCZZ )
1136            CALL PSLACPY( 'All', KLN, JW, WORK, 1+IROFFZZ, 1+ICOFFZZ,
1137     $           DESCZZ, Z, ILOZ, KWTOP, DESCZ )
1138         END IF
1139      END IF
1140*
1141*     Return the number of deflations (ND) and the number of shifts (NS).
1142*     (Subtracting INFQR from the spike length takes care of the case of
1143*     a rare QR failure while calculating eigenvalues of the deflation
1144*     window.)
1145*
1146      ND = JW - NS
1147      NS = NS - INFQR
1148*
1149*     Return optimal workspace.
1150*
1151      WORK( 1 ) = FLOAT( LWKOPT )
1152      IWORK( 1 ) = ILWKOPT + NSEL
1153*
1154*     End of PSLAQR3
1155*
1156      END
1157