1      RECURSIVE SUBROUTINE PDLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H,
2     $                              DESCH, ILOZ, IHIZ, Z, DESCZ, NS, ND,
3     $                              SR, SI, V, DESCV, NH, T, DESCT, NV,
4     $                              WV, DESCW, WORK, LWORK, IWORK,
5     $                              LIWORK, RECLEVEL )
6*
7*     Contribution from the Department of Computing Science and HPC2N,
8*     Umea University, Sweden
9*
10*  -- ScaLAPACK auxiliary routine (version 2.0.1) --
11*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
12*     Univ. of Colorado Denver and University of California, Berkeley.
13*     January, 2012
14*
15      IMPLICIT NONE
16*
17*     .. Scalar Arguments ..
18      INTEGER            IHIZ, ILOZ, KBOT, KTOP, LWORK, N, ND, NH, NS,
19     $                   NV, NW, LIWORK, RECLEVEL
20      LOGICAL            WANTT, WANTZ
21*     ..
22*     .. Array Arguments ..
23      INTEGER            DESCH( * ), DESCZ( * ), DESCT( * ), DESCV( * ),
24     $                   DESCW( * ), IWORK( * )
25      DOUBLE PRECISION   H( * ), SI( KBOT ), SR( KBOT ), T( * ),
26     $                   V( * ), WORK( * ), WV( * ),
27     $                   Z( * )
28*     ..
29*
30*  Purpose
31*  =======
32*
33*  Aggressive early deflation:
34*
35*  This subroutine accepts as input an upper Hessenberg matrix H and
36*  performs an orthogonal similarity transformation designed to detect
37*  and deflate fully converged eigenvalues from a trailing principal
38*  submatrix.  On output H has been overwritten by a new Hessenberg
39*  matrix that is a perturbation of an orthogonal similarity
40*  transformation of H.  It is to be hoped that the final version of H
41*  has many zero subdiagonal entries.
42*
43*  Notes
44*  =====
45*
46*  Each global data object is described by an associated description
47*  vector.  This vector stores the information required to establish
48*  the mapping between an object element and its corresponding process
49*  and memory location.
50*
51*  Let A be a generic term for any 2D block cyclicly distributed array.
52*  Such a global array has an associated description vector DESCA.
53*  In the following comments, the character _ should be read as
54*  "of the global array".
55*
56*  NOTATION        STORED IN      EXPLANATION
57*  --------------- -------------- --------------------------------------
58*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
59*                                 DTYPE_A = 1.
60*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
61*                                 the BLACS process grid A is distribu-
62*                                 ted over. The context itself is glo-
63*                                 bal, but the handle (the integer
64*                                 value) may vary.
65*  M_A    (global) DESCA( M_ )    The number of rows in the global
66*                                 array A.
67*  N_A    (global) DESCA( N_ )    The number of columns in the global
68*                                 array A.
69*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
70*                                 the rows of the array.
71*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
72*                                 the columns of the array.
73*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
74*                                 row of the array A is distributed.
75*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
76*                                 first column of the array A is
77*                                 distributed.
78*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
79*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
80*
81*  Let K be the number of rows or columns of a distributed matrix,
82*  and assume that its process grid has dimension p x q.
83*  LOCr( K ) denotes the number of elements of K that a process
84*  would receive if K were distributed over the p processes of its
85*  process column.
86*  Similarly, LOCc( K ) denotes the number of elements of K that a
87*  process would receive if K were distributed over the q processes of
88*  its process row.
89*  The values of LOCr() and LOCc() may be determined via a call to the
90*  ScaLAPACK tool function, NUMROC:
91*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
92*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
93*  An upper bound for these quantities may be computed by:
94*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
95*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
96*
97*  Arguments
98*  =========
99*
100*  WANTT   (global input) LOGICAL
101*          If .TRUE., then the Hessenberg matrix H is fully updated
102*          so that the quasi-triangular Schur factor may be
103*          computed (in cooperation with the calling subroutine).
104*          If .FALSE., then only enough of H is updated to preserve
105*          the eigenvalues.
106*
107*  WANTZ   (global input) LOGICAL
108*          If .TRUE., then the orthogonal matrix Z is updated so
109*          so that the orthogonal Schur factor may be computed
110*          (in cooperation with the calling subroutine).
111*          If .FALSE., then Z is not referenced.
112*
113*  N       (global input) INTEGER
114*          The order of the matrix H and (if WANTZ is .TRUE.) the
115*          order of the orthogonal matrix Z.
116*
117*  KTOP    (global input) INTEGER
118*          It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.
119*          KBOT and KTOP together determine an isolated block
120*          along the diagonal of the Hessenberg matrix.
121*
122*  KBOT    (global input) INTEGER
123*          It is assumed without a check that either
124*          KBOT = N or H(KBOT+1,KBOT)=0.  KBOT and KTOP together
125*          determine an isolated block along the diagonal of the
126*          Hessenberg matrix.
127*
128*  NW      (global input) INTEGER
129*          Deflation window size.  1 .LE. NW .LE. (KBOT-KTOP+1).
130*
131*  H       (local input/output) DOUBLE PRECISION array, dimension
132*             (DESCH(LLD_),*)
133*          On input the initial N-by-N section of H stores the
134*          Hessenberg matrix undergoing aggressive early deflation.
135*          On output H has been transformed by an orthogonal
136*          similarity transformation, perturbed, and the returned
137*          to Hessenberg form that (it is to be hoped) has some
138*          zero subdiagonal entries.
139*
140*  DESCH   (global and local input) INTEGER array of dimension DLEN_.
141*          The array descriptor for the distributed matrix H.
142*
143*  ILOZ    (global input) INTEGER
144*  IHIZ    (global input) INTEGER
145*          Specify the rows of Z to which transformations must be
146*          applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
147*
148*  Z       (input/output) DOUBLE PRECISION array, dimension
149*             (DESCH(LLD_),*)
150*          IF WANTZ is .TRUE., then on output, the orthogonal
151*          similarity transformation mentioned above has been
152*          accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.
153*          If WANTZ is .FALSE., then Z is unreferenced.
154*
155*  DESCZ   (global and local input) INTEGER array of dimension DLEN_.
156*          The array descriptor for the distributed matrix Z.
157*
158*  NS      (global output) INTEGER
159*          The number of unconverged (ie approximate) eigenvalues
160*          returned in SR and SI that may be used as shifts by the
161*          calling subroutine.
162*
163*  ND      (global output) INTEGER
164*          The number of converged eigenvalues uncovered by this
165*          subroutine.
166*
167*  SR      (global output) DOUBLE PRECISION array, dimension KBOT
168*  SI      (global output) DOUBLE PRECISION array, dimension KBOT
169*          On output, the real and imaginary parts of approximate
170*          eigenvalues that may be used for shifts are stored in
171*          SR(KBOT-ND-NS+1) through SR(KBOT-ND) and
172*          SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively.
173*          The real and imaginary parts of converged eigenvalues
174*          are stored in SR(KBOT-ND+1) through SR(KBOT) and
175*          SI(KBOT-ND+1) through SI(KBOT), respectively.
176*
177*  V       (global workspace) DOUBLE PRECISION array, dimension
178*             (DESCV(LLD_),*)
179*          An NW-by-NW distributed work array.
180*
181*  DESCV   (global and local input) INTEGER array of dimension DLEN_.
182*          The array descriptor for the distributed matrix V.
183*
184*  NH      (input) INTEGER scalar
185*          The number of columns of T.  NH.GE.NW.
186*
187*  T       (global workspace) DOUBLE PRECISION array, dimension
188*             (DESCV(LLD_),*)
189*
190*  DESCT   (global and local input) INTEGER array of dimension DLEN_.
191*          The array descriptor for the distributed matrix T.
192*
193*  NV      (global input) INTEGER
194*          The number of rows of work array WV available for
195*          workspace.  NV.GE.NW.
196*
197*  WV      (global workspace) DOUBLE PRECISION array, dimension
198*             (DESCW(LLD_),*)
199*
200*  DESCW   (global and local input) INTEGER array of dimension DLEN_.
201*          The array descriptor for the distributed matrix WV.
202*
203*  WORK    (local workspace) DOUBLE PRECISION array, dimension LWORK.
204*          On exit, WORK(1) is set to an estimate of the optimal value
205*          of LWORK for the given values of N, NW, KTOP and KBOT.
206*
207*  LWORK   (local input) INTEGER
208*          The dimension of the work array WORK.  LWORK = 2*NW
209*          suffices, but greater efficiency may result from larger
210*          values of LWORK.
211*
212*          If LWORK = -1, then a workspace query is assumed; PDLAQR3
213*          only estimates the optimal workspace size for the given
214*          values of N, NW, KTOP and KBOT.  The estimate is returned
215*          in WORK(1).  No error message related to LWORK is issued
216*          by XERBLA.  Neither H nor Z are accessed.
217*
218*  IWORK   (local workspace) INTEGER array, dimension (LIWORK)
219*
220*  LIWORK  (local input) INTEGER
221*          The length of the workspace array IWORK
222*
223*  ================================================================
224*  Based on contributions by
225*        Robert Granat and Meiyue Shao,
226*        Department of Computing Science and HPC2N,
227*        Umea University, Sweden
228*
229*  ================================================================
230*     .. Parameters ..
231      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
232     $                   LLD_, MB_, M_, NB_, N_, RSRC_
233      INTEGER            RECMAX
234      LOGICAL            SORTGRAD
235      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
236     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
237     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9, RECMAX = 3,
238     $                     SORTGRAD = .FALSE. )
239      DOUBLE PRECISION   ZERO, ONE
240      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
241*     ..
242*     .. Local Scalars ..
243      DOUBLE PRECISION   AA, BB, BETA, CC, CS, DD, EVI, EVK, FOO, S,
244     $                   SAFMAX, SAFMIN, SMLNUM, SN, TAU, ULP,
245     $                   ELEM, ELEM1, ELEM2, ELEM3, R1, ANORM, RNORM,
246     $                   RESAED
247      INTEGER            I, IFST, ILST, INFO, INFQR, J, JW, K, KCOL,
248     $                   KEND, KLN, KROW, KWTOP, LTOP, LWK1, LWK2, LWK3,
249     $                   LWKOPT, NMIN, LLDH, LLDZ, LLDT, LLDV, LLDWV,
250     $                   ICTXT, NPROW, NMAX, NPCOL, MYROW, MYCOL, NB,
251     $                   IROFFH, M, RCOLS, TAUROWS, RROWS, TAUCOLS,
252     $                   ITAU, IR, IPW, NPROCS, MLOC, IROFFHH,
253     $                   ICOFFHH, HHRSRC, HHCSRC, HHROWS, HHCOLS,
254     $                   IROFFZZ, ICOFFZZ, ZZRSRC, ZZCSRC, ZZROWS,
255     $                   ZZCOLS, IERR, TZROWS0, TZCOLS0, IERR0, IPT0,
256     $                   IPZ0, IPW0, NB2, ROUND, LILST, KK, LILST0,
257     $                   IWRK1, RSRC, CSRC, LWK4, LWK5, IWRK2, LWK6,
258     $                   LWK7, LWK8, ILWKOPT, TZROWS, TZCOLS, NSEL,
259     $                   NPMIN, ICTXT_NEW, MYROW_NEW, MYCOL_NEW
260      LOGICAL            BULGE, SORTED, LQUERY
261*     ..
262*     .. Local Arrays ..
263      INTEGER            PAR( 6 ), DESCR( DLEN_ ),
264     $                   DESCTAU( DLEN_ ), DESCHH( DLEN_ ),
265     $                   DESCZZ( DLEN_ ), DESCTZ0( DLEN_ ),
266     $                   PMAP( 64*64 )
267      DOUBLE PRECISION   DDUM( 1 )
268*     ..
269*     .. External Functions ..
270      DOUBLE PRECISION   DLAMCH, PDLANGE
271      INTEGER            PILAENVX, NUMROC, INDXG2P, ICEIL, BLACS_PNUM
272      EXTERNAL           DLAMCH, PILAENVX, NUMROC, INDXG2P, PDLANGE,
273     $                   MPI_WTIME, ICEIL, BLACS_PNUM
274*     ..
275*     .. External Subroutines ..
276      EXTERNAL           PDCOPY, PDGEHRD, PDGEMM, DLABAD, PDLACPY,
277     $                   PDLAQR1, DLANV2, PDLAQR0, PDLARF, PDLARFG,
278     $                   PDLASET, PDTRORD, PDELGET, PDELSET,
279     $                   PDLAMVE, BLACS_GRIDINFO, BLACS_GRIDMAP,
280     $                   BLACS_GRIDEXIT, PDGEMR2D
281*     ..
282*     .. Intrinsic Functions ..
283      INTRINSIC          ABS, DBLE, INT, MAX, MIN, SQRT
284*     ..
285*     .. Executable Statements ..
286      ICTXT = DESCH( CTXT_ )
287      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
288      NPROCS = NPROW*NPCOL
289*
290*     Extract local leading dimensions, blockfactors, offset for
291*     keeping the alignment requirements and size of deflation window.
292*
293      LLDH  = DESCH( LLD_ )
294      LLDZ  = DESCZ( LLD_ )
295      LLDT  = DESCT( LLD_ )
296      LLDV  = DESCV( LLD_ )
297      LLDWV = DESCW( LLD_ )
298      NB = DESCH( MB_ )
299      IROFFH = MOD( KTOP - 1, NB )
300      JW = MIN( NW, KBOT-KTOP+1 )
301      NSEL = NB+JW
302*
303*     Extract environment variables for parallel eigenvalue reordering.
304*
305      PAR(1) = PILAENVX(ICTXT, 17, 'PDLAQR3', 'SV', JW, NB, -1, -1)
306      PAR(2) = PILAENVX(ICTXT, 18, 'PDLAQR3', 'SV', JW, NB, -1, -1)
307      PAR(3) = PILAENVX(ICTXT, 19, 'PDLAQR3', 'SV', JW, NB, -1, -1)
308      PAR(4) = PILAENVX(ICTXT, 20, 'PDLAQR3', 'SV', JW, NB, -1, -1)
309      PAR(5) = PILAENVX(ICTXT, 21, 'PDLAQR3', 'SV', JW, NB, -1, -1)
310      PAR(6) = PILAENVX(ICTXT, 22, 'PDLAQR3', 'SV', JW, NB, -1, -1)
311*
312*     Check if workspace query.
313*
314      LQUERY = LWORK.EQ.-1 .OR. LIWORK.EQ.-1
315*
316*     Estimate optimal workspace.
317*
318      IF( JW.LE.2 ) THEN
319         LWKOPT = 1
320      ELSE
321*
322*        Workspace query calls to PDGEHRD and PDORMHR.
323*
324         TAUROWS = NUMROC( 1, 1, MYCOL, DESCV(RSRC_), NPROW )
325         TAUCOLS = NUMROC( JW+IROFFH, NB, MYCOL, DESCV(CSRC_),
326     $        NPCOL )
327         CALL PDGEHRD( JW, 1, JW, T, 1, 1, DESCT, WORK, WORK, -1,
328     $        INFO )
329         LWK1 = INT( WORK( 1 ) ) + TAUROWS*TAUCOLS
330*
331*        Workspace query call to PDORMHR.
332*
333         CALL PDORMHR( 'Right', 'No', JW, JW, 1, JW, T, 1, 1, DESCT,
334     $        WORK, V, 1, 1, DESCV, WORK, -1, INFO )
335         LWK2 = INT( WORK( 1 ) )
336*
337*        Workspace query call to PDLAQR0.
338*
339         NMIN = PILAENVX( ICTXT, 12, 'PDLAQR3', 'SV', JW, 1, JW, LWORK )
340         NMAX = ( N-1 ) / 3
341         IF( JW+IROFFH.GT.NMIN .AND. JW+IROFFH.LE.NMAX
342     $        .AND. RECLEVEL.LT.RECMAX ) THEN
343            CALL PDLAQR0( .TRUE., .TRUE., JW+IROFFH, 1+IROFFH,
344     $           JW+IROFFH, T, DESCT, SR, SI, 1, JW, V, DESCV,
345     $           WORK, -1, IWORK, LIWORK-NSEL, INFQR,
346     $           RECLEVEL+1 )
347            LWK3 = INT( WORK( 1 ) )
348            IWRK1 = IWORK( 1 )
349         ELSE
350            RSRC = DESCT( RSRC_ )
351            CSRC = DESCT( CSRC_ )
352            DESCT( RSRC_ ) = 0
353            DESCT( CSRC_ ) = 0
354            CALL PDLAQR1( .TRUE., .TRUE., JW+IROFFH, 1, JW+IROFFH, T,
355     $           DESCT, SR, SI, 1, JW+IROFFH, V, DESCV, WORK, -1,
356     $           IWORK, LIWORK-NSEL, INFQR )
357            DESCT( RSRC_ ) = RSRC
358            DESCT( CSRC_ ) = CSRC
359            LWK3 = INT( WORK( 1 ) )
360            IWRK1 = IWORK( 1 )
361         END IF
362*
363*        Workspace in case of alignment problems.
364*
365         TZROWS0 = NUMROC( JW+IROFFH, NB, MYROW, 0, NPROW )
366         TZCOLS0 = NUMROC( JW+IROFFH, NB, MYCOL, 0, NPCOL )
367         LWK4 = 2 * TZROWS0*TZCOLS0
368*
369*        Workspace check for reordering.
370*
371         CALL PDTRORD( 'Vectors', IWORK, PAR, JW+IROFFH, T, 1, 1,
372     $        DESCT, V, 1, 1, DESCV, DDUM, DDUM, MLOC, WORK, -1,
373     $        IWORK, LIWORK-NSEL, INFO )
374         LWK5 = INT( WORK( 1 ) )
375         IWRK2 = IWORK( 1 )
376*
377*        Extra workspace for reflecting back spike
378*        (workspace for PDLARF approximated for simplicity).
379*
380         RROWS =  NUMROC( N+IROFFH, NB, MYROW, DESCV(RSRC_), NPROW )
381         RCOLS =  NUMROC( 1, 1, MYCOL, DESCV(CSRC_), NPCOL )
382         LWK6 = RROWS*RCOLS + TAUROWS*TAUCOLS +
383     $        2*ICEIL(ICEIL(JW+IROFFH,NB),NPROW)*NB
384     $         *ICEIL(ICEIL(JW+IROFFH,NB),NPCOL)*NB
385*
386*        Extra workspace needed by PBLAS update calls
387*        (also estimated for simplicity).
388*
389         LWK7 = MAX( ICEIL(ICEIL(JW,NB),NPROW)*NB *
390     $               ICEIL(ICEIL(N-KBOT,NB),NPCOL)*NB,
391     $               ICEIL(ICEIL(IHIZ-ILOZ+1,NB),NPROW)*NB *
392     $               ICEIL(ICEIL(JW,NB),NPCOL)*NB,
393     $               ICEIL(ICEIL(KBOT-JW,NB),NPROW)*NB *
394     $               ICEIL(ICEIL(JW,NB),NPCOL)*NB )
395*
396*        Residual check workspace.
397*
398         LWK8 = 0
399*
400*        Optimal workspace.
401*
402         LWKOPT = MAX( LWK1, LWK2, LWK3+LWK4, LWK5, LWK6, LWK7, LWK8 )
403         ILWKOPT = MAX( IWRK1, IWRK2 )
404      END IF
405*
406*     Quick return in case of workspace query.
407*
408      WORK( 1 ) = DBLE( LWKOPT )
409*
410*     IWORK(1:NSEL) is used as the array SELECT for PDTRORD.
411*
412      IWORK( 1 ) = ILWKOPT + NSEL
413      IF( LQUERY )
414     $   RETURN
415*
416*     Nothing to do for an empty active block ...
417      NS = 0
418      ND = 0
419      IF( KTOP.GT.KBOT )
420     $   RETURN
421*     ... nor for an empty deflation window.
422*
423      IF( NW.LT.1 )
424     $   RETURN
425*
426*     Machine constants.
427*
428      SAFMIN = DLAMCH( 'SAFE MINIMUM' )
429      SAFMAX = ONE / SAFMIN
430      CALL DLABAD( SAFMIN, SAFMAX )
431      ULP = DLAMCH( 'PRECISION' )
432      SMLNUM = SAFMIN*( DBLE( N ) / ULP )
433*
434*     Setup deflation window.
435*
436      JW = MIN( NW, KBOT-KTOP+1 )
437      KWTOP = KBOT - JW + 1
438      IF( KWTOP.EQ.KTOP ) THEN
439         S = ZERO
440      ELSE
441         CALL PDELGET( 'All', '1-Tree', S, H, KWTOP, KWTOP-1, DESCH )
442      END IF
443*
444      IF( KBOT.EQ.KWTOP ) THEN
445*
446*        1-by-1 deflation window: not much to do.
447*
448         CALL PDELGET( 'All', '1-Tree', SR( KWTOP ), H, KWTOP, KWTOP,
449     $        DESCH )
450         SI( KWTOP ) = ZERO
451         NS = 1
452         ND = 0
453         IF( ABS( S ).LE.MAX( SMLNUM, ULP*ABS( SR( KWTOP ) ) ) )
454     $        THEN
455            NS = 0
456            ND = 1
457            IF( KWTOP.GT.KTOP )
458     $         CALL PDELSET( H, KWTOP, KWTOP-1 , DESCH, ZERO )
459         END IF
460         RETURN
461      END IF
462*
463      IF( KWTOP.EQ.KTOP .AND. KBOT-KWTOP.EQ.1 ) THEN
464*
465*        2-by-2 deflation window: a little more to do.
466*
467         CALL PDELGET( 'All', '1-Tree', AA, H, KWTOP, KWTOP, DESCH )
468         CALL PDELGET( 'All', '1-Tree', BB, H, KWTOP, KWTOP+1, DESCH )
469         CALL PDELGET( 'All', '1-Tree', CC, H, KWTOP+1, KWTOP, DESCH )
470         CALL PDELGET( 'All', '1-Tree', DD, H, KWTOP+1, KWTOP+1, DESCH )
471         CALL DLANV2( AA, BB, CC, DD, SR(KWTOP), SI(KWTOP),
472     $        SR(KWTOP+1), SI(KWTOP+1), CS, SN )
473         NS = 0
474         ND = 2
475         IF( CC.EQ.ZERO ) THEN
476            I = KWTOP
477            IF( I+2.LE.N .AND. WANTT )
478     $         CALL PDROT( N-I-1, H, I, I+2, DESCH, DESCH(M_), H, I+1,
479     $              I+2, DESCH, DESCH(M_), CS, SN, WORK, LWORK, INFO )
480            IF( I.GT.1 )
481     $         CALL PDROT( I-1, H, 1, I, DESCH, 1, H, 1, I+1, DESCH, 1,
482     $              CS, SN, WORK, LWORK, INFO )
483            IF( WANTZ )
484     $         CALL PDROT( IHIZ-ILOZ+1, Z, ILOZ, I, DESCZ, 1, Z, ILOZ,
485     $              I+1, DESCZ, 1, CS, SN, WORK, LWORK, INFO )
486            CALL PDELSET( H, I, I, DESCH, AA )
487            CALL PDELSET( H, I, I+1, DESCH, BB )
488            CALL PDELSET( H, I+1, I, DESCH, CC )
489            CALL PDELSET( H, I+1, I+1, DESCH, DD )
490         END IF
491         WORK( 1 ) = DBLE( LWKOPT )
492         RETURN
493      END IF
494*
495*     Calculate new value for IROFFH in case deflation window
496*     was adjusted.
497*
498      IROFFH = MOD( KWTOP - 1, NB )
499*
500*     Adjust number of rows and columns of T matrix descriptor
501*     to prepare for call to PDBTRORD.
502*
503      DESCT( M_ ) = JW+IROFFH
504      DESCT( N_ ) = JW+IROFFH
505*
506*     Convert to spike-triangular form.  (In case of a rare QR failure,
507*     this routine continues to do aggressive early deflation using that
508*     part of the deflation window that converged using INFQR here and
509*     there to keep track.)
510*
511*     Copy the trailing submatrix to the working space.
512*
513      CALL PDLASET( 'All', IROFFH, JW+IROFFH, ZERO, ONE, T, 1, 1,
514     $     DESCT )
515      CALL PDLASET( 'All', JW, IROFFH, ZERO, ZERO, T, 1+IROFFH, 1,
516     $     DESCT )
517      CALL PDLACPY( 'All', 1, JW, H, KWTOP, KWTOP, DESCH, T, 1+IROFFH,
518     $     1+IROFFH, DESCT )
519      CALL PDLACPY( 'Upper', JW-1, JW-1, H, KWTOP+1, KWTOP, DESCH, T,
520     $     1+IROFFH+1, 1+IROFFH, DESCT )
521      IF( JW.GT.2 )
522     $   CALL PDLASET( 'Lower', JW-2, JW-2, ZERO, ZERO, T, 1+IROFFH+2,
523     $        1+IROFFH, DESCT )
524      CALL PDLACPY( 'All', JW-1, 1, H, KWTOP+1, KWTOP+JW-1, DESCH, T,
525     $     1+IROFFH+1, 1+IROFFH+JW-1, DESCT )
526*
527*     Initialize the working orthogonal matrix.
528*
529      CALL PDLASET( 'All', JW+IROFFH, JW+IROFFH, ZERO, ONE, V, 1, 1,
530     $     DESCV )
531*
532*     Compute the Schur form of T.
533*
534      NPMIN = PILAENVX( ICTXT, 23, 'PDLAQR3', 'SV', JW, NB, NPROW,
535     $     NPCOL )
536      NMIN = PILAENVX( ICTXT, 12, 'PDLAQR3', 'SV', JW, 1, JW, LWORK )
537      NMAX = ( N-1 ) / 3
538      IF( MIN(NPROW, NPCOL).LE.NPMIN+1 .OR. RECLEVEL.GE.1 ) THEN
539*
540*        The AED window is large enough.
541*        Compute the Schur decomposition with all processors.
542*
543         IF( JW+IROFFH.GT.NMIN .AND. JW+IROFFH.LE.NMAX
544     $        .AND. RECLEVEL.LT.RECMAX ) THEN
545            CALL PDLAQR0( .TRUE., .TRUE., JW+IROFFH, 1+IROFFH,
546     $           JW+IROFFH, T, DESCT, SR( KWTOP-IROFFH ),
547     $           SI( KWTOP-IROFFH ), 1+IROFFH, JW+IROFFH, V, DESCV,
548     $           WORK, LWORK, IWORK(NSEL+1), LIWORK-NSEL, INFQR,
549     $           RECLEVEL+1 )
550         ELSE
551            IF( DESCT(RSRC_).EQ.0 .AND. DESCT(CSRC_).EQ.0 ) THEN
552               IF( JW+IROFFH.GT.DESCT( MB_ ) ) THEN
553                  CALL PDLAQR1( .TRUE., .TRUE., JW+IROFFH, 1,
554     $                 JW+IROFFH, T, DESCT, SR( KWTOP-IROFFH ),
555     $                 SI( KWTOP-IROFFH ), 1, JW+IROFFH, V,
556     $                 DESCV, WORK, LWORK, IWORK(NSEL+1), LIWORK-NSEL,
557     $                 INFQR )
558               ELSE
559                  IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN
560                     CALL DLAHQR( .TRUE., .TRUE., JW+IROFFH, 1+IROFFH,
561     $                    JW+IROFFH, T, DESCT(LLD_),
562     $                    SR( KWTOP-IROFFH ), SI( KWTOP-IROFFH ),
563     $                    1+IROFFH, JW+IROFFH, V, DESCV(LLD_), INFQR )
564                  ELSE
565                     INFQR = 0
566                  END IF
567                  IF( NPROCS.GT.1 )
568     $               CALL IGAMN2D( ICTXT, 'All', '1-Tree', 1, 1, INFQR,
569     $                    1, -1, -1, -1, -1, -1 )
570               END IF
571            ELSEIF( JW+IROFFH.LE.DESCT( MB_ ) ) THEN
572               IF( MYROW.EQ.DESCT(RSRC_) .AND. MYCOL.EQ.DESCT(CSRC_) )
573     $              THEN
574                  CALL DLAHQR( .TRUE., .TRUE., JW+IROFFH, 1+IROFFH,
575     $                 JW+IROFFH, T, DESCT(LLD_),
576     $                 SR( KWTOP-IROFFH ), SI( KWTOP-IROFFH ),
577     $                 1+IROFFH, JW+IROFFH, V, DESCV(LLD_), INFQR )
578               ELSE
579                  INFQR = 0
580               END IF
581               IF( NPROCS.GT.1 )
582     $         CALL IGAMN2D( ICTXT, 'All', '1-Tree', 1, 1, INFQR,
583     $              1, -1, -1, -1, -1, -1 )
584            ELSE
585               TZROWS0 = NUMROC( JW+IROFFH, NB, MYROW, 0, NPROW )
586               TZCOLS0 = NUMROC( JW+IROFFH, NB, MYCOL, 0, NPCOL )
587               CALL DESCINIT( DESCTZ0, JW+IROFFH, JW+IROFFH, NB, NB, 0,
588     $              0, ICTXT, MAX(1,TZROWS0), IERR0 )
589               IPT0 = 1
590               IPZ0 = IPT0 + MAX(1,TZROWS0)*TZCOLS0
591               IPW0 = IPZ0 + MAX(1,TZROWS0)*TZCOLS0
592               CALL PDLAMVE( 'All', JW+IROFFH, JW+IROFFH, T, 1, 1,
593     $              DESCT, WORK(IPT0), 1, 1, DESCTZ0, WORK(IPW0) )
594               CALL PDLASET( 'All', JW+IROFFH, JW+IROFFH, ZERO, ONE,
595     $              WORK(IPZ0), 1, 1, DESCTZ0 )
596               CALL PDLAQR1( .TRUE., .TRUE., JW+IROFFH, 1,
597     $              JW+IROFFH, WORK(IPT0), DESCTZ0,
598     $              SR( KWTOP-IROFFH ), SI( KWTOP-IROFFH ),
599     $              1, JW+IROFFH, WORK(IPZ0),
600     $              DESCTZ0, WORK(IPW0), LWORK-IPW0+1, IWORK(NSEL+1),
601     $              LIWORK-NSEL, INFQR )
602               CALL PDLAMVE( 'All', JW+IROFFH, JW+IROFFH, WORK(IPT0), 1,
603     $              1, DESCTZ0, T, 1, 1, DESCT, WORK(IPW0) )
604               CALL PDLAMVE( 'All', JW+IROFFH, JW+IROFFH, WORK(IPZ0), 1,
605     $              1, DESCTZ0, V, 1, 1, DESCV, WORK(IPW0) )
606            END IF
607         END IF
608      ELSE
609*
610*        The AED window is too small.
611*        Redistribute the AED window to a subgrid
612*        and do the computation on the subgrid.
613*
614         ICTXT_NEW = ICTXT
615         DO 20 I = 0, NPMIN-1
616            DO 10 J = 0, NPMIN-1
617               PMAP( J+1+I*NPMIN ) = BLACS_PNUM( ICTXT, I, J )
618 10         CONTINUE
619 20      CONTINUE
620         CALL BLACS_GRIDMAP( ICTXT_NEW, PMAP, NPMIN, NPMIN, NPMIN )
621         CALL BLACS_GRIDINFO( ICTXT_NEW, NPMIN, NPMIN, MYROW_NEW,
622     $        MYCOL_NEW )
623         IF( MYROW.GE.NPMIN .OR. MYCOL.GE.NPMIN ) ICTXT_NEW = -1
624         IF( ICTXT_NEW.GE.0 ) THEN
625            TZROWS0 = NUMROC( JW, NB, MYROW_NEW, 0, NPMIN )
626            TZCOLS0 = NUMROC( JW, NB, MYCOL_NEW, 0, NPMIN )
627            CALL DESCINIT( DESCTZ0, JW, JW, NB, NB, 0,
628     $           0, ICTXT_NEW, MAX(1,TZROWS0), IERR0 )
629            IPT0 = 1
630            IPZ0 = IPT0 + MAX(1,TZROWS0)*MAX(1,TZCOLS0)
631            IPW0 = IPZ0 + MAX(1,TZROWS0)*MAX(1,TZCOLS0)
632         ELSE
633            IPT0 = 1
634            IPZ0 = 2
635            IPW0 = 3
636            DESCTZ0( CTXT_ ) = -1
637            INFQR = 0
638         END IF
639         CALL PDGEMR2D( JW, JW, T, 1+IROFFH, 1+IROFFH, DESCT,
640     $        WORK(IPT0), 1, 1, DESCTZ0, ICTXT )
641         IF( ICTXT_NEW.GE.0 ) THEN
642            CALL PDLASET( 'All', JW, JW, ZERO, ONE, WORK(IPZ0), 1, 1,
643     $           DESCTZ0 )
644            NMIN = PILAENVX( ICTXT_NEW, 12, 'PDLAQR3', 'SV', JW, 1, JW,
645     $           LWORK )
646            IF( JW.GT.NMIN .AND. JW.LE.NMAX .AND. RECLEVEL.LT.1 ) THEN
647               CALL PDLAQR0( .TRUE., .TRUE., JW, 1, JW, WORK(IPT0),
648     $              DESCTZ0, SR( KWTOP ), SI( KWTOP ), 1, JW,
649     $              WORK(IPZ0), DESCTZ0, WORK(IPW0), LWORK-IPW0+1,
650     $              IWORK(NSEL+1), LIWORK-NSEL, INFQR,
651     $              RECLEVEL+1 )
652            ELSE
653               CALL PDLAQR1( .TRUE., .TRUE., JW, 1, JW, WORK(IPT0),
654     $              DESCTZ0, SR( KWTOP ), SI( KWTOP ), 1, JW,
655     $              WORK(IPZ0), DESCTZ0, WORK(IPW0), LWORK-IPW0+1,
656     $              IWORK(NSEL+1), LIWORK-NSEL, INFQR )
657            END IF
658         END IF
659         CALL PDGEMR2D( JW, JW, WORK(IPT0), 1, 1, DESCTZ0, T, 1+IROFFH,
660     $        1+IROFFH, DESCT, ICTXT )
661         CALL PDGEMR2D( JW, JW, WORK(IPZ0), 1, 1, DESCTZ0, V, 1+IROFFH,
662     $        1+IROFFH, DESCV, ICTXT )
663         IF( ICTXT_NEW.GE.0 )
664     $      CALL BLACS_GRIDEXIT( ICTXT_NEW )
665         IF( MYROW+MYCOL.GT.0 ) THEN
666            DO 40 J = 0, JW-1
667               SR( KWTOP+J ) = ZERO
668               SI( KWTOP+J ) = ZERO
669 40         CONTINUE
670         END IF
671         CALL IGAMN2D( ICTXT, 'All', '1-Tree', 1, 1, INFQR, 1, -1, -1,
672     $        -1, -1, -1 )
673         CALL DGSUM2D( ICTXT, 'All', ' ', JW, 1, SR(KWTOP), JW, -1, -1 )
674         CALL DGSUM2D( ICTXT, 'All', ' ', JW, 1, SI(KWTOP), JW, -1, -1 )
675      END IF
676*
677*     Adjust INFQR for offset from block border in submatrices.
678*
679      IF( INFQR.NE.0 )
680     $   INFQR = INFQR - IROFFH
681*
682*     PDTRORD needs a clean margin near the diagonal.
683*
684      DO 50 J = 1, JW - 3
685         CALL PDELSET( T, J+2, J, DESCT, ZERO )
686         CALL PDELSET( T, J+3, J, DESCT, ZERO )
687 50   CONTINUE
688      IF( JW.GT.2 )
689     $   CALL PDELSET( T, JW, JW-2, DESCT, ZERO )
690*
691*     Check local residual for AED Schur decomposition.
692*
693      RESAED = 0.0D+00
694*
695*     Clean up the array SELECT for PDTRORD.
696*
697      DO 60 J = 1, NSEL
698         IWORK( J ) = 0
699 60   CONTINUE
700*
701*     Set local M counter to zero.
702*
703      MLOC = 0
704*
705*     Outer deflation detection loop (label 80).
706*     In this loop a bunch of undeflatable eigenvalues
707*     are moved simultaneously.
708*
709      DO 70 J = 1, IROFFH + INFQR
710         IWORK( J ) = 1
711 70   CONTINUE
712*
713      NS = JW
714      ILST = INFQR + 1 + IROFFH
715      IF( ILST.GT.1 ) THEN
716         CALL PDELGET( 'All', '1-Tree', ELEM, T, ILST, ILST-1, DESCT )
717         BULGE = ELEM.NE.ZERO
718         IF( BULGE ) ILST = ILST+1
719      END IF
720*
721 80   CONTINUE
722      IF( ILST.LE.NS+IROFFH ) THEN
723*
724*        Find the top-left corner of the local window.
725*
726         LILST = MAX(ILST,NS+IROFFH-NB+1)
727         IF( LILST.GT.1 ) THEN
728            CALL PDELGET( 'All', '1-Tree', ELEM, T, LILST, LILST-1,
729     $           DESCT )
730            BULGE = ELEM.NE.ZERO
731            IF( BULGE ) LILST = LILST+1
732         END IF
733*
734*        Lock all eigenvalues outside the local window.
735*
736         DO 90 J = IROFFH+1, LILST-1
737            IWORK( J ) = 1
738 90      CONTINUE
739         LILST0 = LILST
740*
741*        Inner deflation detection loop (label 100).
742*        In this loop, the undeflatable eigenvalues are moved to the
743*        top-left corner of the local window.
744*
745 100     CONTINUE
746         IF( LILST.LE.NS+IROFFH ) THEN
747            IF( NS.EQ.1 ) THEN
748               BULGE = .FALSE.
749            ELSE
750               CALL PDELGET( 'All', '1-Tree', ELEM, T, NS+IROFFH,
751     $              NS+IROFFH-1, DESCT )
752               BULGE = ELEM.NE.ZERO
753            END IF
754*
755*           Small spike tip test for deflation.
756*
757            IF( .NOT.BULGE ) THEN
758*
759*              Real eigenvalue.
760*
761               CALL PDELGET( 'All', '1-Tree', ELEM, T, NS+IROFFH,
762     $              NS+IROFFH, DESCT )
763               FOO = ABS( ELEM )
764               IF( FOO.EQ.ZERO )
765     $            FOO = ABS( S )
766               CALL PDELGET( 'All', '1-Tree', ELEM, V, 1+IROFFH,
767     $              NS+IROFFH, DESCV )
768               IF( ABS( S*ELEM ).LE.MAX( SMLNUM, ULP*FOO ) ) THEN
769*
770*                 Deflatable.
771*
772                  NS = NS - 1
773               ELSE
774*
775*                 Undeflatable: move it up out of the way.
776*
777                  IFST = NS
778                  DO 110 J = LILST, JW+IROFFH
779                     IWORK( J ) = 0
780 110              CONTINUE
781                  IWORK( IFST+IROFFH ) = 1
782                  CALL PDTRORD( 'Vectors', IWORK, PAR, JW+IROFFH, T, 1,
783     $                 1, DESCT, V, 1, 1, DESCV, WORK,
784     $                 WORK(JW+IROFFH+1), MLOC,
785     $                 WORK(2*(JW+IROFFH)+1), LWORK-2*(JW+IROFFH),
786     $                 IWORK(NSEL+1), LIWORK-NSEL, INFO )
787*
788*                 Adjust the array SELECT explicitly so that it does not
789*                 rely on the output of PDTRORD.
790*
791                  IWORK( IFST+IROFFH ) = 0
792                  IWORK( LILST ) = 1
793                  LILST = LILST + 1
794*
795*                 In case of a rare exchange failure, adjust the
796*                 pointers ILST and LILST to the current place to avoid
797*                 unexpected behaviors.
798*
799                  IF( INFO.NE.0 ) THEN
800                     LILST = MAX(INFO, LILST)
801                     ILST = MAX(INFO, ILST)
802                  END IF
803               END IF
804            ELSE
805*
806*              Complex conjugate pair.
807*
808               CALL PDELGET( 'All', '1-Tree', ELEM1, T, NS+IROFFH,
809     $              NS+IROFFH, DESCT )
810               CALL PDELGET( 'All', '1-Tree', ELEM2, T, NS+IROFFH,
811     $              NS+IROFFH-1, DESCT )
812               CALL PDELGET( 'All', '1-Tree', ELEM3, T, NS+IROFFH-1,
813     $              NS+IROFFH, DESCT )
814               FOO = ABS( ELEM1 ) + SQRT( ABS( ELEM2 ) )*
815     $              SQRT( ABS( ELEM3 ) )
816               IF( FOO.EQ.ZERO )
817     $            FOO = ABS( S )
818               CALL PDELGET( 'All', '1-Tree', ELEM1, V, 1+IROFFH,
819     $              NS+IROFFH, DESCV )
820               CALL PDELGET( 'All', '1-Tree', ELEM2, V, 1+IROFFH,
821     $              NS+IROFFH-1, DESCV )
822               IF( MAX( ABS( S*ELEM1 ), ABS( S*ELEM2 ) ).LE.
823     $              MAX( SMLNUM, ULP*FOO ) ) THEN
824*
825*                 Deflatable.
826*
827                  NS = NS - 2
828               ELSE
829*
830*                 Undeflatable: move them up out of the way.
831*
832                  IFST = NS
833                  DO 120 J = LILST, JW+IROFFH
834                     IWORK( J ) = 0
835 120              CONTINUE
836                  IWORK( IFST+IROFFH ) = 1
837                  IWORK( IFST+IROFFH-1 ) = 1
838                  CALL PDTRORD( 'Vectors', IWORK, PAR, JW+IROFFH, T, 1,
839     $                 1, DESCT, V, 1, 1, DESCV, WORK,
840     $                 WORK(JW+IROFFH+1), MLOC,
841     $                 WORK(2*(JW+IROFFH)+1), LWORK-2*(JW+IROFFH),
842     $                 IWORK(NSEL+1), LIWORK-NSEL, INFO )
843*
844*                 Adjust the array SELECT explicitly so that it does not
845*                 rely on the output of PDTRORD.
846*
847                  IWORK( IFST+IROFFH ) = 0
848                  IWORK( IFST+IROFFH-1 ) = 0
849                  IWORK( LILST ) = 1
850                  IWORK( LILST+1 ) = 1
851                  LILST = LILST + 2
852*
853*                 In case of a rare exchange failure, adjust the
854*                 pointers ILST and LILST to the current place to avoid
855*                 unexpected behaviors.
856*
857                  IF( INFO.NE.0 ) THEN
858                     LILST = MAX(INFO, LILST)
859                     ILST = MAX(INFO, ILST)
860                  END IF
861               END IF
862            END IF
863*
864*           End of inner deflation detection loop.
865*
866            GO TO 100
867         END IF
868*
869*        Unlock the eigenvalues outside the local window.
870*        Then undeflatable eigenvalues are moved to the proper position.
871*
872         DO 130 J = ILST, LILST0-1
873            IWORK( J ) = 0
874 130     CONTINUE
875         CALL PDTRORD( 'Vectors', IWORK, PAR, JW+IROFFH, T, 1, 1,
876     $        DESCT, V, 1, 1, DESCV, WORK, WORK(JW+IROFFH+1),
877     $        M, WORK(2*(JW+IROFFH)+1), LWORK-2*(JW+IROFFH),
878     $        IWORK(NSEL+1), LIWORK-NSEL, INFO )
879         ILST = M + 1
880*
881*        In case of a rare exchange failure, adjust the pointer ILST to
882*        the current place to avoid unexpected behaviors.
883*
884         IF( INFO.NE.0 )
885     $      ILST = MAX(INFO, ILST)
886*
887*        End of outer deflation detection loop.
888*
889         GO TO 80
890      END IF
891
892*
893*     Post-reordering step: copy output eigenvalues to output.
894*
895      CALL DCOPY( JW, WORK(1+IROFFH), 1, SR( KWTOP ), 1 )
896      CALL DCOPY( JW, WORK(JW+2*IROFFH+1), 1, SI( KWTOP ), 1 )
897*
898*     Check local residual for reordered AED Schur decomposition.
899*
900      RESAED = 0.0D+00
901*
902*     Return to Hessenberg form.
903*
904      IF( NS.EQ.0 )
905     $   S = ZERO
906*
907      IF( NS.LT.JW .AND. SORTGRAD ) THEN
908*
909*        Sorting diagonal blocks of T improves accuracy for
910*        graded matrices.  Bubble sort deals well with exchange
911*        failures. Eigenvalues/shifts from T are also restored.
912*
913         ROUND = 0
914         SORTED = .FALSE.
915         I = NS + 1 + IROFFH
916 140     CONTINUE
917         IF( SORTED )
918     $      GO TO 180
919         SORTED = .TRUE.
920         ROUND = ROUND + 1
921*
922         KEND = I - 1
923         I = INFQR + 1 + IROFFH
924         IF( I.EQ.NS+IROFFH ) THEN
925            K = I + 1
926         ELSE IF( SI( KWTOP-IROFFH + I-1 ).EQ.ZERO ) THEN
927            K = I + 1
928         ELSE
929            K = I + 2
930         END IF
931 150     CONTINUE
932         IF( K.LE.KEND ) THEN
933            IF( K.EQ.I+1 ) THEN
934               EVI = ABS( SR( KWTOP-IROFFH+I-1 ) )
935            ELSE
936               EVI = ABS( SR( KWTOP-IROFFH+I-1 ) ) +
937     $              ABS( SI( KWTOP-IROFFH+I-1 ) )
938            END IF
939*
940            IF( K.EQ.KEND ) THEN
941               EVK = ABS( SR( KWTOP-IROFFH+K-1 ) )
942            ELSEIF( SI( KWTOP-IROFFH+K-1 ).EQ.ZERO ) THEN
943               EVK = ABS( SR( KWTOP-IROFFH+K-1 ) )
944            ELSE
945               EVK = ABS( SR( KWTOP-IROFFH+K-1 ) ) +
946     $              ABS( SI( KWTOP-IROFFH+K-1 ) )
947            END IF
948*
949            IF( EVI.GE.EVK ) THEN
950               I = K
951            ELSE
952               MLOC = 0
953               SORTED = .FALSE.
954               IFST = I
955               ILST = K
956               DO 160 J = 1, I-1
957                  IWORK( J ) = 1
958                  MLOC = MLOC + 1
959 160           CONTINUE
960               IF( K.EQ.I+2 ) THEN
961                  IWORK( I ) = 0
962                  IWORK(I+1) = 0
963               ELSE
964                  IWORK( I ) = 0
965               END IF
966               IF( K.NE.KEND .AND. SI( KWTOP-IROFFH+K-1 ).NE.ZERO ) THEN
967                  IWORK( K ) = 1
968                  IWORK(K+1) = 1
969                  MLOC = MLOC + 2
970               ELSE
971                  IWORK( K ) = 1
972                  IF( K.LT.KEND ) IWORK(K+1) = 0
973                  MLOC = MLOC + 1
974               END IF
975               DO 170 J = K+2, JW+IROFFH
976                  IWORK( J ) = 0
977 170           CONTINUE
978               CALL PDTRORD( 'Vectors', IWORK, PAR, JW+IROFFH, T, 1, 1,
979     $              DESCT, V, 1, 1, DESCV, WORK, WORK(JW+IROFFH+1), M,
980     $              WORK(2*(JW+IROFFH)+1), LWORK-2*(JW+IROFFH),
981     $              IWORK(NSEL+1), LIWORK-NSEL, IERR )
982               CALL DCOPY( JW, WORK(1+IROFFH), 1, SR( KWTOP ), 1 )
983               CALL DCOPY( JW, WORK(JW+2*IROFFH+1), 1, SI( KWTOP ), 1 )
984               IF( IERR.EQ.0 ) THEN
985                  I = ILST
986               ELSE
987                  I = K
988               END IF
989            END IF
990            IF( I.EQ.KEND ) THEN
991               K = I + 1
992            ELSE IF( SI( KWTOP-IROFFH+I-1 ).EQ.ZERO ) THEN
993               K = I + 1
994            ELSE
995               K = I + 2
996            END IF
997            GO TO 150
998         END IF
999         GO TO 140
1000 180     CONTINUE
1001      END IF
1002*
1003*     Restore number of rows and columns of T matrix descriptor.
1004*
1005      DESCT( M_ ) = NW+IROFFH
1006      DESCT( N_ ) = NH+IROFFH
1007*
1008      IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN
1009         IF( NS.GT.1 .AND. S.NE.ZERO ) THEN
1010*
1011*           Reflect spike back into lower triangle.
1012*
1013            RROWS = NUMROC( NS+IROFFH, NB, MYROW, DESCV(RSRC_), NPROW )
1014            RCOLS = NUMROC( 1, 1, MYCOL, DESCV(CSRC_), NPCOL )
1015            CALL DESCINIT( DESCR, NS+IROFFH, 1, NB, 1, DESCV(RSRC_),
1016     $           DESCV(CSRC_), ICTXT, MAX(1, RROWS), INFO )
1017            TAUROWS = NUMROC( 1, 1, MYCOL, DESCV(RSRC_), NPROW )
1018            TAUCOLS = NUMROC( JW+IROFFH, NB, MYCOL, DESCV(CSRC_),
1019     $           NPCOL )
1020            CALL DESCINIT( DESCTAU, 1, JW+IROFFH, 1, NB, DESCV(RSRC_),
1021     $           DESCV(CSRC_), ICTXT, MAX(1, TAUROWS), INFO )
1022*
1023            IR = 1
1024            ITAU = IR + DESCR( LLD_ ) * RCOLS
1025            IPW  = ITAU + DESCTAU( LLD_ ) * TAUCOLS
1026*
1027            CALL PDLASET( 'All', NS+IROFFH, 1, ZERO, ZERO, WORK(ITAU),
1028     $           1, 1, DESCTAU )
1029*
1030            CALL PDCOPY( NS, V, 1+IROFFH, 1+IROFFH, DESCV, DESCV(M_),
1031     $           WORK(IR), 1+IROFFH, 1, DESCR, 1 )
1032            CALL PDLARFG( NS, BETA, 1+IROFFH, 1, WORK(IR), 2+IROFFH, 1,
1033     $           DESCR, 1, WORK(ITAU+IROFFH) )
1034            CALL PDELSET( WORK(IR), 1+IROFFH, 1, DESCR, ONE )
1035*
1036            CALL PDLASET( 'Lower', JW-2, JW-2, ZERO, ZERO, T, 3+IROFFH,
1037     $           1+IROFFH, DESCT )
1038*
1039            CALL PDLARF( 'Left', NS, JW, WORK(IR), 1+IROFFH, 1, DESCR,
1040     $           1, WORK(ITAU+IROFFH), T, 1+IROFFH, 1+IROFFH,
1041     $           DESCT, WORK( IPW ) )
1042            CALL PDLARF( 'Right', NS, NS, WORK(IR), 1+IROFFH, 1, DESCR,
1043     $           1, WORK(ITAU+IROFFH), T, 1+IROFFH, 1+IROFFH,
1044     $           DESCT, WORK( IPW ) )
1045            CALL PDLARF( 'Right', JW, NS, WORK(IR), 1+IROFFH, 1, DESCR,
1046     $           1, WORK(ITAU+IROFFH), V, 1+IROFFH, 1+IROFFH,
1047     $           DESCV, WORK( IPW ) )
1048*
1049            ITAU = 1
1050            IPW = ITAU + DESCTAU( LLD_ ) * TAUCOLS
1051            CALL PDGEHRD( JW+IROFFH, 1+IROFFH, NS+IROFFH, T, 1, 1,
1052     $           DESCT, WORK(ITAU), WORK( IPW ), LWORK-IPW+1, INFO )
1053         END IF
1054*
1055*        Copy updated reduced window into place.
1056*
1057         IF( KWTOP.GT.1 ) THEN
1058            CALL PDELGET( 'All', '1-Tree', ELEM, V, 1+IROFFH,
1059     $           1+IROFFH, DESCV )
1060            CALL PDELSET( H, KWTOP, KWTOP-1, DESCH, S*ELEM )
1061         END IF
1062         CALL PDLACPY( 'Upper', JW-1, JW-1, T, 1+IROFFH+1, 1+IROFFH,
1063     $        DESCT, H, KWTOP+1, KWTOP, DESCH )
1064         CALL PDLACPY( 'All', 1, JW, T, 1+IROFFH, 1+IROFFH, DESCT, H,
1065     $        KWTOP, KWTOP, DESCH )
1066         CALL PDLACPY( 'All', JW-1, 1, T, 1+IROFFH+1, 1+IROFFH+JW-1,
1067     $        DESCT, H, KWTOP+1, KWTOP+JW-1, DESCH )
1068*
1069*        Accumulate orthogonal matrix in order to update
1070*        H and Z, if requested.
1071*
1072         IF( NS.GT.1 .AND. S.NE.ZERO ) THEN
1073            CALL PDORMHR( 'Right', 'No', JW+IROFFH, NS+IROFFH, 1+IROFFH,
1074     $           NS+IROFFH, T, 1, 1, DESCT, WORK(ITAU), V, 1,
1075     $           1, DESCV, WORK( IPW ), LWORK-IPW+1, INFO )
1076         END IF
1077*
1078*        Update vertical slab in H.
1079*
1080         IF( WANTT ) THEN
1081            LTOP = 1
1082         ELSE
1083            LTOP = KTOP
1084         END IF
1085         KLN = MAX( 0, KWTOP-LTOP )
1086         IROFFHH = MOD( LTOP-1, NB )
1087         ICOFFHH = MOD( KWTOP-1, NB )
1088         HHRSRC = INDXG2P( LTOP, NB, MYROW, DESCH(RSRC_), NPROW )
1089         HHCSRC = INDXG2P( KWTOP, NB, MYCOL, DESCH(CSRC_), NPCOL )
1090         HHROWS = NUMROC( KLN+IROFFHH, NB, MYROW, HHRSRC, NPROW )
1091         HHCOLS = NUMROC( JW+ICOFFHH, NB, MYCOL, HHCSRC, NPCOL )
1092         CALL DESCINIT( DESCHH, KLN+IROFFHH, JW+ICOFFHH, NB, NB,
1093     $        HHRSRC, HHCSRC, ICTXT, MAX(1, HHROWS), IERR )
1094         CALL PDGEMM( 'No', 'No', KLN, JW, JW, ONE, H, LTOP,
1095     $        KWTOP, DESCH, V, 1+IROFFH, 1+IROFFH, DESCV, ZERO,
1096     $        WORK, 1+IROFFHH, 1+ICOFFHH, DESCHH )
1097         CALL PDLACPY( 'All', KLN, JW, WORK, 1+IROFFHH, 1+ICOFFHH,
1098     $        DESCHH, H, LTOP, KWTOP, DESCH )
1099*
1100*        Update horizontal slab in H.
1101*
1102         IF( WANTT ) THEN
1103            KLN = N-KBOT
1104            IROFFHH = MOD( KWTOP-1, NB )
1105            ICOFFHH = MOD( KBOT, NB )
1106            HHRSRC = INDXG2P( KWTOP, NB, MYROW, DESCH(RSRC_), NPROW )
1107            HHCSRC = INDXG2P( KBOT+1, NB, MYCOL, DESCH(CSRC_), NPCOL )
1108            HHROWS = NUMROC( JW+IROFFHH, NB, MYROW, HHRSRC, NPROW )
1109            HHCOLS = NUMROC( KLN+ICOFFHH, NB, MYCOL, HHCSRC, NPCOL )
1110            CALL DESCINIT( DESCHH, JW+IROFFHH, KLN+ICOFFHH, NB, NB,
1111     $           HHRSRC, HHCSRC, ICTXT, MAX(1, HHROWS), IERR )
1112            CALL PDGEMM( 'Tr', 'No', JW, KLN, JW, ONE, V,
1113     $           1+IROFFH, 1+IROFFH, DESCV, H, KWTOP, KBOT+1,
1114     $           DESCH, ZERO, WORK, 1+IROFFHH, 1+ICOFFHH, DESCHH )
1115            CALL PDLACPY( 'All', JW, KLN, WORK, 1+IROFFHH, 1+ICOFFHH,
1116     $           DESCHH, H, KWTOP, KBOT+1, DESCH )
1117         END IF
1118*
1119*        Update vertical slab in Z.
1120*
1121         IF( WANTZ ) THEN
1122            KLN = IHIZ-ILOZ+1
1123            IROFFZZ = MOD( ILOZ-1, NB )
1124            ICOFFZZ = MOD( KWTOP-1, NB )
1125            ZZRSRC = INDXG2P( ILOZ, NB, MYROW, DESCZ(RSRC_), NPROW )
1126            ZZCSRC = INDXG2P( KWTOP, NB, MYCOL, DESCZ(CSRC_), NPCOL )
1127            ZZROWS = NUMROC( KLN+IROFFZZ, NB, MYROW, ZZRSRC, NPROW )
1128            ZZCOLS = NUMROC( JW+ICOFFZZ, NB, MYCOL, ZZCSRC, NPCOL )
1129            CALL DESCINIT( DESCZZ, KLN+IROFFZZ, JW+ICOFFZZ, NB, NB,
1130     $           ZZRSRC, ZZCSRC, ICTXT, MAX(1, ZZROWS), IERR )
1131            CALL PDGEMM( 'No', 'No', KLN, JW, JW, ONE, Z, ILOZ,
1132     $           KWTOP, DESCZ, V, 1+IROFFH, 1+IROFFH, DESCV,
1133     $           ZERO, WORK, 1+IROFFZZ, 1+ICOFFZZ, DESCZZ )
1134            CALL PDLACPY( 'All', KLN, JW, WORK, 1+IROFFZZ, 1+ICOFFZZ,
1135     $           DESCZZ, Z, ILOZ, KWTOP, DESCZ )
1136         END IF
1137      END IF
1138*
1139*     Return the number of deflations (ND) and the number of shifts (NS).
1140*     (Subtracting INFQR from the spike length takes care of the case of
1141*     a rare QR failure while calculating eigenvalues of the deflation
1142*     window.)
1143*
1144      ND = JW - NS
1145      NS = NS - INFQR
1146*
1147*     Return optimal workspace.
1148*
1149      WORK( 1 ) = DBLE( LWKOPT )
1150      IWORK( 1 ) = ILWKOPT + NSEL
1151*
1152*     End of PDLAQR3
1153*
1154      END
1155