1      SUBROUTINE PDLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, A, DESCA,
2     $                    ILOZ, IHIZ, Z, DESCZ, NS, ND, SR, SI, T, LDT,
3     $                    V, LDV, WR, WI, WORK, LWORK )
4*
5*     Contribution from the Department of Computing Science and HPC2N,
6*     Umea University, Sweden
7*
8*  -- ScaLAPACK routine (version 2.0.2) --
9*     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
10*     May 1 2012
11*
12      IMPLICIT NONE
13*
14*     .. Scalar Arguments ..
15      INTEGER            IHIZ, ILOZ, KBOT, KTOP, LDT, LDV, LWORK, N, ND,
16     $                   NS, NW
17      LOGICAL            WANTT, WANTZ
18*     ..
19*     .. Array Arguments ..
20      INTEGER            DESCA( * ), DESCZ( * )
21      DOUBLE PRECISION   A( * ), SI( KBOT ), SR( KBOT ), T( LDT, * ),
22     $                   V( LDV, * ), WORK( * ), WI( * ), WR( * ),
23     $                   Z( * )
24*     ..
25*
26*  Purpose
27*  =======
28*
29*  Aggressive early deflation:
30*
31*  PDLAQR2 accepts as input an upper Hessenberg matrix A and performs an
32*  orthogonal similarity transformation designed to detect and deflate
33*  fully converged eigenvalues from a trailing principal submatrix.  On
34*  output A has been overwritten by a new Hessenberg matrix that is a
35*  perturbation of an orthogonal similarity transformation of A.  It is
36*  to be hoped that the final version of H has many zero subdiagonal
37*  entries.
38*
39*  This routine handles small deflation windows which is affordable by
40*  one processor. Normally, it is called by PDLAQR1. All the inputs are
41*  assumed to be valid without checking.
42*
43*  Notes
44*  =====
45*
46*  Each global data object is described by an associated description
47*  vector.  This vector stores the information required to establish
48*  the mapping between an object element and its corresponding process
49*  and memory location.
50*
51*  Let A be a generic term for any 2D block cyclicly distributed array.
52*  Such a global array has an associated description vector DESCA.
53*  In the following comments, the character _ should be read as
54*  "of the global array".
55*
56*  NOTATION        STORED IN      EXPLANATION
57*  --------------- -------------- --------------------------------------
58*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
59*                                 DTYPE_A = 1.
60*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
61*                                 the BLACS process grid A is distribu-
62*                                 ted over. The context itself is glo-
63*                                 bal, but the handle (the integer
64*                                 value) may vary.
65*  M_A    (global) DESCA( M_ )    The number of rows in the global
66*                                 array A.
67*  N_A    (global) DESCA( N_ )    The number of columns in the global
68*                                 array A.
69*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
70*                                 the rows of the array.
71*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
72*                                 the columns of the array.
73*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
74*                                 row of the array A is distributed.
75*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
76*                                 first column of the array A is
77*                                 distributed.
78*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
79*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
80*
81*  Let K be the number of rows or columns of a distributed matrix,
82*  and assume that its process grid has dimension p x q.
83*  LOCr( K ) denotes the number of elements of K that a process
84*  would receive if K were distributed over the p processes of its
85*  process column.
86*  Similarly, LOCc( K ) denotes the number of elements of K that a
87*  process would receive if K were distributed over the q processes of
88*  its process row.
89*  The values of LOCr() and LOCc() may be determined via a call to the
90*  ScaLAPACK tool function, NUMROC:
91*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
92*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
93*  An upper bound for these quantities may be computed by:
94*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
95*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
96*
97*  Arguments
98*  =========
99*
100*  WANTT   (global input) LOGICAL
101*          If .TRUE., then the Hessenberg matrix H is fully updated
102*          so that the quasi-triangular Schur factor may be
103*          computed (in cooperation with the calling subroutine).
104*          If .FALSE., then only enough of H is updated to preserve
105*          the eigenvalues.
106*
107*  WANTZ   (global input) LOGICAL
108*          If .TRUE., then the orthogonal matrix Z is updated so
109*          so that the orthogonal Schur factor may be computed
110*          (in cooperation with the calling subroutine).
111*          If .FALSE., then Z is not referenced.
112*
113*  N       (global input) INTEGER
114*          The order of the matrix H and (if WANTZ is .TRUE.) the
115*          order of the orthogonal matrix Z.
116*
117*  KTOP    (global input) INTEGER
118*  KBOT    (global input) INTEGER
119*          It is assumed without a check that either
120*          KBOT = N or H(KBOT+1,KBOT)=0.  KBOT and KTOP together
121*          determine an isolated block along the diagonal of the
122*          Hessenberg matrix. However, H(KTOP,KTOP-1)=0 is not
123*          essentially necessary if WANTT is .TRUE. .
124*
125*  NW      (global input) INTEGER
126*          Deflation window size.  1 .LE. NW .LE. (KBOT-KTOP+1).
127*          Normally NW .GE. 3 if PDLAQR2 is called by PDLAQR1.
128*
129*  A       (local input/output) DOUBLE PRECISION array, dimension
130*          (DESCH(LLD_),*)
131*          On input the initial N-by-N section of A stores the
132*          Hessenberg matrix undergoing aggressive early deflation.
133*          On output A has been transformed by an orthogonal
134*          similarity transformation, perturbed, and the returned
135*          to Hessenberg form that (it is to be hoped) has some
136*          zero subdiagonal entries.
137*
138*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
139*          The array descriptor for the distributed matrix A.
140*
141*  ILOZ    (global input) INTEGER
142*  IHIZ    (global input) INTEGER
143*          Specify the rows of Z to which transformations must be
144*          applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
145*
146*  Z       (input/output) DOUBLE PRECISION array, dimension
147*          (DESCH(LLD_),*)
148*          IF WANTZ is .TRUE., then on output, the orthogonal
149*          similarity transformation mentioned above has been
150*          accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.
151*          If WANTZ is .FALSE., then Z is unreferenced.
152*
153*  DESCZ   (global and local input) INTEGER array of dimension DLEN_.
154*          The array descriptor for the distributed matrix Z.
155*
156*  NS      (global output) INTEGER
157*          The number of unconverged (ie approximate) eigenvalues
158*          returned in SR and SI that may be used as shifts by the
159*          calling subroutine.
160*
161*  ND      (global output) INTEGER
162*          The number of converged eigenvalues uncovered by this
163*          subroutine.
164*
165*  SR      (global output) DOUBLE PRECISION array, dimension KBOT
166*  SI      (global output) DOUBLE PRECISION array, dimension KBOT
167*          On output, the real and imaginary parts of approximate
168*          eigenvalues that may be used for shifts are stored in
169*          SR(KBOT-ND-NS+1) through SR(KBOT-ND) and
170*          SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively.
171*          On proc #0, the real and imaginary parts of converged
172*          eigenvalues are stored in SR(KBOT-ND+1) through SR(KBOT) and
173*          SI(KBOT-ND+1) through SI(KBOT), respectively. On other
174*          processors, these entries are set to zero.
175*
176*  T       (local workspace) DOUBLE PRECISION array, dimension LDT*NW.
177*
178*  LDT     (local input) INTEGER
179*          The leading dimension of the array T.
180*          LDT >= NW.
181*
182*  V       (local workspace) DOUBLE PRECISION array, dimension LDV*NW.
183*
184*  LDV     (local input) INTEGER
185*          The leading dimension of the array V.
186*          LDV >= NW.
187*
188*  WR      (local workspace) DOUBLE PRECISION array, dimension KBOT.
189*  WI      (local workspace) DOUBLE PRECISION array, dimension KBOT.
190*
191*  WORK    (local workspace) DOUBLE PRECISION array, dimension LWORK.
192*
193*  LWORK   (local input) INTEGER
194*          WORK(LWORK) is a local array and LWORK is assumed big enough
195*          so that LWORK >= NW*NW.
196*
197*  ================================================================
198*  Implemented by
199*        Meiyue Shao, Department of Computing Science and HPC2N,
200*        Umea University, Sweden
201*
202*  ================================================================
203*  References:
204*        B. Kagstrom, D. Kressner, and M. Shao,
205*        On Aggressive Early Deflation in Parallel Variants of the QR
206*        Algorithm.
207*        Para 2010, to appear.
208*
209*  ================================================================
210*     .. Parameters ..
211      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
212     $                   LLD_, MB_, M_, NB_, N_, RSRC_
213      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
214     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
215     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
216      DOUBLE PRECISION   ZERO, ONE
217      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
218*     ..
219*     .. Local Scalars ..
220      INTEGER            CONTXT, HBL, I, I1, I2, IAFIRST, ICOL, ICOL1,
221     $                   ICOL2, INFO, II, IROW, IROW1, IROW2, ITMP1,
222     $                   ITMP2, J, JAFIRST, JJ, K, L, LDA, LDZ, LLDTMP,
223     $                   MYCOL, MYROW, NODE, NPCOL, NPROW, DBLK,
224     $                   HSTEP, VSTEP, KKROW, KKCOL, KLN, LTOP, LEFT,
225     $                   RIGHT, UP, DOWN, D1, D2
226*     ..
227*     .. Local Arrays ..
228      INTEGER            DESCT( 9 ), DESCV( 9 ), DESCWH( 9 ),
229     $                   DESCWV( 9 )
230*     ..
231*     .. External Functions ..
232      INTEGER            NUMROC
233      EXTERNAL           NUMROC
234*     ..
235*     .. External Subroutines ..
236      EXTERNAL           BLACS_GRIDINFO, INFOG2L, DLASET,
237     $                   DLAQR3, DESCINIT, PDGEMM, PDGEMR2D, DGEMM,
238     $                   DLAMOV, DGESD2D, DGERV2D, DGEBS2D, DGEBR2D,
239     $                   IGEBS2D, IGEBR2D
240*     ..
241*     .. Intrinsic Functions ..
242      INTRINSIC          MAX, MIN, MOD
243*     ..
244*     .. Executable Statements ..
245*
246      INFO = 0
247*
248      IF( N.EQ.0 )
249     $   RETURN
250*
251*     NODE (IAFIRST,JAFIRST) OWNS A(1,1)
252*
253      HBL = DESCA( MB_ )
254      CONTXT = DESCA( CTXT_ )
255      LDA = DESCA( LLD_ )
256      IAFIRST = DESCA( RSRC_ )
257      JAFIRST = DESCA( CSRC_ )
258      LDZ = DESCZ( LLD_ )
259      CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL )
260      NODE = MYROW*NPCOL + MYCOL
261      LEFT = MOD( MYCOL+NPCOL-1, NPCOL )
262      RIGHT = MOD( MYCOL+1, NPCOL )
263      UP = MOD( MYROW+NPROW-1, NPROW )
264      DOWN = MOD( MYROW+1, NPROW )
265*
266*     I1 and I2 are the indices of the first row and last column of A
267*     to which transformations must be applied.
268*
269      I = KBOT
270      L = KTOP
271      IF( WANTT ) THEN
272         I1 = 1
273         I2 = N
274         LTOP = 1
275      ELSE
276         I1 = L
277         I2 = I
278         LTOP = L
279      END IF
280*
281*     Begin Aggressive Early Deflation.
282*
283      DBLK = NW
284      CALL INFOG2L( I-DBLK+1, I-DBLK+1, DESCA, NPROW, NPCOL, MYROW,
285     $     MYCOL, IROW, ICOL, II, JJ )
286      IF ( MYROW .EQ. II ) THEN
287         CALL DESCINIT( DESCT, DBLK, DBLK, DBLK, DBLK, II, JJ, CONTXT,
288     $        LDT, INFO )
289         CALL DESCINIT( DESCV, DBLK, DBLK, DBLK, DBLK, II, JJ, CONTXT,
290     $        LDV, INFO )
291      ELSE
292         CALL DESCINIT( DESCT, DBLK, DBLK, DBLK, DBLK, II, JJ, CONTXT,
293     $        1, INFO )
294         CALL DESCINIT( DESCV, DBLK, DBLK, DBLK, DBLK, II, JJ, CONTXT,
295     $        1, INFO )
296      END IF
297      CALL PDGEMR2D( DBLK, DBLK, A, I-DBLK+1, I-DBLK+1, DESCA, T, 1, 1,
298     $     DESCT, CONTXT )
299      IF ( MYROW .EQ. II .AND. MYCOL .EQ. JJ ) THEN
300         CALL DLASET( 'All', DBLK, DBLK, ZERO, ONE, V, LDV )
301         CALL DLAQR3( .TRUE., .TRUE., DBLK, 1, DBLK, DBLK-1, T, LDT, 1,
302     $        DBLK, V, LDV, NS, ND, WR, WI, WORK, DBLK, DBLK,
303     $        WORK( DBLK*DBLK+1 ), DBLK, DBLK, WORK( 2*DBLK*DBLK+1 ),
304     $        DBLK, WORK( 3*DBLK*DBLK+1 ), LWORK-3*DBLK*DBLK )
305         CALL DGEBS2D( CONTXT, 'All', ' ', DBLK, DBLK, V, LDV )
306         CALL IGEBS2D( CONTXT, 'All', ' ', 1, 1, ND, 1 )
307      ELSE
308         CALL DGEBR2D( CONTXT, 'All', ' ', DBLK, DBLK, V, LDV, II, JJ )
309         CALL IGEBR2D( CONTXT, 'All', ' ', 1, 1, ND, 1, II, JJ )
310      END IF
311*
312      IF( ND .GT. 0 ) THEN
313*
314*        Copy the local matrix back to the diagonal block.
315*
316         CALL PDGEMR2D( DBLK, DBLK, T, 1, 1, DESCT, A, I-DBLK+1,
317     $        I-DBLK+1, DESCA, CONTXT )
318*
319*        Update T and Z.
320*
321         IF( MOD( I-DBLK, HBL )+DBLK .LE. HBL ) THEN
322*
323*           Simplest case: the deflation window is located on one
324*           processor.
325*           Call DGEMM directly to perform the update.
326*
327            HSTEP = LWORK / DBLK
328            VSTEP = HSTEP
329*
330*           Update horizontal slab in A.
331*
332            IF( WANTT ) THEN
333               CALL INFOG2L( I-DBLK+1, I+1, DESCA, NPROW, NPCOL, MYROW,
334     $              MYCOL, IROW, ICOL, II, JJ )
335               IF( MYROW .EQ. II ) THEN
336                  ICOL1 = NUMROC( N, HBL, MYCOL, JAFIRST, NPCOL )
337                  DO 10 KKCOL = ICOL, ICOL1, HSTEP
338                     KLN = MIN( HSTEP, ICOL1-KKCOL+1 )
339                     CALL DGEMM( 'T', 'N', DBLK, KLN, DBLK, ONE, V,
340     $                    LDV, A( IROW+(KKCOL-1)*LDA ), LDA, ZERO, WORK,
341     $                    DBLK )
342                     CALL DLAMOV( 'A', DBLK, KLN, WORK, DBLK,
343     $                    A( IROW+(KKCOL-1)*LDA ), LDA )
344   10             CONTINUE
345               END IF
346            END IF
347*
348*           Update vertical slab in A.
349*
350            CALL INFOG2L( LTOP, I-DBLK+1, DESCA, NPROW, NPCOL, MYROW,
351     $           MYCOL, IROW, ICOL, II, JJ )
352            IF( MYCOL .EQ. JJ ) THEN
353               CALL INFOG2L( I-DBLK, I-DBLK+1, DESCA, NPROW, NPCOL,
354     $              MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
355               IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
356               DO 20 KKROW = IROW, IROW1, VSTEP
357                  KLN = MIN( VSTEP, IROW1-KKROW+1 )
358                  CALL DGEMM( 'N', 'N', KLN, DBLK, DBLK, ONE,
359     $                 A( KKROW+(ICOL-1)*LDA ), LDA, V, LDV, ZERO, WORK,
360     $                 KLN )
361                  CALL DLAMOV( 'A', KLN, DBLK, WORK, KLN,
362     $                 A( KKROW+(ICOL-1)*LDA ), LDA )
363   20          CONTINUE
364            END IF
365*
366*           Update vertical slab in Z.
367*
368            IF( WANTZ ) THEN
369               CALL INFOG2L( ILOZ, I-DBLK+1, DESCZ, NPROW, NPCOL, MYROW,
370     $              MYCOL, IROW, ICOL, II, JJ )
371               IF( MYCOL .EQ. JJ ) THEN
372                  CALL INFOG2L( IHIZ, I-DBLK+1, DESCZ, NPROW, NPCOL,
373     $                 MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
374                  IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
375                  DO 30 KKROW = IROW, IROW1, VSTEP
376                     KLN = MIN( VSTEP, IROW1-KKROW+1 )
377                     CALL DGEMM( 'N', 'N', KLN, DBLK, DBLK, ONE,
378     $                    Z( KKROW+(ICOL-1)*LDZ ), LDZ, V, LDV, ZERO,
379     $                    WORK, KLN )
380                     CALL DLAMOV( 'A', KLN, DBLK, WORK, KLN,
381     $                    Z( KKROW+(ICOL-1)*LDZ ), LDZ )
382   30             CONTINUE
383               END IF
384            END IF
385*
386         ELSE IF( MOD( I-DBLK, HBL )+DBLK .LE. 2*HBL ) THEN
387*
388*           More complicated case: the deflation window lay on a 2x2
389*           processor mesh.
390*           Call DGEMM locally and communicate by pair.
391*
392            D1 = HBL - MOD( I-DBLK, HBL )
393            D2 = DBLK - D1
394            HSTEP = LWORK / DBLK
395            VSTEP = HSTEP
396*
397*           Update horizontal slab in A.
398*
399            IF( WANTT ) THEN
400               CALL INFOG2L( I-DBLK+1, I+1, DESCA, NPROW, NPCOL, MYROW,
401     $              MYCOL, IROW, ICOL, II, JJ )
402               IF( MYROW .EQ. UP ) THEN
403                  IF( MYROW .EQ. II ) THEN
404                     ICOL1 = NUMROC( N, HBL, MYCOL, JAFIRST, NPCOL )
405                     DO 40 KKCOL = ICOL, ICOL1, HSTEP
406                        KLN = MIN( HSTEP, ICOL1-KKCOL+1 )
407                        CALL DGEMM( 'T', 'N', DBLK, KLN, DBLK, ONE, V,
408     $                       DBLK, A( IROW+(KKCOL-1)*LDA ), LDA, ZERO,
409     $                       WORK, DBLK )
410                        CALL DLAMOV( 'A', DBLK, KLN, WORK, DBLK,
411     $                       A( IROW+(KKCOL-1)*LDA ), LDA )
412   40                CONTINUE
413                  END IF
414               ELSE
415                  IF( MYROW .EQ. II ) THEN
416                     ICOL1 = NUMROC( N, HBL, MYCOL, JAFIRST, NPCOL )
417                     DO 50 KKCOL = ICOL, ICOL1, HSTEP
418                        KLN = MIN( HSTEP, ICOL1-KKCOL+1 )
419                        CALL DGEMM( 'T', 'N', D2, KLN, D1, ONE,
420     $                       V( 1, D1+1 ), LDV, A( IROW+(KKCOL-1)*LDA ),
421     $                       LDA, ZERO, WORK( D1+1 ), DBLK )
422                        CALL DGESD2D( CONTXT, D2, KLN, WORK( D1+1 ),
423     $                       DBLK, DOWN, MYCOL )
424                        CALL DGERV2D( CONTXT, D1, KLN, WORK, DBLK, DOWN,
425     $                       MYCOL )
426                        CALL DGEMM( 'T', 'N', D1, KLN, D1, ONE,
427     $                       V, LDV, A( IROW+(KKCOL-1)*LDA ), LDA, ONE,
428     $                       WORK, DBLK )
429                        CALL DLAMOV( 'A', D1, KLN, WORK, DBLK,
430     $                       A( IROW+(KKCOL-1)*LDA ), LDA )
431   50                CONTINUE
432                  ELSE IF( UP .EQ. II ) THEN
433                     ICOL1 = NUMROC( N, HBL, MYCOL, JAFIRST, NPCOL )
434                     DO 60 KKCOL = ICOL, ICOL1, HSTEP
435                        KLN = MIN( HSTEP, ICOL1-KKCOL+1 )
436                        CALL DGEMM( 'T', 'N', D1, KLN, D2, ONE,
437     $                       V( D1+1, 1 ), LDV, A( IROW+(KKCOL-1)*LDA ),
438     $                       LDA, ZERO, WORK, DBLK )
439                        CALL DGESD2D( CONTXT, D1, KLN, WORK, DBLK, UP,
440     $                       MYCOL )
441                        CALL DGERV2D( CONTXT, D2, KLN, WORK( D1+1 ),
442     $                       DBLK, UP, MYCOL )
443                        CALL DGEMM( 'T', 'N', D2, KLN, D2, ONE,
444     $                       V( D1+1, D1+1 ), LDV,
445     $                       A( IROW+(KKCOL-1)*LDA ), LDA, ONE,
446     $                       WORK( D1+1 ), DBLK )
447                        CALL DLAMOV( 'A', D2, KLN, WORK( D1+1 ), DBLK,
448     $                       A( IROW+(KKCOL-1)*LDA ), LDA )
449   60                CONTINUE
450                  END IF
451               END IF
452            END IF
453*
454*           Update vertical slab in A.
455*
456            CALL INFOG2L( LTOP, I-DBLK+1, DESCA, NPROW, NPCOL, MYROW,
457     $           MYCOL, IROW, ICOL, II, JJ )
458            IF( MYCOL .EQ. LEFT ) THEN
459               IF( MYCOL .EQ. JJ ) THEN
460                  CALL INFOG2L( I-DBLK, I-DBLK+1, DESCA, NPROW, NPCOL,
461     $                 MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
462                  IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
463                  DO 70 KKROW = IROW, IROW1, VSTEP
464                     KLN = MIN( VSTEP, IROW1-KKROW+1 )
465                     CALL DGEMM( 'N', 'N', KLN, DBLK, DBLK, ONE,
466     $                    A( KKROW+(ICOL-1)*LDA ), LDA, V, LDV, ZERO,
467     $                    WORK, KLN )
468                     CALL DLAMOV( 'A', KLN, DBLK, WORK, KLN,
469     $                    A( KKROW+(ICOL-1)*LDA ), LDA )
470   70             CONTINUE
471               END IF
472            ELSE
473               IF( MYCOL .EQ. JJ ) THEN
474                  CALL INFOG2L( I-DBLK, I-DBLK+1, DESCA, NPROW, NPCOL,
475     $                 MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
476                  IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
477                  DO 80 KKROW = IROW, IROW1, VSTEP
478                     KLN = MIN( VSTEP, IROW1-KKROW+1 )
479                     CALL DGEMM( 'N', 'N', KLN, D2, D1, ONE,
480     $                    A( KKROW+(ICOL-1)*LDA ), LDA,
481     $                    V( 1, D1+1 ), LDV, ZERO, WORK( 1+D1*KLN ),
482     $                    KLN )
483                     CALL DGESD2D( CONTXT, KLN, D2, WORK( 1+D1*KLN ),
484     $                    KLN, MYROW, RIGHT )
485                     CALL DGERV2D( CONTXT, KLN, D1, WORK, KLN, MYROW,
486     $                    RIGHT )
487                     CALL DGEMM( 'N', 'N', KLN, D1, D1, ONE,
488     $                    A( KKROW+(ICOL-1)*LDA ), LDA, V, LDV, ONE,
489     $                    WORK, KLN )
490                     CALL DLAMOV( 'A', KLN, D1, WORK, KLN,
491     $                    A( KKROW+(ICOL-1)*LDA ), LDA )
492   80             CONTINUE
493               ELSE IF ( LEFT .EQ. JJ ) THEN
494                  CALL INFOG2L( I-DBLK, I-DBLK+1, DESCA, NPROW, NPCOL,
495     $                 MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
496                  IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
497                  DO 90 KKROW = IROW, IROW1, VSTEP
498                     KLN = MIN( VSTEP, IROW1-KKROW+1 )
499                     CALL DGEMM( 'N', 'N', KLN, D1, D2, ONE,
500     $                    A( KKROW+(ICOL-1)*LDA ), LDA, V( D1+1, 1 ),
501     $                    LDV, ZERO, WORK, KLN )
502                     CALL DGESD2D( CONTXT, KLN, D1, WORK, KLN, MYROW,
503     $                    LEFT )
504                     CALL DGERV2D( CONTXT, KLN, D2, WORK( 1+D1*KLN ),
505     $                    KLN, MYROW, LEFT )
506                     CALL DGEMM( 'N', 'N', KLN, D2, D2, ONE,
507     $                    A( KKROW+(ICOL-1)*LDA ), LDA, V( D1+1, D1+1 ),
508     $                    LDV, ONE, WORK( 1+D1*KLN ), KLN )
509                     CALL DLAMOV( 'A', KLN, D2, WORK( 1+D1*KLN ), KLN,
510     $                    A( KKROW+(ICOL-1)*LDA ), LDA )
511   90             CONTINUE
512               END IF
513            END IF
514*
515*           Update vertical slab in Z.
516*
517            IF( WANTZ ) THEN
518               CALL INFOG2L( ILOZ, I-DBLK+1, DESCZ, NPROW, NPCOL, MYROW,
519     $              MYCOL, IROW, ICOL, II, JJ )
520               IF( MYCOL .EQ. LEFT ) THEN
521                  IF( MYCOL .EQ. JJ ) THEN
522                     CALL INFOG2L( IHIZ, I-DBLK+1, DESCZ, NPROW, NPCOL,
523     $                    MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
524                     IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
525                     DO 100 KKROW = IROW, IROW1, VSTEP
526                        KLN = MIN( VSTEP, IROW1-KKROW+1 )
527                        CALL DGEMM( 'N', 'N', KLN, DBLK, DBLK, ONE,
528     $                       Z( KKROW+(ICOL-1)*LDZ ), LDZ, V, LDV, ZERO,
529     $                       WORK, KLN )
530                        CALL DLAMOV( 'A', KLN, DBLK, WORK, KLN,
531     $                       Z( KKROW+(ICOL-1)*LDZ ), LDZ )
532  100                CONTINUE
533                  END IF
534               ELSE
535                  IF( MYCOL .EQ. JJ ) THEN
536                     CALL INFOG2L( IHIZ, I-DBLK+1, DESCZ, NPROW, NPCOL,
537     $                    MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
538                     IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
539                     DO 110 KKROW = IROW, IROW1, VSTEP
540                        KLN = MIN( VSTEP, IROW1-KKROW+1 )
541                        CALL DGEMM( 'N', 'N', KLN, D2, D1, ONE,
542     $                       Z( KKROW+(ICOL-1)*LDZ ), LDZ,
543     $                       V( 1, D1+1 ), LDV, ZERO, WORK( 1+D1*KLN ),
544     $                       KLN )
545                        CALL DGESD2D( CONTXT, KLN, D2, WORK( 1+D1*KLN ),
546     $                       KLN, MYROW, RIGHT )
547                        CALL DGERV2D( CONTXT, KLN, D1, WORK, KLN, MYROW,
548     $                       RIGHT )
549                        CALL DGEMM( 'N', 'N', KLN, D1, D1, ONE,
550     $                       Z( KKROW+(ICOL-1)*LDZ ), LDZ, V, LDV, ONE,
551     $                       WORK, KLN )
552                        CALL DLAMOV( 'A', KLN, D1, WORK, KLN,
553     $                       Z( KKROW+(ICOL-1)*LDZ ), LDZ )
554  110                CONTINUE
555                  ELSE IF( LEFT .EQ. JJ ) THEN
556                     CALL INFOG2L( IHIZ, I-DBLK+1, DESCZ, NPROW, NPCOL,
557     $                    MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
558                     IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
559                     DO 120 KKROW = IROW, IROW1, VSTEP
560                        KLN = MIN( VSTEP, IROW1-KKROW+1 )
561                        CALL DGEMM( 'N', 'N', KLN, D1, D2, ONE,
562     $                       Z( KKROW+(ICOL-1)*LDZ ), LDZ,
563     $                       V( D1+1, 1 ), LDV, ZERO, WORK, KLN )
564                        CALL DGESD2D( CONTXT, KLN, D1, WORK, KLN, MYROW,
565     $                       LEFT )
566                        CALL DGERV2D( CONTXT, KLN, D2, WORK( 1+D1*KLN ),
567     $                       KLN, MYROW, LEFT )
568                        CALL DGEMM( 'N', 'N', KLN, D2, D2, ONE,
569     $                       Z( KKROW+(ICOL-1)*LDZ ), LDZ,
570     $                       V( D1+1, D1+1 ), LDV, ONE,
571     $                       WORK( 1+D1*KLN ), KLN )
572                        CALL DLAMOV( 'A', KLN, D2, WORK( 1+D1*KLN ),
573     $                       KLN, Z( KKROW+(ICOL-1)*LDZ ), LDZ )
574  120                CONTINUE
575                  END IF
576               END IF
577            END IF
578*
579         ELSE
580*
581*           Most complicated case: the deflation window lay across the
582*           border of the processor mesh.
583*           Treat V as a distributed matrix and call PDGEMM.
584*
585            HSTEP = LWORK / DBLK * NPCOL
586            VSTEP = LWORK / DBLK * NPROW
587            LLDTMP = NUMROC( DBLK, DBLK, MYROW, 0, NPROW )
588            LLDTMP = MAX( 1, LLDTMP )
589            CALL DESCINIT( DESCV, DBLK, DBLK, DBLK, DBLK, 0, 0, CONTXT,
590     $           LLDTMP, INFO )
591            CALL DESCINIT( DESCWH, DBLK, HSTEP, DBLK, LWORK / DBLK, 0,
592     $           0, CONTXT, LLDTMP, INFO )
593*
594*           Update horizontal slab in A.
595*
596            IF( WANTT ) THEN
597               DO 130 KKCOL = I+1, N, HSTEP
598                  KLN = MIN( HSTEP, N-KKCOL+1 )
599                  CALL PDGEMM( 'T', 'N', DBLK, KLN, DBLK, ONE, V, 1, 1,
600     $                 DESCV, A, I-DBLK+1, KKCOL, DESCA, ZERO, WORK, 1,
601     $                 1, DESCWH )
602                  CALL PDGEMR2D( DBLK, KLN, WORK, 1, 1, DESCWH, A,
603     $                 I-DBLK+1, KKCOL, DESCA, CONTXT )
604  130          CONTINUE
605            END IF
606*
607*           Update vertical slab in A.
608*
609            DO 140 KKROW = LTOP, I-DBLK, VSTEP
610               KLN = MIN( VSTEP, I-DBLK-KKROW+1 )
611               LLDTMP = NUMROC( KLN, LWORK / DBLK, MYROW, 0, NPROW )
612               LLDTMP = MAX( 1, LLDTMP )
613               CALL DESCINIT( DESCWV, KLN, DBLK, LWORK / DBLK, DBLK, 0,
614     $              0, CONTXT, LLDTMP, INFO )
615               CALL PDGEMM( 'N', 'N', KLN, DBLK, DBLK, ONE, A, KKROW,
616     $              I-DBLK+1, DESCA, V, 1, 1, DESCV, ZERO, WORK, 1, 1,
617     $              DESCWV )
618               CALL PDGEMR2D( KLN, DBLK, WORK, 1, 1, DESCWV, A, KKROW,
619     $              I-DBLK+1, DESCA, CONTXT )
620  140       CONTINUE
621*
622*           Update vertical slab in Z.
623*
624            IF( WANTZ ) THEN
625               DO 150 KKROW = ILOZ, IHIZ, VSTEP
626                  KLN = MIN( VSTEP, IHIZ-KKROW+1 )
627                  LLDTMP = NUMROC( KLN, LWORK / DBLK, MYROW, 0, NPROW )
628                  LLDTMP = MAX( 1, LLDTMP )
629                  CALL DESCINIT( DESCWV, KLN, DBLK, LWORK / DBLK, DBLK,
630     $                 0, 0, CONTXT, LLDTMP, INFO )
631                  CALL PDGEMM( 'N', 'N', KLN, DBLK, DBLK, ONE, Z, KKROW,
632     $                 I-DBLK+1, DESCZ, V, 1, 1, DESCV, ZERO, WORK, 1,
633     $                 1, DESCWV )
634                  CALL PDGEMR2D( KLN, DBLK, WORK, 1, 1, DESCWV, Z,
635     $                 KKROW, I-DBLK+1, DESCZ, CONTXT )
636  150          CONTINUE
637            END IF
638         END IF
639*
640*        Extract converged eigenvalues.
641*
642         II = 0
643  160    CONTINUE
644            IF( II .EQ. ND-1 .OR. WI( DBLK-II ) .EQ. ZERO ) THEN
645               IF( NODE .EQ. 0 ) THEN
646                  SR( I-II ) = WR( DBLK-II )
647               ELSE
648                  SR( I-II ) = ZERO
649               END IF
650               SI( I-II ) = ZERO
651               II = II + 1
652            ELSE
653               IF( NODE .EQ. 0 ) THEN
654                  SR( I-II-1 ) = WR( DBLK-II-1 )
655                  SR( I-II ) = WR( DBLK-II )
656                  SI( I-II-1 ) = WI( DBLK-II-1 )
657                  SI( I-II ) = WI( DBLK-II )
658               ELSE
659                  SR( I-II-1 ) = ZERO
660                  SR( I-II ) = ZERO
661                  SI( I-II-1 ) = ZERO
662                  SI( I-II ) = ZERO
663               END IF
664               II = II + 2
665            END IF
666         IF( II .LT. ND ) GOTO 160
667      END IF
668*
669*     END OF PDLAQR2
670*
671      END
672