1      SUBROUTINE PDLAQR4( WANTT, WANTZ, N, ILO, IHI, A, DESCA, WR, WI,
2     $                    ILOZ, IHIZ, Z, DESCZ, T, LDT, V, LDV, WORK,
3     $                    LWORK, INFO )
4*
5*     Contribution from the Department of Computing Science and HPC2N,
6*     Umea University, Sweden
7*
8*  -- ScaLAPACK routine (version 2.0.2) --
9*     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
10*     May 1 2012
11*
12      IMPLICIT NONE
13*
14*     .. Scalar Arguments ..
15      LOGICAL            WANTT, WANTZ
16      INTEGER            IHI, IHIZ, ILO, ILOZ, INFO, LDT, LDV, LWORK, N
17*     ..
18*     .. Array Arguments ..
19      INTEGER            DESCA( * ), DESCZ( * )
20      DOUBLE PRECISION   A( * ), T( LDT, * ), V( LDV, * ), WI( * ),
21     $                   WORK( * ), WR( * ), Z( * )
22*     ..
23*
24*  Purpose
25*  =======
26*
27*  PDLAQR4 is an auxiliary routine used to find the Schur decomposition
28*  and or eigenvalues of a matrix already in Hessenberg form from cols
29*  ILO to IHI.  This routine requires that the active block is small
30*  enough, i.e. IHI-ILO+1 .LE. LDT, so that it can be solved by LAPACK.
31*  Normally, it is called by PDLAQR1.  All the inputs are assumed to be
32*  valid without checking.
33*
34*  Notes
35*  =====
36*
37*  Each global data object is described by an associated description
38*  vector.  This vector stores the information required to establish
39*  the mapping between an object element and its corresponding process
40*  and memory location.
41*
42*  Let A be a generic term for any 2D block cyclicly distributed array.
43*  Such a global array has an associated description vector DESCA.
44*  In the following comments, the character _ should be read as
45*  "of the global array".
46*
47*  NOTATION        STORED IN      EXPLANATION
48*  --------------- -------------- --------------------------------------
49*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
50*                                 DTYPE_A = 1.
51*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
52*                                 the BLACS process grid A is distribu-
53*                                 ted over. The context itself is glo-
54*                                 bal, but the handle (the integer
55*                                 value) may vary.
56*  M_A    (global) DESCA( M_ )    The number of rows in the global
57*                                 array A.
58*  N_A    (global) DESCA( N_ )    The number of columns in the global
59*                                 array A.
60*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
61*                                 the rows of the array.
62*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
63*                                 the columns of the array.
64*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
65*                                 row of the array A is distributed.
66*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
67*                                 first column of the array A is
68*                                 distributed.
69*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
70*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
71*
72*  Let K be the number of rows or columns of a distributed matrix,
73*  and assume that its process grid has dimension p x q.
74*  LOCr( K ) denotes the number of elements of K that a process
75*  would receive if K were distributed over the p processes of its
76*  process column.
77*  Similarly, LOCc( K ) denotes the number of elements of K that a
78*  process would receive if K were distributed over the q processes of
79*  its process row.
80*  The values of LOCr() and LOCc() may be determined via a call to the
81*  ScaLAPACK tool function, NUMROC:
82*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
83*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
84*  An upper bound for these quantities may be computed by:
85*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
86*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
87*
88*  Arguments
89*  =========
90*
91*  WANTT   (global input) LOGICAL
92*          = .TRUE. : the full Schur form T is required;
93*          = .FALSE.: only eigenvalues are required.
94*
95*  WANTZ   (global input) LOGICAL
96*          = .TRUE. : the matrix of Schur vectors Z is required;
97*          = .FALSE.: Schur vectors are not required.
98*
99*  N       (global input) INTEGER
100*          The order of the Hessenberg matrix A (and Z if WANTZ).
101*          N >= 0.
102*
103*  ILO     (global input) INTEGER
104*  IHI     (global input) INTEGER
105*          It is assumed that A is already upper quasi-triangular in
106*          rows and columns IHI+1:N, and that A(ILO,ILO-1) = 0 (unless
107*          ILO = 1). PDLAQR4 works primarily with the Hessenberg
108*          submatrix in rows and columns ILO to IHI, but applies
109*          transformations to all of H if WANTT is .TRUE..
110*          1 <= ILO <= max(1,IHI); IHI <= N.
111*
112*  A       (global input/output) DOUBLE PRECISION array, dimension
113*          (DESCA(LLD_),*)
114*          On entry, the upper Hessenberg matrix A.
115*          On exit, if WANTT is .TRUE., A is upper quasi-triangular in
116*          rows and columns ILO:IHI, with any 2-by-2 or larger diagonal
117*          blocks not yet in standard form. If WANTT is .FALSE., the
118*          contents of A are unspecified on exit.
119*
120*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
121*          The array descriptor for the distributed matrix A.
122*
123*  WR      (global replicated output) DOUBLE PRECISION array,
124*                                                         dimension (N)
125*  WI      (global replicated output) DOUBLE PRECISION array,
126*                                                         dimension (N)
127*          The real and imaginary parts, respectively, of the computed
128*          eigenvalues ILO to IHI are stored in the corresponding
129*          elements of WR and WI. If two eigenvalues are computed as a
130*          complex conjugate pair, they are stored in consecutive
131*          elements of WR and WI, say the i-th and (i+1)th, with
132*          WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., the
133*          eigenvalues are stored in the same order as on the diagonal
134*          of the Schur form returned in A.  A may be returned with
135*          larger diagonal blocks until the next release.
136*
137*  ILOZ    (global input) INTEGER
138*  IHIZ    (global input) INTEGER
139*          Specify the rows of Z to which transformations must be
140*          applied if WANTZ is .TRUE..
141*          1 <= ILOZ <= ILO; IHI <= IHIZ <= N.
142*
143*  Z       (global input/output) DOUBLE PRECISION array.
144*          If WANTZ is .TRUE., on entry Z must contain the current
145*          matrix Z of transformations accumulated by PDHSEQR, and on
146*          exit Z has been updated; transformations are applied only to
147*          the submatrix Z(ILOZ:IHIZ,ILO:IHI).
148*          If WANTZ is .FALSE., Z is not referenced.
149*
150*  DESCZ   (global and local input) INTEGER array of dimension DLEN_.
151*          The array descriptor for the distributed matrix Z.
152*
153*  T       (local workspace) DOUBLE PRECISION array, dimension LDT*NW.
154*
155*  LDT     (local input) INTEGER
156*          The leading dimension of the array T.
157*          LDT >= IHI-ILO+1.
158*
159*  V       (local workspace) DOUBLE PRECISION array, dimension LDV*NW.
160*
161*  LDV     (local input) INTEGER
162*          The leading dimension of the array V.
163*          LDV >= IHI-ILO+1.
164*
165*  WORK    (local workspace) DOUBLE PRECISION array, dimension LWORK.
166*
167*  LWORK   (local input) INTEGER
168*          The dimension of the work array WORK.
169*          LWORK >= IHI-ILO+1.
170*          WORK(LWORK) is a local array and LWORK is assumed big enough.
171*          Typically LWORK >= 4*LDS*LDS if this routine is called by
172*          PDLAQR1. (LDS = 385, see PDLAQR1)
173*
174*  INFO    (global output) INTEGER
175*          < 0: parameter number -INFO incorrect or inconsistent;
176*          = 0: successful exit;
177*          > 0: PDLAQR4 failed to compute all the eigenvalues ILO to IHI
178*               in a total of 30*(IHI-ILO+1) iterations; if INFO = i,
179*               elements i+1:ihi of WR and WI contain those eigenvalues
180*               which have been successfully computed.
181*
182*  ================================================================
183*  Implemented by
184*        Meiyue Shao, Department of Computing Science and HPC2N,
185*        Umea University, Sweden
186*
187*  ================================================================
188*  References:
189*        B. Kagstrom, D. Kressner, and M. Shao,
190*        On Aggressive Early Deflation in Parallel Variants of the QR
191*        Algorithm.
192*        Para 2010, to appear.
193*
194*  ================================================================
195*     .. Parameters ..
196      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
197     $                   LLD_, MB_, M_, NB_, N_, RSRC_
198      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
199     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
200     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
201      DOUBLE PRECISION   ZERO, ONE
202      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
203*     ..
204*     .. Local Scalars ..
205      INTEGER            CONTXT, HBL, I, I1, I2, IAFIRST, ICOL, ICOL1,
206     $                   ICOL2, II, IROW, IROW1, IROW2, ITMP1, ITMP2,
207     $                   IERR, J, JAFIRST, JJ, K, L, LDA, LDZ, LLDTMP,
208     $                   MYCOL, MYROW, NODE, NPCOL, NPROW, NH, NMIN, NZ,
209     $                   HSTEP, VSTEP, KKROW, KKCOL, KLN, LTOP, LEFT,
210     $                   RIGHT, UP, DOWN, D1, D2
211*     ..
212*     .. Local Arrays ..
213      INTEGER            DESCT( 9 ), DESCV( 9 ), DESCWH( 9 ),
214     $                   DESCWV( 9 )
215*     ..
216*     .. External Functions ..
217      INTEGER            NUMROC, ILAENV
218      EXTERNAL           NUMROC, ILAENV
219*     ..
220*     .. External Subroutines ..
221      EXTERNAL           BLACS_GRIDINFO, INFOG2L, DLASET,
222     $                   DLAHQR, DLAQR4, DESCINIT, PDGEMM, PDGEMR2D,
223     $                   DGEMM, DLAMOV, DGESD2D, DGERV2D,
224     $                   DGEBS2D, DGEBR2D, IGEBS2D, IGEBR2D
225*     ..
226*     .. Intrinsic Functions ..
227      INTRINSIC          MAX, MIN, MOD
228*     ..
229*     .. Executable Statements ..
230*
231      INFO = 0
232*
233      NH = IHI - ILO + 1
234      NZ = IHIZ - ILOZ + 1
235      IF( N.EQ.0 .OR. NH.EQ.0 )
236     $   RETURN
237*
238*     NODE (IAFIRST,JAFIRST) OWNS A(1,1)
239*
240      HBL = DESCA( MB_ )
241      CONTXT = DESCA( CTXT_ )
242      LDA = DESCA( LLD_ )
243      IAFIRST = DESCA( RSRC_ )
244      JAFIRST = DESCA( CSRC_ )
245      LDZ = DESCZ( LLD_ )
246      CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL )
247      NODE = MYROW*NPCOL + MYCOL
248      LEFT = MOD( MYCOL+NPCOL-1, NPCOL )
249      RIGHT = MOD( MYCOL+1, NPCOL )
250      UP = MOD( MYROW+NPROW-1, NPROW )
251      DOWN = MOD( MYROW+1, NPROW )
252*
253*     I1 and I2 are the indices of the first row and last column of A
254*     to which transformations must be applied.
255*
256      I = IHI
257      L = ILO
258      IF( WANTT ) THEN
259         I1 = 1
260         I2 = N
261         LTOP = 1
262      ELSE
263         I1 = L
264         I2 = I
265         LTOP = L
266      END IF
267*
268*     Copy the diagonal block to local and call LAPACK.
269*
270      CALL INFOG2L( ILO, ILO, DESCA, NPROW, NPCOL, MYROW, MYCOL,
271     $     IROW, ICOL, II, JJ )
272      IF ( MYROW .EQ. II ) THEN
273         CALL DESCINIT( DESCT, NH, NH, NH, NH, II, JJ, CONTXT,
274     $        LDT, IERR )
275         CALL DESCINIT( DESCV, NH, NH, NH, NH, II, JJ, CONTXT,
276     $        LDV, IERR )
277      ELSE
278         CALL DESCINIT( DESCT, NH, NH, NH, NH, II, JJ, CONTXT,
279     $        1, IERR )
280         CALL DESCINIT( DESCV, NH, NH, NH, NH, II, JJ, CONTXT,
281     $        1, IERR )
282      END IF
283      CALL PDGEMR2D( NH, NH, A, ILO, ILO, DESCA, T, 1, 1, DESCT,
284     $     CONTXT )
285      IF ( MYROW .EQ. II .AND. MYCOL .EQ. JJ ) THEN
286         CALL DLASET( 'All', NH, NH, ZERO, ONE, V, LDV )
287         NMIN = ILAENV( 12, 'DLAQR3', 'SV', NH, 1, NH, LWORK )
288         IF( NH .GT. NMIN ) THEN
289            CALL DLAQR4( .TRUE., .TRUE., NH, 1, NH, T, LDT, WR( ILO ),
290     $           WI( ILO ), 1, NH, V, LDV, WORK, LWORK, INFO )
291*           Clean up the scratch used by DLAQR4.
292            CALL DLASET( 'L', NH-2, NH-2, ZERO, ZERO, T( 3, 1 ), LDT )
293         ELSE
294            CALL DLAHQR( .TRUE., .TRUE., NH, 1, NH, T, LDT, WR( ILO ),
295     $           WI( ILO ), 1, NH, V, LDV, INFO )
296         END IF
297         CALL DGEBS2D( CONTXT, 'All', ' ', NH, NH, V, LDV )
298         CALL IGEBS2D( CONTXT, 'All', ' ', 1, 1, INFO, 1 )
299      ELSE
300         CALL DGEBR2D( CONTXT, 'All', ' ', NH, NH, V, LDV, II, JJ )
301         CALL IGEBR2D( CONTXT, 'All', ' ', 1, 1, INFO, 1, II, JJ )
302      END IF
303      IF( INFO .NE. 0 ) INFO = INFO+ILO-1
304*
305*     Copy the local matrix back to the diagonal block.
306*
307      CALL PDGEMR2D( NH, NH, T, 1, 1, DESCT, A, ILO, ILO, DESCA,
308     $     CONTXT )
309*
310*     Update T and Z.
311*
312      IF( MOD( ILO-1, HBL )+NH .LE. HBL ) THEN
313*
314*        Simplest case: the diagonal block is located on one processor.
315*        Call DGEMM directly to perform the update.
316*
317         HSTEP = LWORK / NH
318         VSTEP = HSTEP
319*
320         IF( WANTT ) THEN
321*
322*           Update horizontal slab in A.
323*
324            CALL INFOG2L( ILO, I+1, DESCA, NPROW, NPCOL, MYROW,
325     $           MYCOL, IROW, ICOL, II, JJ )
326            IF( MYROW .EQ. II ) THEN
327               ICOL1 = NUMROC( N, HBL, MYCOL, JAFIRST, NPCOL )
328               DO 10 KKCOL = ICOL, ICOL1, HSTEP
329                  KLN = MIN( HSTEP, ICOL1-KKCOL+1 )
330                  CALL DGEMM( 'T', 'N', NH, KLN, NH, ONE, V,
331     $                 LDV, A( IROW+(KKCOL-1)*LDA ), LDA, ZERO, WORK,
332     $                 NH )
333                  CALL DLAMOV( 'A', NH, KLN, WORK, NH,
334     $                 A( IROW+(KKCOL-1)*LDA ), LDA )
335   10          CONTINUE
336            END IF
337*
338*           Update vertical slab in A.
339*
340            CALL INFOG2L( LTOP, ILO, DESCA, NPROW, NPCOL, MYROW,
341     $           MYCOL, IROW, ICOL, II, JJ )
342            IF( MYCOL .EQ. JJ ) THEN
343               CALL INFOG2L( ILO-1, ILO, DESCA, NPROW, NPCOL,
344     $              MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
345               IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
346               DO 20 KKROW = IROW, IROW1, VSTEP
347                  KLN = MIN( VSTEP, IROW1-KKROW+1 )
348                  CALL DGEMM( 'N', 'N', KLN, NH, NH, ONE,
349     $                 A( KKROW+(ICOL-1)*LDA ), LDA, V, LDV, ZERO,
350     $                 WORK, KLN )
351                  CALL DLAMOV( 'A', KLN, NH, WORK, KLN,
352     $                 A( KKROW+(ICOL-1)*LDA ), LDA )
353   20          CONTINUE
354            END IF
355         END IF
356*
357*        Update vertical slab in Z.
358*
359         IF( WANTZ ) THEN
360            CALL INFOG2L( ILOZ, ILO, DESCZ, NPROW, NPCOL, MYROW,
361     $           MYCOL, IROW, ICOL, II, JJ )
362            IF( MYCOL .EQ. JJ ) THEN
363               CALL INFOG2L( IHIZ, ILO, DESCZ, NPROW, NPCOL,
364     $              MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
365               IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
366               DO 30 KKROW = IROW, IROW1, VSTEP
367                  KLN = MIN( VSTEP, IROW1-KKROW+1 )
368                  CALL DGEMM( 'N', 'N', KLN, NH, NH, ONE,
369     $                 Z( KKROW+(ICOL-1)*LDZ ), LDZ, V, LDV, ZERO,
370     $                 WORK, KLN )
371                  CALL DLAMOV( 'A', KLN, NH, WORK, KLN,
372     $                 Z( KKROW+(ICOL-1)*LDZ ), LDZ )
373   30          CONTINUE
374            END IF
375         END IF
376*
377      ELSE IF( MOD( ILO-1, HBL )+NH .LE. 2*HBL ) THEN
378*
379*        More complicated case: the diagonal block lay on a 2x2
380*        processor mesh.
381*        Call DGEMM locally and communicate by pair.
382*
383         D1 = HBL - MOD( ILO-1, HBL )
384         D2 = NH - D1
385         HSTEP = LWORK / NH
386         VSTEP = HSTEP
387*
388         IF( WANTT ) THEN
389*
390*           Update horizontal slab in A.
391*
392            CALL INFOG2L( ILO, I+1, DESCA, NPROW, NPCOL, MYROW,
393     $           MYCOL, IROW, ICOL, II, JJ )
394            IF( MYROW .EQ. UP ) THEN
395               IF( MYROW .EQ. II ) THEN
396                  ICOL1 = NUMROC( N, HBL, MYCOL, JAFIRST, NPCOL )
397                  DO 40 KKCOL = ICOL, ICOL1, HSTEP
398                     KLN = MIN( HSTEP, ICOL1-KKCOL+1 )
399                     CALL DGEMM( 'T', 'N', NH, KLN, NH, ONE, V,
400     $                    NH, A( IROW+(KKCOL-1)*LDA ), LDA, ZERO,
401     $                    WORK, NH )
402                     CALL DLAMOV( 'A', NH, KLN, WORK, NH,
403     $                    A( IROW+(KKCOL-1)*LDA ), LDA )
404   40             CONTINUE
405               END IF
406            ELSE
407               IF( MYROW .EQ. II ) THEN
408                  ICOL1 = NUMROC( N, HBL, MYCOL, JAFIRST, NPCOL )
409                  DO 50 KKCOL = ICOL, ICOL1, HSTEP
410                     KLN = MIN( HSTEP, ICOL1-KKCOL+1 )
411                     CALL DGEMM( 'T', 'N', D2, KLN, D1, ONE,
412     $                    V( 1, D1+1 ), LDV, A( IROW+(KKCOL-1)*LDA ),
413     $                    LDA, ZERO, WORK( D1+1 ), NH )
414                     CALL DGESD2D( CONTXT, D2, KLN, WORK( D1+1 ),
415     $                    NH, DOWN, MYCOL )
416                     CALL DGERV2D( CONTXT, D1, KLN, WORK, NH, DOWN,
417     $                    MYCOL )
418                     CALL DGEMM( 'T', 'N', D1, KLN, D1, ONE,
419     $                    V, LDV, A( IROW+(KKCOL-1)*LDA ), LDA, ONE,
420     $                    WORK, NH )
421                     CALL DLAMOV( 'A', D1, KLN, WORK, NH,
422     $                    A( IROW+(KKCOL-1)*LDA ), LDA )
423   50             CONTINUE
424               ELSE IF( UP .EQ. II ) THEN
425                  ICOL1 = NUMROC( N, HBL, MYCOL, JAFIRST, NPCOL )
426                  DO 60 KKCOL = ICOL, ICOL1, HSTEP
427                     KLN = MIN( HSTEP, ICOL1-KKCOL+1 )
428                     CALL DGEMM( 'T', 'N', D1, KLN, D2, ONE,
429     $                    V( D1+1, 1 ), LDV, A( IROW+(KKCOL-1)*LDA ),
430     $                    LDA, ZERO, WORK, NH )
431                     CALL DGESD2D( CONTXT, D1, KLN, WORK, NH, UP,
432     $                    MYCOL )
433                     CALL DGERV2D( CONTXT, D2, KLN, WORK( D1+1 ),
434     $                    NH, UP, MYCOL )
435                     CALL DGEMM( 'T', 'N', D2, KLN, D2, ONE,
436     $                    V( D1+1, D1+1 ), LDV,
437     $                    A( IROW+(KKCOL-1)*LDA ), LDA, ONE,
438     $                    WORK( D1+1 ), NH )
439                     CALL DLAMOV( 'A', D2, KLN, WORK( D1+1 ), NH,
440     $                    A( IROW+(KKCOL-1)*LDA ), LDA )
441   60             CONTINUE
442               END IF
443            END IF
444*
445*           Update vertical slab in A.
446*
447            CALL INFOG2L( LTOP, ILO, DESCA, NPROW, NPCOL, MYROW,
448     $           MYCOL, IROW, ICOL, II, JJ )
449            IF( MYCOL .EQ. LEFT ) THEN
450               IF( MYCOL .EQ. JJ ) THEN
451                  CALL INFOG2L( ILO-1, ILO, DESCA, NPROW, NPCOL,
452     $                 MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
453                  IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
454                  DO 70 KKROW = IROW, IROW1, VSTEP
455                     KLN = MIN( VSTEP, IROW1-KKROW+1 )
456                     CALL DGEMM( 'N', 'N', KLN, NH, NH, ONE,
457     $                    A( KKROW+(ICOL-1)*LDA ), LDA, V, LDV,
458     $                    ZERO, WORK, KLN )
459                     CALL DLAMOV( 'A', KLN, NH, WORK, KLN,
460     $                    A( KKROW+(ICOL-1)*LDA ), LDA )
461   70             CONTINUE
462               END IF
463            ELSE
464               IF( MYCOL .EQ. JJ ) THEN
465                  CALL INFOG2L( ILO-1, ILO, DESCA, NPROW, NPCOL,
466     $                 MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
467                  IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
468                  DO 80 KKROW = IROW, IROW1, VSTEP
469                     KLN = MIN( VSTEP, IROW1-KKROW+1 )
470                     CALL DGEMM( 'N', 'N', KLN, D2, D1, ONE,
471     $                    A( KKROW+(ICOL-1)*LDA ), LDA, V( 1, D1+1 ),
472     $                    LDV, ZERO, WORK( 1+D1*KLN ), KLN )
473                     CALL DGESD2D( CONTXT, KLN, D2, WORK( 1+D1*KLN ),
474     $                    KLN, MYROW, RIGHT )
475                     CALL DGERV2D( CONTXT, KLN, D1, WORK, KLN, MYROW,
476     $                    RIGHT )
477                     CALL DGEMM( 'N', 'N', KLN, D1, D1, ONE,
478     $                    A( KKROW+(ICOL-1)*LDA ), LDA, V, LDV, ONE,
479     $                    WORK, KLN )
480                     CALL DLAMOV( 'A', KLN, D1, WORK, KLN,
481     $                    A( KKROW+(ICOL-1)*LDA ), LDA )
482   80             CONTINUE
483               ELSE IF ( LEFT .EQ. JJ ) THEN
484                  CALL INFOG2L( ILO-1, ILO, DESCA, NPROW, NPCOL,
485     $                 MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
486                  IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
487                  DO 90 KKROW = IROW, IROW1, VSTEP
488                     KLN = MIN( VSTEP, IROW1-KKROW+1 )
489                     CALL DGEMM( 'N', 'N', KLN, D1, D2, ONE,
490     $                    A( KKROW+(ICOL-1)*LDA ), LDA, V( D1+1, 1 ),
491     $                    LDV, ZERO, WORK, KLN )
492                     CALL DGESD2D( CONTXT, KLN, D1, WORK, KLN, MYROW,
493     $                    LEFT )
494                     CALL DGERV2D( CONTXT, KLN, D2, WORK( 1+D1*KLN ),
495     $                    KLN, MYROW, LEFT )
496                     CALL DGEMM( 'N', 'N', KLN, D2, D2, ONE,
497     $                    A( KKROW+(ICOL-1)*LDA ), LDA, V( D1+1, D1+1 ),
498     $                    LDV, ONE, WORK( 1+D1*KLN ), KLN )
499                     CALL DLAMOV( 'A', KLN, D2, WORK( 1+D1*KLN ), KLN,
500     $                    A( KKROW+(ICOL-1)*LDA ), LDA )
501   90             CONTINUE
502               END IF
503            END IF
504         END IF
505*
506*        Update vertical slab in Z.
507*
508         IF( WANTZ ) THEN
509            CALL INFOG2L( ILOZ, ILO, DESCZ, NPROW, NPCOL, MYROW,
510     $           MYCOL, IROW, ICOL, II, JJ )
511            IF( MYCOL .EQ. LEFT ) THEN
512               IF( MYCOL .EQ. JJ ) THEN
513                  CALL INFOG2L( IHIZ, ILO, DESCZ, NPROW, NPCOL,
514     $                 MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
515                  IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
516                  DO 100 KKROW = IROW, IROW1, VSTEP
517                     KLN = MIN( VSTEP, IROW1-KKROW+1 )
518                     CALL DGEMM( 'N', 'N', KLN, NH, NH, ONE,
519     $                    Z( KKROW+(ICOL-1)*LDZ ), LDZ, V, LDV, ZERO,
520     $                    WORK, KLN )
521                     CALL DLAMOV( 'A', KLN, NH, WORK, KLN,
522     $                    Z( KKROW+(ICOL-1)*LDZ ), LDZ )
523  100             CONTINUE
524               END IF
525            ELSE
526               IF( MYCOL .EQ. JJ ) THEN
527                  CALL INFOG2L( IHIZ, ILO, DESCZ, NPROW, NPCOL,
528     $                 MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
529                  IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
530                  DO 110 KKROW = IROW, IROW1, VSTEP
531                     KLN = MIN( VSTEP, IROW1-KKROW+1 )
532                     CALL DGEMM( 'N', 'N', KLN, D2, D1, ONE,
533     $                    Z( KKROW+(ICOL-1)*LDZ ), LDZ, V( 1, D1+1 ),
534     $                    LDV, ZERO, WORK( 1+D1*KLN ), KLN )
535                     CALL DGESD2D( CONTXT, KLN, D2, WORK( 1+D1*KLN ),
536     $                    KLN, MYROW, RIGHT )
537                     CALL DGERV2D( CONTXT, KLN, D1, WORK, KLN, MYROW,
538     $                    RIGHT )
539                     CALL DGEMM( 'N', 'N', KLN, D1, D1, ONE,
540     $                    Z( KKROW+(ICOL-1)*LDZ ), LDZ, V, LDV, ONE,
541     $                    WORK, KLN )
542                     CALL DLAMOV( 'A', KLN, D1, WORK, KLN,
543     $                    Z( KKROW+(ICOL-1)*LDZ ), LDZ )
544  110             CONTINUE
545               ELSE IF( LEFT .EQ. JJ ) THEN
546                  CALL INFOG2L( IHIZ, ILO, DESCZ, NPROW, NPCOL,
547     $                 MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
548                  IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
549                  DO 120 KKROW = IROW, IROW1, VSTEP
550                     KLN = MIN( VSTEP, IROW1-KKROW+1 )
551                     CALL DGEMM( 'N', 'N', KLN, D1, D2, ONE,
552     $                    Z( KKROW+(ICOL-1)*LDZ ), LDZ, V( D1+1, 1 ),
553     $                    LDV, ZERO, WORK, KLN )
554                     CALL DGESD2D( CONTXT, KLN, D1, WORK, KLN, MYROW,
555     $                    LEFT )
556                     CALL DGERV2D( CONTXT, KLN, D2, WORK( 1+D1*KLN ),
557     $                    KLN, MYROW, LEFT )
558                     CALL DGEMM( 'N', 'N', KLN, D2, D2, ONE,
559     $                    Z( KKROW+(ICOL-1)*LDZ ), LDZ,
560     $                    V( D1+1, D1+1 ), LDV, ONE, WORK( 1+D1*KLN ),
561     $                    KLN )
562                     CALL DLAMOV( 'A', KLN, D2, WORK( 1+D1*KLN ),
563     $                    KLN, Z( KKROW+(ICOL-1)*LDZ ), LDZ )
564  120             CONTINUE
565               END IF
566            END IF
567         END IF
568*
569      ELSE
570*
571*        Most complicated case: the diagonal block lay across the border
572*        of the processor mesh.
573*        Treat V as a distributed matrix and call PDGEMM.
574*
575         HSTEP = LWORK / NH * NPCOL
576         VSTEP = LWORK / NH * NPROW
577         LLDTMP = NUMROC( NH, NH, MYROW, 0, NPROW )
578         LLDTMP = MAX( 1, LLDTMP )
579         CALL DESCINIT( DESCV, NH, NH, NH, NH, 0, 0, CONTXT,
580     $        LLDTMP, IERR )
581         CALL DESCINIT( DESCWH, NH, HSTEP, NH, LWORK / NH, 0, 0,
582     $        CONTXT, LLDTMP, IERR )
583*
584         IF( WANTT ) THEN
585*
586*           Update horizontal slab in A.
587*
588            DO 130 KKCOL = I+1, N, HSTEP
589               KLN = MIN( HSTEP, N-KKCOL+1 )
590               CALL PDGEMM( 'T', 'N', NH, KLN, NH, ONE, V, 1, 1,
591     $              DESCV, A, ILO, KKCOL, DESCA, ZERO, WORK, 1, 1,
592     $              DESCWH )
593               CALL PDGEMR2D( NH, KLN, WORK, 1, 1, DESCWH, A,
594     $              ILO, KKCOL, DESCA, CONTXT )
595  130       CONTINUE
596*
597*           Update vertical slab in A.
598*
599            DO 140 KKROW = LTOP, ILO-1, VSTEP
600               KLN = MIN( VSTEP, ILO-KKROW )
601               LLDTMP = NUMROC( KLN, LWORK / NH, MYROW, 0, NPROW )
602               LLDTMP = MAX( 1, LLDTMP )
603               CALL DESCINIT( DESCWV, KLN, NH, LWORK / NH, NH, 0, 0,
604     $              CONTXT, LLDTMP, IERR )
605               CALL PDGEMM( 'N', 'N', KLN, NH, NH, ONE, A, KKROW,
606     $              ILO, DESCA, V, 1, 1, DESCV, ZERO, WORK, 1, 1,
607     $              DESCWV )
608               CALL PDGEMR2D( KLN, NH, WORK, 1, 1, DESCWV, A, KKROW,
609     $              ILO, DESCA, CONTXT )
610  140       CONTINUE
611         END IF
612*
613*        Update vertical slab in Z.
614*
615         IF( WANTZ ) THEN
616            DO 150 KKROW = ILOZ, IHIZ, VSTEP
617               KLN = MIN( VSTEP, IHIZ-KKROW+1 )
618               LLDTMP = NUMROC( KLN, LWORK / NH, MYROW, 0, NPROW )
619               LLDTMP = MAX( 1, LLDTMP )
620               CALL DESCINIT( DESCWV, KLN, NH, LWORK / NH, NH, 0, 0,
621     $              CONTXT, LLDTMP, IERR )
622               CALL PDGEMM( 'N', 'N', KLN, NH, NH, ONE, Z, KKROW,
623     $              ILO, DESCZ, V, 1, 1, DESCV, ZERO, WORK, 1, 1,
624     $              DESCWV )
625               CALL PDGEMR2D( KLN, NH, WORK, 1, 1, DESCWV, Z,
626     $              KKROW, ILO, DESCZ, CONTXT )
627  150       CONTINUE
628         END IF
629      END IF
630*
631*     END OF PDLAQR4
632*
633      END
634