1      SUBROUTINE PCLARZ( SIDE, M, N, L, V, IV, JV, DESCV, INCV, TAU, C,
2     $                   IC, JC, DESCC, WORK )
3*
4*  -- ScaLAPACK auxiliary routine (version 1.7) --
5*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6*     and University of California, Berkeley.
7*     May 25, 2001
8*
9*     .. Scalar Arguments ..
10      CHARACTER          SIDE
11      INTEGER            IC, INCV, IV, JC, JV, L, M, N
12*     ..
13*     .. Array Arguments ..
14      INTEGER            DESCC( * ), DESCV( * )
15      COMPLEX            C( * ), TAU( * ), V( * ), WORK( * )
16*     ..
17*
18*  Purpose
19*  =======
20*
21*  PCLARZ applies a complex elementary reflector Q to a complex M-by-N
22*  distributed matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1), from either the
23*  left or the right. Q is represented in the form
24*
25*        Q = I - tau * v * v'
26*
27*  where tau is a complex scalar and v is a complex vector.
28*
29*  If tau = 0, then Q is taken to be the unit matrix.
30*
31*  Q is a product of k elementary reflectors as returned by PCTZRZF.
32*
33*  Notes
34*  =====
35*
36*  Each global data object is described by an associated description
37*  vector.  This vector stores the information required to establish
38*  the mapping between an object element and its corresponding process
39*  and memory location.
40*
41*  Let A be a generic term for any 2D block cyclicly distributed array.
42*  Such a global array has an associated description vector DESCA.
43*  In the following comments, the character _ should be read as
44*  "of the global array".
45*
46*  NOTATION        STORED IN      EXPLANATION
47*  --------------- -------------- --------------------------------------
48*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
49*                                 DTYPE_A = 1.
50*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
51*                                 the BLACS process grid A is distribu-
52*                                 ted over. The context itself is glo-
53*                                 bal, but the handle (the integer
54*                                 value) may vary.
55*  M_A    (global) DESCA( M_ )    The number of rows in the global
56*                                 array A.
57*  N_A    (global) DESCA( N_ )    The number of columns in the global
58*                                 array A.
59*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
60*                                 the rows of the array.
61*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
62*                                 the columns of the array.
63*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
64*                                 row of the array A is distributed.
65*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
66*                                 first column of the array A is
67*                                 distributed.
68*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
69*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
70*
71*  Let K be the number of rows or columns of a distributed matrix,
72*  and assume that its process grid has dimension p x q.
73*  LOCr( K ) denotes the number of elements of K that a process
74*  would receive if K were distributed over the p processes of its
75*  process column.
76*  Similarly, LOCc( K ) denotes the number of elements of K that a
77*  process would receive if K were distributed over the q processes of
78*  its process row.
79*  The values of LOCr() and LOCc() may be determined via a call to the
80*  ScaLAPACK tool function, NUMROC:
81*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
82*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
83*  An upper bound for these quantities may be computed by:
84*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
85*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
86*
87*  Because vectors may be viewed as a subclass of matrices, a
88*  distributed vector is considered to be a distributed matrix.
89*
90*  Restrictions
91*  ============
92*
93*  If SIDE = 'Left' and INCV = 1, then the row process having the first
94*  entry V(IV,JV) must also own C(IC+M-L,JC:JC+N-1). Moreover,
95*  MOD(IV-1,MB_V) must be equal to MOD(IC+N-L-1,MB_C), if INCV=M_V, only
96*  the last equality must be satisfied.
97*
98*  If SIDE = 'Right' and INCV = M_V then the column process having the
99*  first entry V(IV,JV) must also own C(IC:IC+M-1,JC+N-L) and
100*  MOD(JV-1,NB_V) must be equal to MOD(JC+N-L-1,NB_C), if INCV = 1 only
101*  the last equality must be satisfied.
102*
103*  Arguments
104*  =========
105*
106*  SIDE    (global input) CHARACTER
107*          = 'L': form  Q * sub( C ),
108*          = 'R': form  sub( C ) * Q.
109*
110*  M       (global input) INTEGER
111*          The number of rows to be operated on i.e the number of rows
112*          of the distributed submatrix sub( C ). M >= 0.
113*
114*  N       (global input) INTEGER
115*          The number of columns to be operated on i.e the number of
116*          columns of the distributed submatrix sub( C ). N >= 0.
117*
118*  L       (global input) INTEGER
119*          The columns of the distributed submatrix sub( A ) containing
120*          the meaningful part of the Householder reflectors.
121*          If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0.
122*
123*  V       (local input) COMPLEX pointer into the local memory
124*          to an array of dimension (LLD_V,*) containing the local
125*          pieces of the distributed vectors V representing the
126*          Householder transformation Q,
127*             V(IV:IV+L-1,JV) if SIDE = 'L' and INCV = 1,
128*             V(IV,JV:JV+L-1) if SIDE = 'L' and INCV = M_V,
129*             V(IV:IV+L-1,JV) if SIDE = 'R' and INCV = 1,
130*             V(IV,JV:JV+L-1) if SIDE = 'R' and INCV = M_V,
131*
132*          The vector v in the representation of Q. V is not used if
133*          TAU = 0.
134*
135*  IV      (global input) INTEGER
136*          The row index in the global array V indicating the first
137*          row of sub( V ).
138*
139*  JV      (global input) INTEGER
140*          The column index in the global array V indicating the
141*          first column of sub( V ).
142*
143*  DESCV   (global and local input) INTEGER array of dimension DLEN_.
144*          The array descriptor for the distributed matrix V.
145*
146*  INCV    (global input) INTEGER
147*          The global increment for the elements of V. Only two values
148*          of INCV are supported in this version, namely 1 and M_V.
149*          INCV must not be zero.
150*
151*  TAU     (local input) COMPLEX, array, dimension  LOCc(JV) if
152*          INCV = 1, and LOCr(IV) otherwise. This array contains the
153*          Householder scalars related to the Householder vectors.
154*          TAU is tied to the distributed matrix V.
155*
156*  C       (local input/local output) COMPLEX pointer into the
157*          local memory to an array of dimension (LLD_C, LOCc(JC+N-1) ),
158*          containing the local pieces of sub( C ). On exit, sub( C )
159*          is overwritten by the Q * sub( C ) if SIDE = 'L', or
160*          sub( C ) * Q if SIDE = 'R'.
161*
162*  IC      (global input) INTEGER
163*          The row index in the global array C indicating the first
164*          row of sub( C ).
165*
166*  JC      (global input) INTEGER
167*          The column index in the global array C indicating the
168*          first column of sub( C ).
169*
170*  DESCC   (global and local input) INTEGER array of dimension DLEN_.
171*          The array descriptor for the distributed matrix C.
172*
173*  WORK    (local workspace) COMPLEX array, dimension (LWORK)
174*          If INCV = 1,
175*            if SIDE = 'L',
176*              if IVCOL = ICCOL,
177*                LWORK >= NqC0
178*              else
179*                LWORK >= MpC0 + MAX( 1, NqC0 )
180*              end if
181*            else if SIDE = 'R',
182*              LWORK >= NqC0 + MAX( MAX( 1, MpC0 ), NUMROC( NUMROC(
183*                       N+ICOFFC,NB_V,0,0,NPCOL ),NB_V,0,0,LCMQ ) )
184*            end if
185*          else if INCV = M_V,
186*            if SIDE = 'L',
187*              LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( NUMROC(
188*                       M+IROFFC,MB_V,0,0,NPROW ),MB_V,0,0,LCMP ) )
189*            else if SIDE = 'R',
190*              if IVROW = ICROW,
191*                LWORK >= MpC0
192*              else
193*                LWORK >= NqC0 + MAX( 1, MpC0 )
194*              end if
195*            end if
196*          end if
197*
198*          where LCM is the least common multiple of NPROW and NPCOL and
199*          LCM = ILCM( NPROW, NPCOL ), LCMP = LCM / NPROW,
200*          LCMQ = LCM / NPCOL,
201*
202*          IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ),
203*          ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ),
204*          ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ),
205*          MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ),
206*          NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ),
207*
208*          ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions;
209*          MYROW, MYCOL, NPROW and NPCOL can be determined by calling
210*          the subroutine BLACS_GRIDINFO.
211*
212*  Alignment requirements
213*  ======================
214*
215*  The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1)
216*  must verify some alignment properties, namely the following
217*  expressions should be true:
218*
219*  MB_V = NB_V,
220*
221*  If INCV = 1,
222*    If SIDE = 'Left',
223*      ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW )
224*    If SIDE = 'Right',
225*      ( MB_V.EQ.NB_A .AND. MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC )
226*  else if INCV = M_V,
227*    If SIDE = 'Left',
228*      ( MB_V.EQ.NB_V .AND. MB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC )
229*    If SIDE = 'Right',
230*      ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL )
231*  end if
232*
233*  =====================================================================
234*
235*     .. Parameters ..
236      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
237     $                   LLD_, MB_, M_, NB_, N_, RSRC_
238      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
239     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
240     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
241      COMPLEX            ONE, ZERO
242      PARAMETER          ( ONE  = ( 1.0E+0, 0.0E+0 ),
243     $                     ZERO = ( 0.0E+0, 0.0E+0 ) )
244*     ..
245*     .. Local Scalars ..
246      LOGICAL            CCBLCK, CRBLCK, LEFT
247      CHARACTER          COLBTOP, ROWBTOP
248      INTEGER            ICCOL1, ICCOL2, ICOFFC1, ICOFFC2, ICOFFV,
249     $                   ICROW1, ICROW2, ICTXT, IIC1, IIC2, IIV, IOFFC1,
250     $                   IOFFC2, IOFFV, IPW, IROFFC1, IROFFC2, IROFFV,
251     $                   IVCOL, IVROW, JJC1, JJC2, JJV, LDC, LDV, MPC2,
252     $                   MPV, MYCOL, MYROW, NCC, NCV, NPCOL, NPROW,
253     $                   NQC2, NQV, RDEST
254      COMPLEX            TAULOC
255*     ..
256*     .. External Subroutines ..
257      EXTERNAL           BLACS_GRIDINFO, CAXPY, CCOPY, CGEBR2D,
258     $                   CGEBS2D, CGEMV, CGERC, CGERV2D,
259     $                   CGESD2D, CGSUM2D, CLASET, INFOG2L,
260     $                   PB_TOPGET, PBCTRNV
261*     ..
262*     .. External Functions ..
263      LOGICAL            LSAME
264      INTEGER            NUMROC
265      EXTERNAL           LSAME, NUMROC
266*     ..
267*     .. Intrinsic Functions ..
268      INTRINSIC          MIN, MOD
269*     ..
270*     .. Executable Statements ..
271*
272*     Quick return if possible
273*
274      IF( M.LE.0 .OR. N.LE.0 )
275     $   RETURN
276*
277*     Get grid parameters.
278*
279      ICTXT = DESCC( CTXT_ )
280      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
281*
282*     Figure local indexes
283*
284      LEFT = LSAME( SIDE, 'L' )
285      CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, IIV, JJV,
286     $              IVROW, IVCOL )
287      IROFFV = MOD( IV-1, DESCV( NB_ ) )
288      MPV = NUMROC( L+IROFFV, DESCV( MB_ ), MYROW, IVROW, NPROW )
289      IF( MYROW.EQ.IVROW )
290     $   MPV = MPV - IROFFV
291      ICOFFV = MOD( JV-1, DESCV( NB_ ) )
292      NQV = NUMROC( L+ICOFFV, DESCV( NB_ ), MYCOL, IVCOL, NPCOL )
293      IF( MYCOL.EQ.IVCOL )
294     $   NQV = NQV - ICOFFV
295      LDV = DESCV( LLD_ )
296      NCV = NUMROC( DESCV( N_ ), DESCV( NB_ ), MYCOL, DESCV( CSRC_ ),
297     $              NPCOL )
298      LDV = DESCV( LLD_ )
299      IIV = MIN( IIV, LDV )
300      JJV = MIN( JJV, NCV )
301      IOFFV = IIV+(JJV-1)*LDV
302      NCC = NUMROC( DESCC( N_ ), DESCC( NB_ ), MYCOL, DESCC( CSRC_ ),
303     $              NPCOL )
304      CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL,
305     $              IIC1, JJC1, ICROW1, ICCOL1 )
306      IROFFC1 = MOD( IC-1, DESCC( MB_ ) )
307      ICOFFC1 = MOD( JC-1, DESCC( NB_ ) )
308      LDC = DESCC( LLD_ )
309      IIC1 = MIN( IIC1, LDC )
310      JJC1 = MIN( JJC1, MAX( 1, NCC ) )
311      IOFFC1 = IIC1 + ( JJC1-1 ) * LDC
312*
313      IF( LEFT ) THEN
314         CALL INFOG2L( IC+M-L, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL,
315     $                 IIC2, JJC2, ICROW2, ICCOL2 )
316         IROFFC2 = MOD( IC+M-L-1, DESCC( MB_ ) )
317         ICOFFC2 = MOD( JC-1, DESCC( NB_ ) )
318         NQC2 = NUMROC( N+ICOFFC2, DESCC( NB_ ), MYCOL, ICCOL2, NPCOL )
319         IF( MYCOL.EQ.ICCOL2 )
320     $      NQC2 = NQC2 - ICOFFC2
321      ELSE
322         CALL INFOG2L( IC, JC+N-L, DESCC, NPROW, NPCOL, MYROW, MYCOL,
323     $                 IIC2, JJC2, ICROW2, ICCOL2 )
324         IROFFC2 = MOD( IC-1, DESCC( MB_ ) )
325         MPC2 = NUMROC( M+IROFFC2, DESCC( MB_ ), MYROW, ICROW2, NPROW )
326         IF( MYROW.EQ.ICROW2 )
327     $      MPC2 = MPC2 - IROFFC2
328         ICOFFC2 = MOD( JC+N-L-1, DESCC( NB_ ) )
329      END IF
330      IIC2 = MIN( IIC2, LDC )
331      JJC2 = MIN( JJC2, NCC )
332      IOFFC2 = IIC2 + ( JJC2-1 ) * LDC
333*
334*     Is sub( C ) only distributed over a process row ?
335*
336      CRBLCK = ( M.LE.(DESCC( MB_ )-IROFFC1) )
337*
338*     Is sub( C ) only distributed over a process column ?
339*
340      CCBLCK = ( N.LE.(DESCC( NB_ )-ICOFFC1) )
341*
342      IF( LEFT ) THEN
343*
344         IF( CRBLCK ) THEN
345            RDEST = ICROW2
346         ELSE
347            RDEST = -1
348         END IF
349*
350         IF( CCBLCK ) THEN
351*
352*           sub( C ) is distributed over a process column
353*
354            IF( DESCV( M_ ).EQ.INCV ) THEN
355*
356*              Transpose row vector V (ICOFFV = IROFFC2)
357*
358               IPW = MPV+1
359               CALL PBCTRNV( ICTXT, 'Rowwise', 'Transpose', M,
360     $                       DESCV( NB_ ), IROFFC2, V( IOFFV ), LDV,
361     $                       ZERO,
362     $                       WORK, 1, IVROW, IVCOL, ICROW2, ICCOL2,
363     $                       WORK( IPW ) )
364*
365*              Perform the local computation within a process column
366*
367               IF( MYCOL.EQ.ICCOL2 ) THEN
368*
369                  IF( MYROW.EQ.IVROW ) THEN
370*
371                     CALL CGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1,
372     $                             TAU( IIV ), 1 )
373                     TAULOC = TAU( IIV )
374*
375                  ELSE
376*
377                     CALL CGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1,
378     $                             TAULOC, 1, IVROW, MYCOL )
379*
380                  END IF
381*
382                  IF( TAULOC.NE.ZERO ) THEN
383*
384*                    w := sub( C )' * v
385*
386                     IF( MPV.GT.0 ) THEN
387                        CALL CGEMV( 'Conjugate transpose', MPV, NQC2,
388     $                              ONE, C( IOFFC2 ), LDC, WORK, 1,
389     $                              ZERO, WORK( IPW ), 1 )
390                     ELSE
391                        CALL CLASET( 'All', NQC2, 1, ZERO, ZERO,
392     $                               WORK( IPW ), MAX( 1, NQC2 ) )
393                     END IF
394                     IF( MYROW.EQ.ICROW1 )
395     $                  CALL CAXPY( NQC2, ONE, C( IOFFC1 ), LDC,
396     $                              WORK( IPW ), MAX( 1, NQC2 ) )
397*
398                     CALL CGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1,
399     $                             WORK( IPW ), MAX( 1, NQC2 ), RDEST,
400     $                             MYCOL )
401*
402*                    sub( C ) := sub( C ) - v * w'
403*
404                     IF( MYROW.EQ.ICROW1 )
405     $                  CALL CAXPY( NQC2, -TAULOC, WORK( IPW ),
406     $                              MAX( 1, NQC2 ), C( IOFFC1 ), LDC )
407                     CALL CGERC( MPV, NQC2, -TAULOC, WORK, 1,
408     $                           WORK( IPW ), 1, C( IOFFC2 ), LDC )
409                  END IF
410*
411               END IF
412*
413            ELSE
414*
415*              V is a column vector
416*
417               IF( IVCOL.EQ.ICCOL2 ) THEN
418*
419*                 Perform the local computation within a process column
420*
421                  IF( MYCOL.EQ.ICCOL2 ) THEN
422*
423                     TAULOC = TAU( JJV )
424*
425                     IF( TAULOC.NE.ZERO ) THEN
426*
427*                       w := sub( C )' * v
428*
429                        IF( MPV.GT.0 ) THEN
430                           CALL CGEMV( 'Conjugate transpose', MPV, NQC2,
431     $                              ONE, C( IOFFC2 ), LDC, V( IOFFV ),
432     $                              1, ZERO, WORK, 1 )
433                        ELSE
434                           CALL CLASET( 'All', NQC2, 1, ZERO, ZERO,
435     $                                  WORK, MAX( 1, NQC2 ) )
436                        END IF
437                        IF( MYROW.EQ.ICROW1 )
438     $                     CALL CAXPY( NQC2, ONE, C( IOFFC1 ), LDC,
439     $                                 WORK, MAX( 1, NQC2 ) )
440*
441                        CALL CGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1,
442     $                                WORK, MAX( 1, NQC2 ), RDEST,
443     $                                MYCOL )
444*
445*                       sub( C ) := sub( C ) - v * w'
446*
447                        IF( MYROW.EQ.ICROW1 )
448     $                     CALL CAXPY( NQC2, -TAULOC, WORK,
449     $                                 MAX( 1, NQC2 ), C( IOFFC1 ),
450     $                                 LDC )
451                        CALL CGERC( MPV, NQC2, -TAULOC, V( IOFFV ), 1,
452     $                              WORK, 1, C( IOFFC2 ), LDC )
453                     END IF
454*
455                  END IF
456*
457               ELSE
458*
459*                 Send V and TAU to the process column ICCOL2
460*
461                  IF( MYCOL.EQ.IVCOL ) THEN
462*
463                     IPW = MPV+1
464                     CALL CCOPY( MPV, V( IOFFV ), 1, WORK, 1 )
465                     WORK( IPW ) = TAU( JJV )
466                     CALL CGESD2D( ICTXT, IPW, 1, WORK, IPW, MYROW,
467     $                             ICCOL2 )
468*
469                  ELSE IF( MYCOL.EQ.ICCOL2 ) THEN
470*
471                     IPW = MPV+1
472                     CALL CGERV2D( ICTXT, IPW, 1, WORK, IPW, MYROW,
473     $                             IVCOL )
474                     TAULOC = WORK( IPW )
475*
476                     IF( TAULOC.NE.ZERO ) THEN
477*
478*                       w := sub( C )' * v
479*
480                        IF( MPV.GT.0 ) THEN
481                           CALL CGEMV( 'Conjugate transpose', MPV, NQC2,
482     $                                 ONE, C( IOFFC2 ), LDC, WORK, 1,
483     $                                 ZERO, WORK( IPW ), 1 )
484                        ELSE
485                           CALL CLASET( 'All', NQC2, 1, ZERO, ZERO,
486     $                                  WORK( IPW ), MAX( 1, NQC2 ) )
487                        END IF
488                        IF( MYROW.EQ.ICROW1 )
489     $                     CALL CAXPY( NQC2, ONE, C( IOFFC1 ), LDC,
490     $                                 WORK( IPW ), MAX( 1, NQC2 ) )
491*
492                        CALL CGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1,
493     $                                WORK( IPW ), MAX( 1, NQC2 ),
494     $                                RDEST, MYCOL )
495*
496*                       sub( C ) := sub( C ) - v * w'
497*
498                        IF( MYROW.EQ.ICROW1 )
499     $                     CALL CAXPY( NQC2, -TAULOC, WORK( IPW ),
500     $                                 MAX( 1, NQC2 ), C( IOFFC1 ),
501     $                                 LDC )
502                        CALL CGERC( MPV, NQC2, -TAULOC, WORK, 1,
503     $                              WORK( IPW ), 1, C( IOFFC2 ), LDC )
504                     END IF
505*
506                  END IF
507*
508               END IF
509*
510            END IF
511*
512         ELSE
513*
514*           sub( C ) is a proper distributed matrix
515*
516            IF( DESCV( M_ ).EQ.INCV ) THEN
517*
518*              Transpose and broadcast row vector V (ICOFFV=IROFFC2)
519*
520               IPW = MPV+1
521               CALL PBCTRNV( ICTXT, 'Rowwise', 'Transpose', M,
522     $                       DESCV( NB_ ), IROFFC2, V( IOFFV ), LDV,
523     $                       ZERO,
524     $                       WORK, 1, IVROW, IVCOL, ICROW2, -1,
525     $                       WORK( IPW ) )
526*
527*              Perform the local computation within a process column
528*
529               IF( MYROW.EQ.IVROW ) THEN
530*
531                  CALL CGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1,
532     $                          TAU( IIV ), 1 )
533                  TAULOC = TAU( IIV )
534*
535               ELSE
536*
537                  CALL CGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, TAULOC,
538     $                          1, IVROW, MYCOL )
539*
540               END IF
541*
542               IF( TAULOC.NE.ZERO ) THEN
543*
544*                 w := sub( C )' * v
545*
546                  IF( MPV.GT.0 ) THEN
547                     CALL CGEMV( 'Conjugate transpose', MPV, NQC2, ONE,
548     $                           C( IOFFC2 ), LDC, WORK, 1, ZERO,
549     $                           WORK( IPW ), 1 )
550                  ELSE
551                     CALL CLASET( 'All', NQC2, 1, ZERO, ZERO,
552     $                            WORK( IPW ), MAX( 1, NQC2 ) )
553                  END IF
554                  IF( MYROW.EQ.ICROW1 )
555     $               CALL CAXPY( NQC2, ONE, C( IOFFC1 ), LDC,
556     $                           WORK( IPW ), MAX( 1, NQC2 ) )
557*
558                  CALL CGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1,
559     $                          WORK( IPW ), MAX( 1, NQC2 ), RDEST,
560     $                          MYCOL )
561*
562*                 sub( C ) := sub( C ) - v * w'
563*
564                  IF( MYROW.EQ.ICROW1 )
565     $               CALL CAXPY( NQC2, -TAULOC, WORK( IPW ),
566     $                           MAX( 1, NQC2 ), C( IOFFC1 ), LDC )
567                  CALL CGERC( MPV, NQC2, -TAULOC, WORK, 1, WORK( IPW ),
568     $                        1, C( IOFFC2 ), LDC )
569               END IF
570*
571            ELSE
572*
573*              Broadcast column vector V
574*
575               CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP )
576               IF( MYCOL.EQ.IVCOL ) THEN
577*
578                  IPW = MPV+1
579                  CALL CCOPY( MPV, V( IOFFV ), 1, WORK, 1 )
580                  WORK( IPW ) = TAU( JJV )
581                  CALL CGEBS2D( ICTXT, 'Rowwise', ROWBTOP, IPW, 1,
582     $                          WORK, IPW )
583                  TAULOC = TAU( JJV )
584*
585               ELSE
586*
587                  IPW = MPV+1
588                  CALL CGEBR2D( ICTXT, 'Rowwise', ROWBTOP, IPW, 1, WORK,
589     $                          IPW, MYROW, IVCOL )
590                  TAULOC = WORK( IPW )
591*
592               END IF
593*
594               IF( TAULOC.NE.ZERO ) THEN
595*
596*                 w := sub( C )' * v
597*
598                  IF( MPV.GT.0 ) THEN
599                     CALL CGEMV( 'Conjugate transpose', MPV, NQC2, ONE,
600     $                           C( IOFFC2 ), LDC, WORK, 1, ZERO,
601     $                           WORK( IPW ), 1 )
602                  ELSE
603                     CALL CLASET( 'All', NQC2, 1, ZERO, ZERO,
604     $                            WORK( IPW ), MAX( 1, NQC2 ) )
605                  END IF
606                  IF( MYROW.EQ.ICROW1 )
607     $               CALL CAXPY( NQC2, ONE, C( IOFFC1 ), LDC,
608     $                           WORK( IPW ), MAX( 1, NQC2 ) )
609*
610                  CALL CGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1,
611     $                          WORK( IPW ), MAX( 1, NQC2 ), RDEST,
612     $                          MYCOL )
613*
614*                 sub( C ) := sub( C ) - v * w'
615*
616                  IF( MYROW.EQ.ICROW1 )
617     $               CALL CAXPY( NQC2, -TAULOC, WORK( IPW ),
618     $                           MAX( 1, NQC2 ), C( IOFFC1 ), LDC )
619                  CALL CGERC( MPV, NQC2, -TAULOC, WORK, 1, WORK( IPW ),
620     $                        1, C( IOFFC2 ), LDC )
621               END IF
622*
623            END IF
624*
625         END IF
626*
627      ELSE
628*
629         IF( CCBLCK ) THEN
630            RDEST = MYROW
631         ELSE
632            RDEST = -1
633         END IF
634*
635         IF( CRBLCK ) THEN
636*
637*           sub( C ) is distributed over a process row
638*
639            IF( DESCV( M_ ).EQ.INCV ) THEN
640*
641*              V is a row vector
642*
643               IF( IVROW.EQ.ICROW2 ) THEN
644*
645*                 Perform the local computation within a process row
646*
647                  IF( MYROW.EQ.ICROW2 ) THEN
648*
649                     TAULOC = TAU( IIV )
650*
651                     IF( TAULOC.NE.ZERO ) THEN
652*
653*                       w := sub( C ) * v
654*
655                        IF( NQV.GT.0 ) THEN
656                           CALL CGEMV( 'No transpose', MPC2, NQV, ONE,
657     $                                 C( IOFFC2 ), LDC, V( IOFFV ),
658     $                                 LDV, ZERO, WORK, 1 )
659                        ELSE
660                           CALL CLASET( 'All', MPC2, 1, ZERO, ZERO,
661     $                                  WORK, MAX( 1, MPC2 ) )
662                        END IF
663                        IF( MYCOL.EQ.ICCOL1 )
664     $                     CALL CAXPY( MPC2, ONE, C( IOFFC1 ), 1,
665     $                                   WORK, 1 )
666*
667                        CALL CGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1,
668     $                                WORK, MAX( 1, MPC2 ), RDEST,
669     $                               ICCOL2 )
670*
671                        IF( MYCOL.EQ.ICCOL1 )
672     $                     CALL CAXPY( MPC2, -TAULOC, WORK, 1,
673     $                                 C( IOFFC1 ), 1 )
674*
675*                       sub( C ) := sub( C ) - w * v'
676*
677                        IF( MPC2.GT.0 .AND. NQV.GT.0 )
678     $                     CALL CGERC( MPC2, NQV, -TAULOC, WORK, 1,
679     $                                 V( IOFFV ), LDV, C( IOFFC2 ),
680     $                                 LDC )
681                     END IF
682*
683                  END IF
684*
685               ELSE
686*
687*                 Send V and TAU to the process row ICROW2
688*
689                  IF( MYROW.EQ.IVROW ) THEN
690*
691                     IPW = NQV+1
692                     CALL CCOPY( NQV, V( IOFFV ), LDV, WORK, 1 )
693                     WORK( IPW ) = TAU( IIV )
694                     CALL CGESD2D( ICTXT, IPW, 1, WORK, IPW, ICROW2,
695     $                             MYCOL )
696*
697                  ELSE IF( MYROW.EQ.ICROW2 ) THEN
698*
699                     IPW = NQV+1
700                     CALL CGERV2D( ICTXT, IPW, 1, WORK, IPW, IVROW,
701     $                             MYCOL )
702                     TAULOC = WORK( IPW )
703*
704                     IF( TAULOC.NE.ZERO ) THEN
705*
706*                       w := sub( C ) * v
707*
708                        IF( NQV.GT.0 ) THEN
709                           CALL CGEMV( 'No transpose', MPC2, NQV, ONE,
710     $                                 C( IOFFC2 ), LDC, WORK, 1, ZERO,
711     $                                 WORK( IPW ), 1 )
712                        ELSE
713                           CALL CLASET( 'All', MPC2, 1, ZERO, ZERO,
714     $                                  WORK( IPW ), MAX( 1, MPC2 ) )
715                        END IF
716                        IF( MYCOL.EQ.ICCOL1 )
717     $                     CALL CAXPY( MPC2, ONE, C( IOFFC1 ), 1,
718     $                                   WORK( IPW ), 1 )
719                        CALL CGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1,
720     $                                WORK( IPW ), MAX( 1, MPC2 ),
721     $                                RDEST, ICCOL2 )
722                        IF( MYCOL.EQ.ICCOL1 )
723     $                     CALL CAXPY( MPC2, -TAULOC, WORK( IPW ), 1,
724     $                                 C( IOFFC1 ), 1 )
725*
726*                       sub( C ) := sub( C ) - w * v'
727*
728                        CALL CGERC( MPC2, NQV, -TAULOC, WORK( IPW ), 1,
729     $                              WORK, 1, C( IOFFC2 ), LDC )
730                     END IF
731*
732                  END IF
733*
734               END IF
735*
736            ELSE
737*
738*              Transpose column vector V (IROFFV = ICOFFC2)
739*
740               IPW = NQV+1
741               CALL PBCTRNV( ICTXT, 'Columnwise', 'Transpose', N,
742     $                       DESCV( MB_ ), ICOFFC2, V( IOFFV ), 1, ZERO,
743     $                       WORK, 1, IVROW, IVCOL, ICROW2, ICCOL2,
744     $                       WORK( IPW ) )
745*
746*              Perform the local computation within a process column
747*
748               IF( MYROW.EQ.ICROW2 ) THEN
749*
750                  IF( MYCOL.EQ.IVCOL ) THEN
751*
752                     CALL CGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1,
753     $                             TAU( JJV ), 1 )
754                     TAULOC = TAU( JJV )
755*
756                  ELSE
757*
758                     CALL CGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, TAULOC,
759     $                             1, MYROW, IVCOL )
760*
761                  END IF
762*
763                  IF( TAULOC.NE.ZERO ) THEN
764*
765*                    w := sub( C ) * v
766*
767                     IF( NQV.GT.0 ) THEN
768                        CALL CGEMV( 'No transpose', MPC2, NQV, ONE,
769     $                              C( IOFFC2 ), LDC, WORK, 1, ZERO,
770     $                              WORK( IPW ), 1 )
771                     ELSE
772                        CALL CLASET( 'All', MPC2, 1, ZERO, ZERO,
773     $                               WORK( IPW ), MAX( 1, MPC2 ) )
774                     END IF
775                     IF( MYCOL.EQ.ICCOL1 )
776     $                  CALL CAXPY( MPC2, ONE, C( IOFFC1 ), 1,
777     $                              WORK( IPW ), 1 )
778                     CALL CGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1,
779     $                             WORK( IPW ), MAX( 1, MPC2 ), RDEST,
780     $                             ICCOL2 )
781                     IF( MYCOL.EQ.ICCOL1 )
782     $                  CALL CAXPY( MPC2, -TAULOC, WORK( IPW ), 1,
783     $                              C( IOFFC1 ), 1 )
784*
785*                    sub( C ) := sub( C ) - w * v'
786*
787                     CALL CGERC( MPC2, NQV, -TAULOC, WORK( IPW ), 1,
788     $                           WORK, 1, C( IOFFC2 ), LDC )
789                  END IF
790*
791               END IF
792*
793            END IF
794*
795         ELSE
796*
797*           sub( C ) is a proper distributed matrix
798*
799            IF( DESCV( M_ ).EQ.INCV ) THEN
800*
801*              Broadcast row vector V
802*
803               CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise',
804     $                       COLBTOP )
805               IF( MYROW.EQ.IVROW ) THEN
806*
807                  IPW = NQV+1
808                  CALL CCOPY( NQV, V( IOFFV ), LDV, WORK, 1 )
809                  WORK( IPW ) = TAU( IIV )
810                  CALL CGEBS2D( ICTXT, 'Columnwise', COLBTOP, IPW, 1,
811     $                          WORK, IPW )
812                  TAULOC = TAU( IIV )
813*
814               ELSE
815*
816                  IPW = NQV+1
817                  CALL CGEBR2D( ICTXT, 'Columnwise', COLBTOP, IPW, 1,
818     $                          WORK, IPW, IVROW, MYCOL )
819                  TAULOC = WORK( IPW )
820*
821               END IF
822*
823               IF( TAULOC.NE.ZERO ) THEN
824*
825*                 w := sub( C ) * v
826*
827                  IF( NQV.GT.0 ) THEN
828                     CALL CGEMV( 'No Transpose', MPC2, NQV, ONE,
829     $                           C( IOFFC2 ), LDC, WORK, 1, ZERO,
830     $                           WORK( IPW ), 1 )
831                  ELSE
832                     CALL CLASET( 'All', MPC2, 1, ZERO, ZERO,
833     $                            WORK( IPW ), MAX( 1, MPC2 ) )
834                  END IF
835                  IF( MYCOL.EQ.ICCOL1 )
836     $               CALL CAXPY( MPC2, ONE, C( IOFFC1 ), 1,
837     $                           WORK( IPW ), 1 )
838*
839                  CALL CGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1,
840     $                          WORK( IPW ), MAX( 1, MPC2 ), RDEST,
841     $                          ICCOL2 )
842                  IF( MYCOL.EQ.ICCOL1 )
843     $               CALL CAXPY( MPC2, -TAULOC, WORK( IPW ), 1,
844     $                           C( IOFFC1 ), 1 )
845*
846*                 sub( C ) := sub( C ) - w * v'
847*
848                  CALL CGERC( MPC2, NQV, -TAULOC, WORK( IPW ), 1, WORK,
849     $                        1, C( IOFFC2 ), LDC )
850               END IF
851*
852            ELSE
853*
854*              Transpose and broadcast column vector V (ICOFFC2=IROFFV)
855*
856               IPW = NQV+1
857               CALL PBCTRNV( ICTXT, 'Columnwise', 'Transpose', N,
858     $                       DESCV( MB_ ), ICOFFC2, V( IOFFV ), 1, ZERO,
859     $                       WORK, 1, IVROW, IVCOL, -1, ICCOL2,
860     $                       WORK( IPW ) )
861*
862*              Perform the local computation within a process column
863*
864               IF( MYCOL.EQ.IVCOL ) THEN
865*
866                  CALL CGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, TAU( JJV ),
867     $                          1 )
868                  TAULOC = TAU( JJV )
869*
870               ELSE
871*
872                  CALL CGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, TAULOC, 1,
873     $                          MYROW, IVCOL )
874*
875               END IF
876*
877               IF( TAULOC.NE.ZERO ) THEN
878*
879*                 w := sub( C ) * v
880*
881                  IF( NQV.GT.0 ) THEN
882                     CALL CGEMV( 'No transpose', MPC2, NQV, ONE,
883     $                           C( IOFFC2 ), LDC, WORK, 1, ZERO,
884     $                           WORK( IPW ), 1 )
885                  ELSE
886                     CALL CLASET( 'All', MPC2, 1, ZERO, ZERO,
887     $                            WORK( IPW ), MAX( 1, MPC2 ) )
888                  END IF
889                  IF( MYCOL.EQ.ICCOL1 )
890     $               CALL CAXPY( MPC2, ONE, C( IOFFC1 ), 1,
891     $                           WORK( IPW ), 1 )
892                  CALL CGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1,
893     $                          WORK( IPW ), MAX( 1, MPC2 ), RDEST,
894     $                          ICCOL2 )
895                  IF( MYCOL.EQ.ICCOL1 )
896     $               CALL CAXPY( MPC2, -TAULOC, WORK( IPW ), 1,
897     $                           C( IOFFC1 ), 1 )
898*
899*                 sub( C ) := sub( C ) - w * v'
900*
901                  CALL CGERC( MPC2, NQV, -TAULOC, WORK( IPW ), 1, WORK,
902     $                        1, C( IOFFC2 ), LDC )
903               END IF
904*
905            END IF
906*
907         END IF
908*
909      END IF
910*
911      RETURN
912*
913*     End of PCLARZ
914*
915      END
916