1      SUBROUTINE PZLARZC( SIDE, M, N, L, V, IV, JV, DESCV, INCV, TAU, C,
2     $                    IC, JC, DESCC, WORK )
3*
4*  -- ScaLAPACK auxiliary routine (version 1.7) --
5*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6*     and University of California, Berkeley.
7*     May 25, 2001
8*
9*     .. Scalar Arguments ..
10      CHARACTER          SIDE
11      INTEGER            IC, INCV, IV, JC, JV, L, M, N
12*     ..
13*     .. Array Arguments ..
14      INTEGER            DESCC( * ), DESCV( * )
15      COMPLEX*16         C( * ), TAU( * ), V( * ), WORK( * )
16*     ..
17*
18*  Purpose
19*  =======
20*
21*  PZLARZC applies a complex elementary reflector Q**H to a
22*  complex M-by-N distributed matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1),
23*  from either the left or the right. Q is represented in the form
24*
25*        Q = I - tau * v * v'
26*
27*  where tau is a complex scalar and v is a complex vector.
28*
29*  If tau = 0, then Q is taken to be the unit matrix.
30*
31*  Q is a product of k elementary reflectors as returned by PZTZRZF.
32*
33*  Notes
34*  =====
35*
36*  Each global data object is described by an associated description
37*  vector.  This vector stores the information required to establish
38*  the mapping between an object element and its corresponding process
39*  and memory location.
40*
41*  Let A be a generic term for any 2D block cyclicly distributed array.
42*  Such a global array has an associated description vector DESCA.
43*  In the following comments, the character _ should be read as
44*  "of the global array".
45*
46*  NOTATION        STORED IN      EXPLANATION
47*  --------------- -------------- --------------------------------------
48*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
49*                                 DTYPE_A = 1.
50*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
51*                                 the BLACS process grid A is distribu-
52*                                 ted over. The context itself is glo-
53*                                 bal, but the handle (the integer
54*                                 value) may vary.
55*  M_A    (global) DESCA( M_ )    The number of rows in the global
56*                                 array A.
57*  N_A    (global) DESCA( N_ )    The number of columns in the global
58*                                 array A.
59*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
60*                                 the rows of the array.
61*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
62*                                 the columns of the array.
63*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
64*                                 row of the array A is distributed.
65*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
66*                                 first column of the array A is
67*                                 distributed.
68*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
69*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
70*
71*  Let K be the number of rows or columns of a distributed matrix,
72*  and assume that its process grid has dimension p x q.
73*  LOCr( K ) denotes the number of elements of K that a process
74*  would receive if K were distributed over the p processes of its
75*  process column.
76*  Similarly, LOCc( K ) denotes the number of elements of K that a
77*  process would receive if K were distributed over the q processes of
78*  its process row.
79*  The values of LOCr() and LOCc() may be determined via a call to the
80*  ScaLAPACK tool function, NUMROC:
81*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
82*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
83*  An upper bound for these quantities may be computed by:
84*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
85*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
86*
87*  Because vectors may be viewed as a subclass of matrices, a
88*  distributed vector is considered to be a distributed matrix.
89*
90*  Restrictions
91*  ============
92*
93*  If SIDE = 'Left' and INCV = 1, then the row process having the first
94*  entry V(IV,JV) must also own C(IC+M-L,JC:JC+N-1). Moreover,
95*  MOD(IV-1,MB_V) must be equal to MOD(IC+N-L-1,MB_C), if INCV=M_V, only
96*  the last equality must be satisfied.
97*
98*  If SIDE = 'Right' and INCV = M_V then the column process having the
99*  first entry V(IV,JV) must also own C(IC:IC+M-1,JC+N-L) and
100*  MOD(JV-1,NB_V) must be equal to MOD(JC+N-L-1,NB_C), if INCV = 1 only
101*  the last equality must be satisfied.
102*
103*  Arguments
104*  =========
105*
106*  SIDE    (global input) CHARACTER
107*          = 'L': form  Q**H * sub( C ),
108*          = 'R': form  sub( C ) * Q**H.
109*
110*  M       (global input) INTEGER
111*          The number of rows to be operated on i.e the number of rows
112*          of the distributed submatrix sub( C ). M >= 0.
113*
114*  N       (global input) INTEGER
115*          The number of columns to be operated on i.e the number of
116*          columns of the distributed submatrix sub( C ). N >= 0.
117*
118*  L       (global input) INTEGER
119*          The columns of the distributed submatrix sub( A ) containing
120*          the meaningful part of the Householder reflectors.
121*          If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0.
122*
123*  V       (local input) COMPLEX*16 pointer into the local memory
124*          to an array of dimension (LLD_V,*) containing the local
125*          pieces of the distributed vectors V representing the
126*          Householder transformation Q,
127*             V(IV:IV+L-1,JV) if SIDE = 'L' and INCV = 1,
128*             V(IV,JV:JV+L-1) if SIDE = 'L' and INCV = M_V,
129*             V(IV:IV+L-1,JV) if SIDE = 'R' and INCV = 1,
130*             V(IV,JV:JV+L-1) if SIDE = 'R' and INCV = M_V,
131*
132*          The vector v in the representation of Q. V is not used if
133*          TAU = 0.
134*
135*  IV      (global input) INTEGER
136*          The row index in the global array V indicating the first
137*          row of sub( V ).
138*
139*  JV      (global input) INTEGER
140*          The column index in the global array V indicating the
141*          first column of sub( V ).
142*
143*  DESCV   (global and local input) INTEGER array of dimension DLEN_.
144*          The array descriptor for the distributed matrix V.
145*
146*  INCV    (global input) INTEGER
147*          The global increment for the elements of V. Only two values
148*          of INCV are supported in this version, namely 1 and M_V.
149*          INCV must not be zero.
150*
151*  TAU     (local input) COMPLEX*16, array, dimension  LOCc(JV) if
152*          INCV = 1, and LOCr(IV) otherwise. This array contains the
153*          Householder scalars related to the Householder vectors.
154*          TAU is tied to the distributed matrix V.
155*
156*  C       (local input/local output) COMPLEX*16 pointer into the
157*          local memory to an array of dimension (LLD_C, LOCc(JC+N-1) ),
158*          containing the local pieces of sub( C ). On exit, sub( C )
159*          is overwritten by the Q**H * sub( C ) if SIDE = 'L', or
160*          sub( C ) * Q**H if SIDE = 'R'.
161*
162*  IC      (global input) INTEGER
163*          The row index in the global array C indicating the first
164*          row of sub( C ).
165*
166*  JC      (global input) INTEGER
167*          The column index in the global array C indicating the
168*          first column of sub( C ).
169*
170*  DESCC   (global and local input) INTEGER array of dimension DLEN_.
171*          The array descriptor for the distributed matrix C.
172*
173*  WORK    (local workspace) COMPLEX*16 array, dimension (LWORK)
174*          If INCV = 1,
175*            if SIDE = 'L',
176*              if IVCOL = ICCOL,
177*                LWORK >= NqC0
178*              else
179*                LWORK >= MpC0 + MAX( 1, NqC0 )
180*              end if
181*            else if SIDE = 'R',
182*              LWORK >= NqC0 + MAX( MAX( 1, MpC0 ), NUMROC( NUMROC(
183*                       N+ICOFFC,NB_V,0,0,NPCOL ),NB_V,0,0,LCMQ ) )
184*            end if
185*          else if INCV = M_V,
186*            if SIDE = 'L',
187*              LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( NUMROC(
188*                       M+IROFFC,MB_V,0,0,NPROW ),MB_V,0,0,LCMP ) )
189*            else if SIDE = 'R',
190*              if IVROW = ICROW,
191*                LWORK >= MpC0
192*              else
193*                LWORK >= NqC0 + MAX( 1, MpC0 )
194*              end if
195*            end if
196*          end if
197*
198*          where LCM is the least common multiple of NPROW and NPCOL and
199*          LCM = ILCM( NPROW, NPCOL ), LCMP = LCM / NPROW,
200*          LCMQ = LCM / NPCOL,
201*
202*          IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ),
203*          ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ),
204*          ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ),
205*          MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ),
206*          NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ),
207*
208*          ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions;
209*          MYROW, MYCOL, NPROW and NPCOL can be determined by calling
210*          the subroutine BLACS_GRIDINFO.
211*
212*  Alignment requirements
213*  ======================
214*
215*  The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1)
216*  must verify some alignment properties, namely the following
217*  expressions should be true:
218*
219*  MB_V = NB_V,
220*
221*  If INCV = 1,
222*    If SIDE = 'Left',
223*      ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW )
224*    If SIDE = 'Right',
225*      ( MB_V.EQ.NB_A .AND. MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC )
226*  else if INCV = M_V,
227*    If SIDE = 'Left',
228*      ( MB_V.EQ.NB_V .AND. MB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC )
229*    If SIDE = 'Right',
230*      ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL )
231*  end if
232*
233*  =====================================================================
234*
235*     .. Parameters ..
236      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
237     $                   LLD_, MB_, M_, NB_, N_, RSRC_
238      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
239     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
240     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
241      COMPLEX*16         ONE, ZERO
242      PARAMETER          ( ONE  = ( 1.0D+0, 0.0D+0 ),
243     $                     ZERO = ( 0.0D+0, 0.0D+0 ) )
244*     ..
245*     .. Local Scalars ..
246      LOGICAL            CCBLCK, CRBLCK, LEFT
247      CHARACTER          COLBTOP, ROWBTOP
248      INTEGER            ICCOL1, ICCOL2, ICOFFC1, ICOFFC2, ICOFFV,
249     $                   ICROW1, ICROW2, ICTXT, IIC1, IIC2, IIV, IOFFC1,
250     $                   IOFFC2, IOFFV, IPW, IROFFC1, IROFFC2, IROFFV,
251     $                   IVCOL, IVROW, JJC1, JJC2, JJV, LDC, LDV, MPC2,
252     $                   MPV, MYCOL, MYROW, NCC, NCV, NPCOL, NPROW,
253     $                   NQC2, NQV, RDEST
254      COMPLEX*16         TAULOC
255*     ..
256*     .. External Subroutines ..
257      EXTERNAL           BLACS_GRIDINFO, INFOG2L, PB_TOPGET, PBZTRNV,
258     $                   ZAXPY, ZCOPY, ZGEBR2D, ZGEBS2D,
259     $                   ZGEMV, ZGERC, ZGERV2D, ZGESD2D,
260     $                   ZGSUM2D, ZLASET
261*     ..
262*     .. External Functions ..
263      LOGICAL            LSAME
264      INTEGER            NUMROC
265      EXTERNAL           LSAME, NUMROC
266*     ..
267*     .. Intrinsic Functions ..
268      INTRINSIC          MIN, MOD
269*     ..
270*     .. Executable Statements ..
271*
272*     Quick return if possible
273*
274      IF( M.LE.0 .OR. N.LE.0 )
275     $   RETURN
276*
277*     Get grid parameters.
278*
279      ICTXT = DESCC( CTXT_ )
280      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
281*
282*     Figure local indexes
283*
284      LEFT = LSAME( SIDE, 'L' )
285      CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, IIV, JJV,
286     $              IVROW, IVCOL )
287      IROFFV = MOD( IV-1, DESCV( NB_ ) )
288      MPV = NUMROC( L+IROFFV, DESCV( MB_ ), MYROW, IVROW, NPROW )
289      IF( MYROW.EQ.IVROW )
290     $   MPV = MPV - IROFFV
291      ICOFFV = MOD( JV-1, DESCV( NB_ ) )
292      NQV = NUMROC( L+ICOFFV, DESCV( NB_ ), MYCOL, IVCOL, NPCOL )
293      IF( MYCOL.EQ.IVCOL )
294     $   NQV = NQV - ICOFFV
295      LDV = DESCV( LLD_ )
296      NCV = NUMROC( DESCV( N_ ), DESCV( NB_ ), MYCOL, DESCV( CSRC_ ),
297     $              NPCOL )
298      LDV = DESCV( LLD_ )
299      IIV = MIN( IIV, LDV )
300      JJV = MIN( JJV, NCV )
301      IOFFV = IIV+(JJV-1)*LDV
302      NCC = NUMROC( DESCC( N_ ), DESCC( NB_ ), MYCOL, DESCC( CSRC_ ),
303     $              NPCOL )
304      CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL,
305     $              IIC1, JJC1, ICROW1, ICCOL1 )
306      IROFFC1 = MOD( IC-1, DESCC( MB_ ) )
307      ICOFFC1 = MOD( JC-1, DESCC( NB_ ) )
308      LDC = DESCC( LLD_ )
309      IIC1 = MIN( IIC1, LDC )
310      JJC1 = MIN( JJC1, MAX( 1, NCC ) )
311      IOFFC1 = IIC1 + ( JJC1-1 ) * LDC
312*
313      IF( LEFT ) THEN
314         CALL INFOG2L( IC+M-L, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL,
315     $                 IIC2, JJC2, ICROW2, ICCOL2 )
316         IROFFC2 = MOD( IC+M-L-1, DESCC( MB_ ) )
317         ICOFFC2 = MOD( JC-1, DESCC( NB_ ) )
318         NQC2 = NUMROC( N+ICOFFC2, DESCC( NB_ ), MYCOL, ICCOL2, NPCOL )
319         IF( MYCOL.EQ.ICCOL2 )
320     $      NQC2 = NQC2 - ICOFFC2
321      ELSE
322         CALL INFOG2L( IC, JC+N-L, DESCC, NPROW, NPCOL, MYROW, MYCOL,
323     $                 IIC2, JJC2, ICROW2, ICCOL2 )
324         IROFFC2 = MOD( IC-1, DESCC( MB_ ) )
325         MPC2 = NUMROC( M+IROFFC2, DESCC( MB_ ), MYROW, ICROW2, NPROW )
326         IF( MYROW.EQ.ICROW2 )
327     $      MPC2 = MPC2 - IROFFC2
328         ICOFFC2 = MOD( JC+N-L-1, DESCC( NB_ ) )
329      END IF
330      IIC2 = MIN( IIC2, LDC )
331      JJC2 = MIN( JJC2, NCC )
332      IOFFC2 = IIC2 + ( JJC2-1 ) * LDC
333*
334*     Is sub( C ) only distributed over a process row ?
335*
336      CRBLCK = ( M.LE.(DESCC( MB_ )-IROFFC1) )
337*
338*     Is sub( C ) only distributed over a process column ?
339*
340      CCBLCK = ( N.LE.(DESCC( NB_ )-ICOFFC1) )
341*
342      IF( LEFT ) THEN
343*
344         IF( CRBLCK ) THEN
345            RDEST = ICROW2
346         ELSE
347            RDEST = -1
348         END IF
349*
350         IF( CCBLCK ) THEN
351*
352*           sub( C ) is distributed over a process column
353*
354            IF( DESCV( M_ ).EQ.INCV ) THEN
355*
356*              Transpose row vector V (ICOFFV = IROFFC2)
357*
358               IPW = MPV+1
359               CALL PBZTRNV( ICTXT, 'Rowwise', 'Transpose', M,
360     $                       DESCV( NB_ ), IROFFC2, V( IOFFV ), LDV,
361     $                       ZERO,
362     $                       WORK, 1, IVROW, IVCOL, ICROW2, ICCOL2,
363     $                       WORK( IPW ) )
364*
365*              Perform the local computation within a process column
366*
367               IF( MYCOL.EQ.ICCOL2 ) THEN
368*
369                  IF( MYROW.EQ.IVROW ) THEN
370*
371                     CALL ZGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1,
372     $                             TAU( IIV ), 1 )
373                     TAULOC = DCONJG( TAU( IIV ) )
374*
375                  ELSE
376*
377                     CALL ZGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1,
378     $                             TAULOC, 1, IVROW, MYCOL )
379                     TAULOC = DCONJG( TAULOC )
380*
381                  END IF
382*
383                  IF( TAULOC.NE.ZERO ) THEN
384*
385*                    w := sub( C )' * v
386*
387                     IF( MPV.GT.0 ) THEN
388                        CALL ZGEMV( 'Conjugate transpose', MPV, NQC2,
389     $                              ONE, C( IOFFC2 ), LDC, WORK, 1,
390     $                              ZERO, WORK( IPW ), 1 )
391                     ELSE
392                        CALL ZLASET( 'All', NQC2, 1, ZERO, ZERO,
393     $                               WORK( IPW ), MAX( 1, NQC2 ) )
394                     END IF
395                     IF( MYROW.EQ.ICROW1 )
396     $                  CALL ZAXPY( NQC2, ONE, C( IOFFC1 ), LDC,
397     $                              WORK( IPW ), MAX( 1, NQC2 ) )
398*
399                     CALL ZGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1,
400     $                             WORK( IPW ), MAX( 1, NQC2 ), RDEST,
401     $                             MYCOL )
402*
403*                    sub( C ) := sub( C ) - v * w'
404*
405                     IF( MYROW.EQ.ICROW1 )
406     $                  CALL ZAXPY( NQC2, -TAULOC, WORK( IPW ),
407     $                              MAX( 1, NQC2 ), C( IOFFC1 ), LDC )
408                     CALL ZGERC( MPV, NQC2, -TAULOC, WORK, 1,
409     $                           WORK( IPW ), 1, C( IOFFC2 ), LDC )
410                  END IF
411*
412               END IF
413*
414            ELSE
415*
416*              V is a column vector
417*
418               IF( IVCOL.EQ.ICCOL2 ) THEN
419*
420*                 Perform the local computation within a process column
421*
422                  IF( MYCOL.EQ.ICCOL2 ) THEN
423*
424                     TAULOC = DCONJG( TAU( JJV ) )
425*
426                     IF( TAULOC.NE.ZERO ) THEN
427*
428*                       w := sub( C )' * v
429*
430                        IF( MPV.GT.0 ) THEN
431                           CALL ZGEMV( 'Conjugate transpose', MPV, NQC2,
432     $                              ONE, C( IOFFC2 ), LDC, V( IOFFV ),
433     $                              1, ZERO, WORK, 1 )
434                        ELSE
435                           CALL ZLASET( 'All', NQC2, 1, ZERO, ZERO,
436     $                                  WORK, MAX( 1, NQC2 ) )
437                        END IF
438                        IF( MYROW.EQ.ICROW1 )
439     $                     CALL ZAXPY( NQC2, ONE, C( IOFFC1 ), LDC,
440     $                                 WORK, MAX( 1, NQC2 ) )
441*
442                        CALL ZGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1,
443     $                                WORK, MAX( 1, NQC2 ), RDEST,
444     $                                MYCOL )
445*
446*                       sub( C ) := sub( C ) - v * w'
447*
448                        IF( MYROW.EQ.ICROW1 )
449     $                     CALL ZAXPY( NQC2, -TAULOC, WORK,
450     $                                 MAX( 1, NQC2 ), C( IOFFC1 ),
451     $                                 LDC )
452                        CALL ZGERC( MPV, NQC2, -TAULOC, V( IOFFV ), 1,
453     $                              WORK, 1, C( IOFFC2 ), LDC )
454                     END IF
455*
456                  END IF
457*
458               ELSE
459*
460*                 Send V and TAU to the process column ICCOL2
461*
462                  IF( MYCOL.EQ.IVCOL ) THEN
463*
464                     IPW = MPV+1
465                     CALL ZCOPY( MPV, V( IOFFV ), 1, WORK, 1 )
466                     WORK( IPW ) = TAU( JJV )
467                     CALL ZGESD2D( ICTXT, IPW, 1, WORK, IPW, MYROW,
468     $                             ICCOL2 )
469*
470                  ELSE IF( MYCOL.EQ.ICCOL2 ) THEN
471*
472                     IPW = MPV+1
473                     CALL ZGERV2D( ICTXT, IPW, 1, WORK, IPW, MYROW,
474     $                             IVCOL )
475                     TAULOC = DCONJG( WORK( IPW ) )
476*
477                     IF( TAULOC.NE.ZERO ) THEN
478*
479*                       w := sub( C )' * v
480*
481                        IF( MPV.GT.0 ) THEN
482                           CALL ZGEMV( 'Conjugate transpose', MPV, NQC2,
483     $                                 ONE, C( IOFFC2 ), LDC, WORK, 1,
484     $                                 ZERO, WORK( IPW ), 1 )
485                        ELSE
486                           CALL ZLASET( 'All', NQC2, 1, ZERO, ZERO,
487     $                                  WORK( IPW ), MAX( 1, NQC2 ) )
488                        END IF
489                        IF( MYROW.EQ.ICROW1 )
490     $                     CALL ZAXPY( NQC2, ONE, C( IOFFC1 ), LDC,
491     $                                 WORK( IPW ), MAX( 1, NQC2 ) )
492*
493                        CALL ZGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1,
494     $                                WORK( IPW ), MAX( 1, NQC2 ),
495     $                                RDEST, MYCOL )
496*
497*                       sub( C ) := sub( C ) - v * w'
498*
499                        IF( MYROW.EQ.ICROW1 )
500     $                     CALL ZAXPY( NQC2, -TAULOC, WORK( IPW ),
501     $                                 MAX( 1, NQC2 ), C( IOFFC1 ),
502     $                                 LDC )
503                        CALL ZGERC( MPV, NQC2, -TAULOC, WORK, 1,
504     $                              WORK( IPW ), 1, C( IOFFC2 ), LDC )
505                     END IF
506*
507                  END IF
508*
509               END IF
510*
511            END IF
512*
513         ELSE
514*
515*           sub( C ) is a proper distributed matrix
516*
517            IF( DESCV( M_ ).EQ.INCV ) THEN
518*
519*              Transpose and broadcast row vector V (ICOFFV=IROFFC2)
520*
521               IPW = MPV+1
522               CALL PBZTRNV( ICTXT, 'Rowwise', 'Transpose', M,
523     $                       DESCV( NB_ ), IROFFC2, V( IOFFV ), LDV,
524     $                       ZERO,
525     $                       WORK, 1, IVROW, IVCOL, ICROW2, -1,
526     $                       WORK( IPW ) )
527*
528*              Perform the local computation within a process column
529*
530               IF( MYROW.EQ.IVROW ) THEN
531*
532                  CALL ZGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1,
533     $                          TAU( IIV ), 1 )
534                  TAULOC = DCONJG( TAU( IIV ) )
535*
536               ELSE
537*
538                  CALL ZGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, TAULOC,
539     $                          1, IVROW, MYCOL )
540                  TAULOC = DCONJG( TAULOC )
541*
542               END IF
543*
544               IF( TAULOC.NE.ZERO ) THEN
545*
546*                 w := sub( C )' * v
547*
548                  IF( MPV.GT.0 ) THEN
549                     CALL ZGEMV( 'Conjugate transpose', MPV, NQC2, ONE,
550     $                           C( IOFFC2 ), LDC, WORK, 1, ZERO,
551     $                           WORK( IPW ), 1 )
552                  ELSE
553                     CALL ZLASET( 'All', NQC2, 1, ZERO, ZERO,
554     $                            WORK( IPW ), MAX( 1, NQC2 ) )
555                  END IF
556                  IF( MYROW.EQ.ICROW1 )
557     $               CALL ZAXPY( NQC2, ONE, C( IOFFC1 ), LDC,
558     $                           WORK( IPW ), MAX( 1, NQC2 ) )
559*
560                  CALL ZGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1,
561     $                          WORK( IPW ), MAX( 1, NQC2 ), RDEST,
562     $                          MYCOL )
563*
564*                 sub( C ) := sub( C ) - v * w'
565*
566                  IF( MYROW.EQ.ICROW1 )
567     $               CALL ZAXPY( NQC2, -TAULOC, WORK( IPW ),
568     $                           MAX( 1, NQC2 ), C( IOFFC1 ), LDC )
569                  CALL ZGERC( MPV, NQC2, -TAULOC, WORK, 1, WORK( IPW ),
570     $                        1, C( IOFFC2 ), LDC )
571               END IF
572*
573            ELSE
574*
575*              Broadcast column vector V
576*
577               CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP )
578               IF( MYCOL.EQ.IVCOL ) THEN
579*
580                  IPW = MPV+1
581                  CALL ZCOPY( MPV, V( IOFFV ), 1, WORK, 1 )
582                  WORK( IPW ) = TAU( JJV )
583                  CALL ZGEBS2D( ICTXT, 'Rowwise', ROWBTOP, IPW, 1,
584     $                          WORK, IPW )
585                  TAULOC = DCONJG( TAU( JJV ) )
586*
587               ELSE
588*
589                  IPW = MPV+1
590                  CALL ZGEBR2D( ICTXT, 'Rowwise', ROWBTOP, IPW, 1, WORK,
591     $                          IPW, MYROW, IVCOL )
592                  TAULOC = DCONJG( WORK( IPW ) )
593*
594               END IF
595*
596               IF( TAULOC.NE.ZERO ) THEN
597*
598*                 w := sub( C )' * v
599*
600                  IF( MPV.GT.0 ) THEN
601                     CALL ZGEMV( 'Conjugate transpose', MPV, NQC2, ONE,
602     $                           C( IOFFC2 ), LDC, WORK, 1, ZERO,
603     $                           WORK( IPW ), 1 )
604                  ELSE
605                     CALL ZLASET( 'All', NQC2, 1, ZERO, ZERO,
606     $                            WORK( IPW ), MAX( 1, NQC2 ) )
607                  END IF
608                  IF( MYROW.EQ.ICROW1 )
609     $               CALL ZAXPY( NQC2, ONE, C( IOFFC1 ), LDC,
610     $                           WORK( IPW ), MAX( 1, NQC2 ) )
611*
612                  CALL ZGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1,
613     $                          WORK( IPW ), MAX( 1, NQC2 ), RDEST,
614     $                          MYCOL )
615*
616*                 sub( C ) := sub( C ) - v * w'
617*
618                  IF( MYROW.EQ.ICROW1 )
619     $               CALL ZAXPY( NQC2, -TAULOC, WORK( IPW ),
620     $                           MAX( 1, NQC2 ), C( IOFFC1 ), LDC )
621                  CALL ZGERC( MPV, NQC2, -TAULOC, WORK, 1, WORK( IPW ),
622     $                        1, C( IOFFC2 ), LDC )
623               END IF
624*
625            END IF
626*
627         END IF
628*
629      ELSE
630*
631         IF( CCBLCK ) THEN
632            RDEST = MYROW
633         ELSE
634            RDEST = -1
635         END IF
636*
637         IF( CRBLCK ) THEN
638*
639*           sub( C ) is distributed over a process row
640*
641            IF( DESCV( M_ ).EQ.INCV ) THEN
642*
643*              V is a row vector
644*
645               IF( IVROW.EQ.ICROW2 ) THEN
646*
647*                 Perform the local computation within a process row
648*
649                  IF( MYROW.EQ.ICROW2 ) THEN
650*
651                     TAULOC = DCONJG( TAU( IIV ) )
652*
653                     IF( TAULOC.NE.ZERO ) THEN
654*
655*                       w := sub( C ) * v
656*
657                        IF( NQV.GT.0 ) THEN
658                           CALL ZGEMV( 'No transpose', MPC2, NQV, ONE,
659     $                                 C( IOFFC2 ), LDC, V( IOFFV ),
660     $                                 LDV, ZERO, WORK, 1 )
661                        ELSE
662                           CALL ZLASET( 'All', MPC2, 1, ZERO, ZERO,
663     $                                  WORK, MAX( 1, MPC2 ) )
664                        END IF
665                        IF( MYCOL.EQ.ICCOL1 )
666     $                     CALL ZAXPY( MPC2, ONE, C( IOFFC1 ), 1,
667     $                                   WORK, 1 )
668*
669                        CALL ZGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1,
670     $                                WORK, MAX( 1, MPC2 ), RDEST,
671     $                               ICCOL2 )
672*
673                        IF( MYCOL.EQ.ICCOL1 )
674     $                     CALL ZAXPY( MPC2, -TAULOC, WORK, 1,
675     $                                 C( IOFFC1 ), 1 )
676*
677*                       sub( C ) := sub( C ) - w * v'
678*
679                        CALL ZGERC( MPC2, NQV, -TAULOC, WORK, 1,
680     $                              V( IOFFV ), LDV, C( IOFFC2 ), LDC )
681                     END IF
682*
683                  END IF
684*
685               ELSE
686*
687*                 Send V and TAU to the process row ICROW2
688*
689                  IF( MYROW.EQ.IVROW ) THEN
690*
691                     IPW = NQV+1
692                     CALL ZCOPY( NQV, V( IOFFV ), LDV, WORK, 1 )
693                     WORK( IPW ) = TAU( IIV )
694                     CALL ZGESD2D( ICTXT, IPW, 1, WORK, IPW, ICROW2,
695     $                             MYCOL )
696*
697                  ELSE IF( MYROW.EQ.ICROW2 ) THEN
698*
699                     IPW = NQV+1
700                     CALL ZGERV2D( ICTXT, IPW, 1, WORK, IPW, IVROW,
701     $                             MYCOL )
702                     TAULOC = DCONJG( WORK( IPW ) )
703*
704                     IF( TAULOC.NE.ZERO ) THEN
705*
706*                       w := sub( C ) * v
707*
708                        IF( NQV.GT.0 ) THEN
709                           CALL ZGEMV( 'No transpose', MPC2, NQV, ONE,
710     $                                 C( IOFFC2 ), LDC, WORK, 1, ZERO,
711     $                                 WORK( IPW ), 1 )
712                        ELSE
713                           CALL ZLASET( 'All', MPC2, 1, ZERO, ZERO,
714     $                                  WORK( IPW ), MAX( 1, MPC2 ) )
715                        END IF
716                        IF( MYCOL.EQ.ICCOL1 )
717     $                     CALL ZAXPY( MPC2, ONE, C( IOFFC1 ), 1,
718     $                                   WORK( IPW ), 1 )
719                        CALL ZGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1,
720     $                                WORK( IPW ), MAX( 1, MPC2 ),
721     $                                RDEST, ICCOL2 )
722                        IF( MYCOL.EQ.ICCOL1 )
723     $                     CALL ZAXPY( MPC2, -TAULOC, WORK( IPW ), 1,
724     $                                 C( IOFFC1 ), 1 )
725*
726*                       sub( C ) := sub( C ) - w * v'
727*
728                        CALL ZGERC( MPC2, NQV, -TAULOC, WORK( IPW ), 1,
729     $                              WORK, 1, C( IOFFC2 ), LDC )
730                     END IF
731*
732                  END IF
733*
734               END IF
735*
736            ELSE
737*
738*              Transpose column vector V (IROFFV = ICOFFC2)
739*
740               IPW = NQV+1
741               CALL PBZTRNV( ICTXT, 'Columnwise', 'Transpose', N,
742     $                       DESCV( MB_ ), ICOFFC2, V( IOFFV ), 1, ZERO,
743     $                       WORK, 1, IVROW, IVCOL, ICROW2, ICCOL2,
744     $                       WORK( IPW ) )
745*
746*              Perform the local computation within a process column
747*
748               IF( MYROW.EQ.ICROW2 ) THEN
749*
750                  IF( MYCOL.EQ.IVCOL ) THEN
751*
752                     CALL ZGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1,
753     $                             TAU( JJV ), 1 )
754                     TAULOC = DCONJG( TAU( JJV ) )
755*
756                  ELSE
757*
758                     CALL ZGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, TAULOC,
759     $                             1, MYROW, IVCOL )
760                     TAULOC = DCONJG( TAULOC )
761*
762                  END IF
763*
764                  IF( TAULOC.NE.ZERO ) THEN
765*
766*                    w := sub( C ) * v
767*
768                     IF( NQV.GT.0 ) THEN
769                        CALL ZGEMV( 'No transpose', MPC2, NQV, ONE,
770     $                              C( IOFFC2 ), LDC, WORK, 1, ZERO,
771     $                              WORK( IPW ), 1 )
772                     ELSE
773                        CALL ZLASET( 'All', MPC2, 1, ZERO, ZERO,
774     $                               WORK( IPW ), MAX( 1, MPC2 ) )
775                     END IF
776                     IF( MYCOL.EQ.ICCOL1 )
777     $                  CALL ZAXPY( MPC2, ONE, C( IOFFC1 ), 1,
778     $                              WORK( IPW ), 1 )
779                     CALL ZGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1,
780     $                             WORK( IPW ), MAX( 1, MPC2 ), RDEST,
781     $                             ICCOL2 )
782                     IF( MYCOL.EQ.ICCOL1 )
783     $                  CALL ZAXPY( MPC2, -TAULOC, WORK( IPW ), 1,
784     $                              C( IOFFC1 ), 1 )
785*
786*                    sub( C ) := sub( C ) - w * v'
787*
788                     CALL ZGERC( MPC2, NQV, -TAULOC, WORK( IPW ), 1,
789     $                           WORK, 1, C( IOFFC2 ), LDC )
790                  END IF
791*
792               END IF
793*
794            END IF
795*
796         ELSE
797*
798*           sub( C ) is a proper distributed matrix
799*
800            IF( DESCV( M_ ).EQ.INCV ) THEN
801*
802*              Broadcast row vector V
803*
804               CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise',
805     $                         COLBTOP )
806               IF( MYROW.EQ.IVROW ) THEN
807*
808                  IPW = NQV+1
809                  CALL ZCOPY( NQV, V( IOFFV ), LDV, WORK, 1 )
810                  WORK( IPW ) = TAU( IIV )
811                  CALL ZGEBS2D( ICTXT, 'Columnwise', COLBTOP, IPW, 1,
812     $                          WORK, IPW )
813                  TAULOC = DCONJG( TAU( IIV ) )
814*
815               ELSE
816*
817                  IPW = NQV+1
818                  CALL ZGEBR2D( ICTXT, 'Columnwise', COLBTOP, IPW, 1,
819     $                          WORK, IPW, IVROW, MYCOL )
820                  TAULOC = DCONJG( WORK( IPW ) )
821*
822               END IF
823*
824               IF( TAULOC.NE.ZERO ) THEN
825*
826*                 w := sub( C ) * v
827*
828                  IF( NQV.GT.0 ) THEN
829                     CALL ZGEMV( 'No Transpose', MPC2, NQV, ONE,
830     $                           C( IOFFC2 ), LDC, WORK, 1, ZERO,
831     $                           WORK( IPW ), 1 )
832                  ELSE
833                     CALL ZLASET( 'All', MPC2, 1, ZERO, ZERO,
834     $                            WORK( IPW ), MAX( 1, MPC2 ) )
835                  END IF
836                  IF( MYCOL.EQ.ICCOL1 )
837     $               CALL ZAXPY( MPC2, ONE, C( IOFFC1 ), 1,
838     $                           WORK( IPW ), 1 )
839*
840                  CALL ZGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1,
841     $                          WORK( IPW ), MAX( 1, MPC2 ), RDEST,
842     $                          ICCOL2 )
843                  IF( MYCOL.EQ.ICCOL1 )
844     $               CALL ZAXPY( MPC2, -TAULOC, WORK( IPW ), 1,
845     $                           C( IOFFC1 ), 1 )
846*
847*                 sub( C ) := sub( C ) - w * v'
848*
849                  CALL ZGERC( MPC2, NQV, -TAULOC, WORK( IPW ), 1, WORK,
850     $                        1, C( IOFFC2 ), LDC )
851               END IF
852*
853            ELSE
854*
855*              Transpose and broadcast column vector V (ICOFFC2=IROFFV)
856*
857               IPW = NQV+1
858               CALL PBZTRNV( ICTXT, 'Columnwise', 'Transpose', N,
859     $                       DESCV( MB_ ), ICOFFC2, V( IOFFV ), 1, ZERO,
860     $                       WORK, 1, IVROW, IVCOL, -1, ICCOL2,
861     $                       WORK( IPW ) )
862*
863*              Perform the local computation within a process column
864*
865               IF( MYCOL.EQ.IVCOL ) THEN
866*
867                  CALL ZGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, TAU( JJV ),
868     $                          1 )
869                  TAULOC = DCONJG( TAU( JJV ) )
870*
871               ELSE
872*
873                  CALL ZGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, TAULOC, 1,
874     $                          MYROW, IVCOL )
875                  TAULOC = DCONJG( TAULOC )
876*
877               END IF
878*
879               IF( TAULOC.NE.ZERO ) THEN
880*
881*                 w := sub( C ) * v
882*
883                  IF( NQV.GT.0 ) THEN
884                     CALL ZGEMV( 'No transpose', MPC2, NQV, ONE,
885     $                           C( IOFFC2 ), LDC, WORK, 1, ZERO,
886     $                           WORK( IPW ), 1 )
887                  ELSE
888                     CALL ZLASET( 'All', MPC2, 1, ZERO, ZERO,
889     $                            WORK( IPW ), MAX( 1, MPC2 ) )
890                  END IF
891                  IF( MYCOL.EQ.ICCOL1 )
892     $               CALL ZAXPY( MPC2, ONE, C( IOFFC1 ), 1,
893     $                           WORK( IPW ), 1 )
894                  CALL ZGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1,
895     $                          WORK( IPW ), MAX( 1, MPC2 ), RDEST,
896     $                          ICCOL2 )
897                  IF( MYCOL.EQ.ICCOL1 )
898     $               CALL ZAXPY( MPC2, -TAULOC, WORK( IPW ), 1,
899     $                           C( IOFFC1 ), 1 )
900*
901*                 sub( C ) := sub( C ) - w * v'
902*
903                  CALL ZGERC( MPC2, NQV, -TAULOC, WORK( IPW ), 1, WORK,
904     $                        1, C( IOFFC2 ), LDC )
905               END IF
906*
907            END IF
908*
909         END IF
910*
911      END IF
912*
913      RETURN
914*
915*     End of PZLARZC
916*
917      END
918