1      SUBROUTINE PDLARF( SIDE, M, N, V, IV, JV, DESCV, INCV, TAU,
2     $                   C, IC, JC, DESCC, WORK )
3*
4*  -- ScaLAPACK auxiliary routine (version 1.7) --
5*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6*     and University of California, Berkeley.
7*     May 25, 2001
8*
9*     .. Scalar Arguments ..
10      CHARACTER          SIDE
11      INTEGER            IC, INCV, IV, JC, JV, M, N
12*     ..
13*     .. Array Arguments ..
14      INTEGER            DESCC( * ), DESCV( * )
15      DOUBLE PRECISION   C( * ), TAU( * ), V( * ), WORK( * )
16*     ..
17*
18*  Purpose
19*  =======
20*
21*  PDLARF applies a real elementary reflector Q (or Q**T) to a real
22*  M-by-N distributed matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1), from
23*  either the left or the right. Q is represented in the form
24*
25*        Q = I - tau * v * v'
26*
27*  where tau is a real scalar and v is a real vector.
28*
29*  If tau = 0, then Q is taken to be the unit matrix.
30*
31*  Notes
32*  =====
33*
34*  Each global data object is described by an associated description
35*  vector.  This vector stores the information required to establish
36*  the mapping between an object element and its corresponding process
37*  and memory location.
38*
39*  Let A be a generic term for any 2D block cyclicly distributed array.
40*  Such a global array has an associated description vector DESCA.
41*  In the following comments, the character _ should be read as
42*  "of the global array".
43*
44*  NOTATION        STORED IN      EXPLANATION
45*  --------------- -------------- --------------------------------------
46*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
47*                                 DTYPE_A = 1.
48*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
49*                                 the BLACS process grid A is distribu-
50*                                 ted over. The context itself is glo-
51*                                 bal, but the handle (the integer
52*                                 value) may vary.
53*  M_A    (global) DESCA( M_ )    The number of rows in the global
54*                                 array A.
55*  N_A    (global) DESCA( N_ )    The number of columns in the global
56*                                 array A.
57*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
58*                                 the rows of the array.
59*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
60*                                 the columns of the array.
61*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
62*                                 row of the array A is distributed.
63*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
64*                                 first column of the array A is
65*                                 distributed.
66*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
67*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
68*
69*  Let K be the number of rows or columns of a distributed matrix,
70*  and assume that its process grid has dimension p x q.
71*  LOCr( K ) denotes the number of elements of K that a process
72*  would receive if K were distributed over the p processes of its
73*  process column.
74*  Similarly, LOCc( K ) denotes the number of elements of K that a
75*  process would receive if K were distributed over the q processes of
76*  its process row.
77*  The values of LOCr() and LOCc() may be determined via a call to the
78*  ScaLAPACK tool function, NUMROC:
79*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
80*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
81*  An upper bound for these quantities may be computed by:
82*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
83*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
84*
85*  Because vectors may be viewed as a subclass of matrices, a
86*  distributed vector is considered to be a distributed matrix.
87*
88*  Restrictions
89*  ============
90*
91*  If SIDE = 'Left' and INCV = 1, then the row process having the first
92*  entry V(IV,JV) must also have the first row of sub( C ). Moreover,
93*  MOD(IV-1,MB_V) must be equal to MOD(IC-1,MB_C), if INCV=M_V, only
94*  the last equality must be satisfied.
95*
96*  If SIDE = 'Right' and INCV = M_V then the column process having the
97*  first entry V(IV,JV) must also have the first column of sub( C ) and
98*  MOD(JV-1,NB_V) must be equal to MOD(JC-1,NB_C), if INCV = 1 only the
99*  last equality must be satisfied.
100*
101*  Arguments
102*  =========
103*
104*  SIDE    (global input) CHARACTER
105*          = 'L': form  Q * sub( C ),
106*          = 'R': form  sub( C ) * Q, Q = Q**T.
107*
108*  M       (global input) INTEGER
109*          The number of rows to be operated on i.e the number of rows
110*          of the distributed submatrix sub( C ). M >= 0.
111*
112*  N       (global input) INTEGER
113*          The number of columns to be operated on i.e the number of
114*          columns of the distributed submatrix sub( C ). N >= 0.
115*
116*  V       (local input) DOUBLE PRECISION pointer into the local memory
117*          to an array of dimension (LLD_V,*) containing the local
118*          pieces of the distributed vectors V representing the
119*          Householder transformation Q,
120*             V(IV:IV+M-1,JV) if SIDE = 'L' and INCV = 1,
121*             V(IV,JV:JV+M-1) if SIDE = 'L' and INCV = M_V,
122*             V(IV:IV+N-1,JV) if SIDE = 'R' and INCV = 1,
123*             V(IV,JV:JV+N-1) if SIDE = 'R' and INCV = M_V,
124*
125*          The vector v in the representation of Q. V is not used if
126*          TAU = 0.
127*
128*  IV      (global input) INTEGER
129*          The row index in the global array V indicating the first
130*          row of sub( V ).
131*
132*  JV      (global input) INTEGER
133*          The column index in the global array V indicating the
134*          first column of sub( V ).
135*
136*  DESCV   (global and local input) INTEGER array of dimension DLEN_.
137*          The array descriptor for the distributed matrix V.
138*
139*  INCV    (global input) INTEGER
140*          The global increment for the elements of V. Only two values
141*          of INCV are supported in this version, namely 1 and M_V.
142*          INCV must not be zero.
143*
144*  TAU     (local input) DOUBLE PRECISION array, dimension  LOCc(JV) if
145*          INCV = 1, and LOCr(IV) otherwise. This array contains the
146*          Householder scalars related to the Householder vectors.
147*          TAU is tied to the distributed matrix V.
148*
149*  C       (local input/local output) DOUBLE PRECISION pointer into the
150*          local memory to an array of dimension (LLD_C, LOCc(JC+N-1) ),
151*          containing the local pieces of sub( C ). On exit, sub( C )
152*          is overwritten by the Q * sub( C ) if SIDE = 'L', or
153*          sub( C ) * Q if SIDE = 'R'.
154*
155*  IC      (global input) INTEGER
156*          The row index in the global array C indicating the first
157*          row of sub( C ).
158*
159*  JC      (global input) INTEGER
160*          The column index in the global array C indicating the
161*          first column of sub( C ).
162*
163*  DESCC   (global and local input) INTEGER array of dimension DLEN_.
164*          The array descriptor for the distributed matrix C.
165*
166*  WORK    (local workspace) DOUBLE PRECISION array, dimension (LWORK)
167*          If INCV = 1,
168*            if SIDE = 'L',
169*              if IVCOL = ICCOL,
170*                LWORK >= NqC0
171*              else
172*                LWORK >= MpC0 + MAX( 1, NqC0 )
173*              end if
174*            else if SIDE = 'R',
175*              LWORK >= NqC0 + MAX( MAX( 1, MpC0 ), NUMROC( NUMROC(
176*                       N+ICOFFC,NB_V,0,0,NPCOL ),NB_V,0,0,LCMQ ) )
177*            end if
178*          else if INCV = M_V,
179*            if SIDE = 'L',
180*              LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( NUMROC(
181*                       M+IROFFC,MB_V,0,0,NPROW ),MB_V,0,0,LCMP ) )
182*            else if SIDE = 'R',
183*              if IVROW = ICROW,
184*                LWORK >= MpC0
185*              else
186*                LWORK >= NqC0 + MAX( 1, MpC0 )
187*              end if
188*            end if
189*          end if
190*
191*          where LCM is the least common multiple of NPROW and NPCOL and
192*          LCM = ILCM( NPROW, NPCOL ), LCMP = LCM / NPROW,
193*          LCMQ = LCM / NPCOL,
194*
195*          IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ),
196*          ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ),
197*          ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ),
198*          MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ),
199*          NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ),
200*
201*          ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions;
202*          MYROW, MYCOL, NPROW and NPCOL can be determined by calling
203*          the subroutine BLACS_GRIDINFO.
204*
205*  Alignment requirements
206*  ======================
207*
208*  The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1)
209*  must verify some alignment properties, namely the following
210*  expressions should be true:
211*
212*  MB_V = NB_V,
213*
214*  If INCV = 1,
215*    If SIDE = 'Left',
216*      ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW )
217*    If SIDE = 'Right',
218*      ( MB_V.EQ.NB_A .AND. MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC )
219*  else if INCV = M_V,
220*    If SIDE = 'Left',
221*      ( MB_V.EQ.NB_V .AND. MB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC )
222*    If SIDE = 'Right',
223*      ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL )
224*  end if
225*
226*  =====================================================================
227*
228*     .. Parameters ..
229      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
230     $                   LLD_, MB_, M_, NB_, N_, RSRC_
231      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
232     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
233     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
234      DOUBLE PRECISION   ONE, ZERO
235      PARAMETER          ( ONE  = 1.0D+0, ZERO = 0.0D+0 )
236*     ..
237*     .. Local Scalars ..
238      LOGICAL            CCBLCK, CRBLCK
239      CHARACTER          COLBTOP, ROWBTOP
240      INTEGER            ICCOL, ICOFF, ICROW, ICTXT, IIC, IIV, IOFFC,
241     $                   IOFFV, IPW, IROFF, IVCOL, IVROW, JJC, JJV, LDC,
242     $                   LDV, MYCOL, MYROW, MP, NCC, NCV, NPCOL, NPROW,
243     $                   NQ, RDEST
244      DOUBLE PRECISION   TAULOC
245*     ..
246*     .. External Subroutines ..
247      EXTERNAL           BLACS_GRIDINFO, DCOPY, DGEBR2D, DGEBS2D,
248     $                   DGEMV, DGER, DGERV2D, DGESD2D,
249     $                   DGSUM2D, DLASET, INFOG2L, PB_TOPGET,
250     $                   PBDTRNV
251*     ..
252*     .. External Functions ..
253      LOGICAL            LSAME
254      INTEGER            NUMROC
255      EXTERNAL           LSAME, NUMROC
256*     ..
257*     .. Intrinsic Functions ..
258      INTRINSIC          MIN, MOD
259*     ..
260*     .. Executable Statements ..
261*
262*     Quick return if possible
263*
264      IF( M.LE.0 .OR. N.LE.0 )
265     $   RETURN
266*
267*     Get grid parameters.
268*
269      ICTXT = DESCC( CTXT_ )
270      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
271*
272*     Figure local indexes
273*
274      CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, IIC, JJC,
275     $              ICROW, ICCOL )
276      CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, IIV, JJV,
277     $              IVROW, IVCOL )
278      NCC = NUMROC( DESCC( N_ ), DESCC( NB_ ), MYCOL, DESCC( CSRC_ ),
279     $              NPCOL )
280      NCV = NUMROC( DESCV( N_ ), DESCV( NB_ ), MYCOL, DESCV( CSRC_ ),
281     $              NPCOL )
282      LDC = DESCC( LLD_ )
283      LDV = DESCV( LLD_ )
284      IIC = MIN( IIC, LDC )
285      IIV = MIN( IIV, LDV )
286      JJC = MIN( JJC, NCC )
287      JJV = MIN( JJV, NCV )
288      IOFFC = IIC+(JJC-1)*LDC
289      IOFFV = IIV+(JJV-1)*LDV
290*
291      IROFF = MOD( IC-1, DESCC( MB_ ) )
292      ICOFF = MOD( JC-1, DESCC( NB_ ) )
293      MP = NUMROC( M+IROFF, DESCC( MB_ ), MYROW, ICROW, NPROW )
294      NQ = NUMROC( N+ICOFF, DESCC( NB_ ), MYCOL, ICCOL, NPCOL )
295      IF( MYROW.EQ.ICROW )
296     $   MP = MP - IROFF
297      IF( MYCOL.EQ.ICCOL )
298     $   NQ = NQ - ICOFF
299*
300*     Is sub( C ) only distributed over a process row ?
301*
302      CRBLCK = ( M.LE.(DESCC( MB_ )-IROFF) )
303*
304*     Is sub( C ) only distributed over a process column ?
305*
306      CCBLCK = ( N.LE.(DESCC( NB_ )-ICOFF) )
307*
308      IF( LSAME( SIDE, 'L' ) ) THEN
309*
310         IF( CRBLCK ) THEN
311            RDEST = ICROW
312         ELSE
313            RDEST = -1
314         END IF
315*
316         IF( CCBLCK ) THEN
317*
318*           sub( C ) is distributed over a process column
319*
320            IF( DESCV( M_ ).EQ.INCV ) THEN
321*
322*              Transpose row vector V
323*
324               IPW = MP+1
325               CALL PBDTRNV( ICTXT, 'Rowwise', 'Transpose', M,
326     $                       DESCV( NB_ ), IROFF, V( IOFFV ), LDV, ZERO,
327     $                       WORK, 1, IVROW, IVCOL, ICROW, ICCOL,
328     $                       WORK( IPW ) )
329*
330*              Perform the local computation within a process column
331*
332               IF( MYCOL.EQ.ICCOL ) THEN
333*
334                  IF( MYROW.EQ.IVROW ) THEN
335*
336                     CALL DGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1,
337     $                             TAU( IIV ), 1 )
338                     TAULOC = TAU( IIV )
339*
340                  ELSE
341*
342                     CALL DGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1,
343     $                             TAULOC, 1, IVROW, MYCOL )
344*
345                  END IF
346*
347                  IF( TAULOC.NE.ZERO ) THEN
348*
349*                    w := sub( C )' * v
350*
351                     IF( MP.GT.0 ) THEN
352                        CALL DGEMV( 'Transpose', MP, NQ, ONE,
353     $                              C( IOFFC ), LDC, WORK, 1, ZERO,
354     $                              WORK( IPW ), 1 )
355                     ELSE
356                        CALL DLASET( 'All', NQ, 1, ZERO, ZERO,
357     $                               WORK( IPW ), MAX( 1, NQ ) )
358                     END IF
359                     CALL DGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1,
360     $                             WORK( IPW ), MAX( 1, NQ ), RDEST,
361     $                             MYCOL )
362*
363*                    sub( C ) := sub( C ) - v * w'
364*
365                     CALL DGER( MP, NQ, -TAULOC, WORK, 1, WORK( IPW ),
366     $                          1, C( IOFFC ), LDC )
367                  END IF
368*
369               END IF
370*
371            ELSE
372*
373*              V is a column vector
374*
375               IF( IVCOL.EQ.ICCOL ) THEN
376*
377*                 Perform the local computation within a process column
378*
379                  IF( MYCOL.EQ.ICCOL ) THEN
380*
381                     TAULOC = TAU( JJV )
382*
383                     IF( TAULOC.NE.ZERO ) THEN
384*
385*                       w := sub( C )' * v
386*
387                        IF( MP.GT.0 ) THEN
388                           CALL DGEMV( 'Transpose', MP, NQ, ONE,
389     $                                 C( IOFFC ), LDC, V( IOFFV ), 1,
390     $                                 ZERO, WORK, 1 )
391                        ELSE
392                           CALL DLASET( 'All', NQ, 1, ZERO, ZERO,
393     $                                  WORK, MAX( 1, NQ ) )
394                        END IF
395                        CALL DGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1,
396     $                                WORK, MAX( 1, NQ ), RDEST, MYCOL )
397*
398*                       sub( C ) := sub( C ) - v * w'
399*
400                        CALL DGER( MP, NQ, -TAULOC, V( IOFFV ), 1, WORK,
401     $                             1, C( IOFFC ), LDC )
402                     END IF
403*
404                  END IF
405*
406               ELSE
407*
408*                 Send V and TAU to the process column ICCOL
409*
410                  IF( MYCOL.EQ.IVCOL ) THEN
411*
412                     IPW = MP+1
413                     CALL DCOPY( MP, V( IOFFV ), 1, WORK, 1 )
414                     WORK( IPW ) = TAU( JJV )
415                     CALL DGESD2D( ICTXT, IPW, 1, WORK, IPW, MYROW,
416     $                             ICCOL )
417*
418                  ELSE IF( MYCOL.EQ.ICCOL ) THEN
419*
420                     IPW = MP+1
421                     CALL DGERV2D( ICTXT, IPW, 1, WORK, IPW, MYROW,
422     $                             IVCOL )
423                     TAULOC = WORK( IPW )
424*
425                     IF( TAULOC.NE.ZERO ) THEN
426*
427*                       w := sub( C )' * v
428*
429                        IF( MP.GT.0 ) THEN
430                           CALL DGEMV( 'Transpose', MP, NQ, ONE,
431     $                                 C( IOFFC ), LDC, WORK, 1, ZERO,
432     $                                 WORK( IPW ), 1 )
433                        ELSE
434                           CALL DLASET( 'All', NQ, 1, ZERO, ZERO,
435     $                                  WORK( IPW ), MAX( 1, NQ ) )
436                        END IF
437                        CALL DGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1,
438     $                                WORK( IPW ), MAX( 1, NQ ), RDEST,
439     $                                MYCOL )
440*
441*                       sub( C ) := sub( C ) - v * w'
442*
443                        CALL DGER( MP, NQ, -TAULOC, WORK, 1,
444     $                             WORK( IPW ), 1, C( IOFFC ), LDC )
445                     END IF
446*
447                  END IF
448*
449               END IF
450*
451            END IF
452*
453         ELSE
454*
455*           sub( C ) is a proper distributed matrix
456*
457            IF( DESCV( M_ ).EQ.INCV ) THEN
458*
459*              Transpose and broadcast row vector V
460*
461               IPW = MP+1
462               CALL PBDTRNV( ICTXT, 'Rowwise', 'Transpose', M,
463     $                       DESCV( NB_ ), IROFF, V( IOFFV ), LDV, ZERO,
464     $                       WORK, 1, IVROW, IVCOL, ICROW, -1,
465     $                       WORK( IPW ) )
466*
467*              Perform the local computation within a process column
468*
469               IF( MYROW.EQ.IVROW ) THEN
470*
471                  CALL DGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1,
472     $                          TAU( IIV ), 1 )
473                  TAULOC = TAU( IIV )
474*
475               ELSE
476*
477                  CALL DGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, TAULOC,
478     $                          1, IVROW, MYCOL )
479*
480               END IF
481*
482               IF( TAULOC.NE.ZERO ) THEN
483*
484*                 w := sub( C )' * v
485*
486                  IF( MP.GT.0 ) THEN
487                     IF( IOFFC.GT.0 )
488     $                  CALL DGEMV( 'Transpose', MP, NQ, ONE,
489     $                              C( IOFFC ), LDC, WORK, 1, ZERO,
490     $                              WORK( IPW ), 1 )
491                  ELSE
492                     CALL DLASET( 'All', NQ, 1, ZERO, ZERO,
493     $                            WORK( IPW ), MAX( 1, NQ ) )
494                  END IF
495                  CALL DGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1,
496     $                          WORK( IPW ), MAX( 1, NQ ), RDEST,
497     $                          MYCOL )
498*
499*                 sub( C ) := sub( C ) - v * w'
500*
501                  IF( IOFFC.GT.0 )
502     $               CALL DGER( MP, NQ, -TAULOC, WORK, 1, WORK( IPW ),
503     $                          1, C( IOFFC ), LDC )
504               END IF
505*
506            ELSE
507*
508*              Broadcast column vector V
509*
510               CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP )
511               IF( MYCOL.EQ.IVCOL ) THEN
512*
513                  IPW = MP+1
514                  CALL DCOPY( MP, V( IOFFV ), 1, WORK, 1 )
515                  WORK(IPW) = TAU( JJV )
516                  CALL DGEBS2D( ICTXT, 'Rowwise', ROWBTOP, IPW, 1,
517     $                          WORK, IPW )
518                  TAULOC = TAU( JJV )
519*
520               ELSE
521*
522                  IPW = MP+1
523                  CALL DGEBR2D( ICTXT, 'Rowwise', ROWBTOP, IPW, 1, WORK,
524     $                          IPW, MYROW, IVCOL )
525                  TAULOC = WORK( IPW )
526*
527               END IF
528*
529               IF( TAULOC.NE.ZERO ) THEN
530*
531*                 w := sub( C )' * v
532*
533                  IF( MP.GT.0 ) THEN
534                     IF( IOFFC.GT.0 )
535     $                  CALL DGEMV( 'Transpose', MP, NQ, ONE,
536     $                              C( IOFFC ), LDC, WORK, 1, ZERO,
537     $                              WORK( IPW ), 1 )
538                  ELSE
539                     CALL DLASET( 'All', NQ, 1, ZERO, ZERO,
540     $                            WORK( IPW ), MAX( 1, NQ ) )
541                  END IF
542                  CALL DGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1,
543     $                          WORK( IPW ), MAX( 1, NQ ), RDEST,
544     $                          MYCOL )
545*
546*                 sub( C ) := sub( C ) - v * w'
547*
548                  IF( IOFFC.GT.0 )
549     $               CALL DGER( MP, NQ, -TAULOC, WORK, 1, WORK( IPW ),
550     $                          1, C( IOFFC ), LDC )
551               END IF
552*
553            END IF
554*
555         END IF
556*
557      ELSE
558*
559         IF( CCBLCK ) THEN
560            RDEST = MYROW
561         ELSE
562            RDEST = -1
563         END IF
564*
565         IF( CRBLCK ) THEN
566*
567*           sub( C ) is distributed over a process row
568*
569            IF( DESCV( M_ ).EQ.INCV ) THEN
570*
571*              V is a row vector
572*
573               IF( IVROW.EQ.ICROW ) THEN
574*
575*                 Perform the local computation within a process row
576*
577                  IF( MYROW.EQ.ICROW ) THEN
578*
579                     TAULOC = TAU( IIV )
580*
581                     IF( TAULOC.NE.ZERO ) THEN
582*
583*                       w := sub( C ) * v
584*
585                        IF( NQ.GT.0 ) THEN
586                           CALL DGEMV( 'No transpose', MP, NQ, ONE,
587     $                                 C( IOFFC ), LDC, V( IOFFV ), LDV,
588     $                                 ZERO, WORK, 1 )
589                        ELSE
590                           CALL DLASET( 'All', MP, 1, ZERO, ZERO,
591     $                                  WORK, MAX( 1, MP ) )
592                        END IF
593                        CALL DGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1,
594     $                                WORK, MAX( 1, MP ), RDEST, ICCOL )
595*
596*                       sub( C ) := sub( C ) - w * v'
597*
598                        IF( IOFFV.GT.0 .AND. IOFFC.GT.0 )
599     $                     CALL DGER( MP, NQ, -TAULOC, WORK, 1,
600     $                                V( IOFFV ), LDV, C( IOFFC ), LDC )
601                     END IF
602*
603                  END IF
604*
605               ELSE
606*
607*                 Send V and TAU to the process row ICROW
608*
609                  IF( MYROW.EQ.IVROW ) THEN
610*
611                     IPW = NQ+1
612                     CALL DCOPY( NQ, V( IOFFV ), LDV, WORK, 1 )
613                     WORK(IPW) = TAU( IIV )
614                     CALL DGESD2D( ICTXT, IPW, 1, WORK, IPW, ICROW,
615     $                             MYCOL )
616*
617                  ELSE IF( MYROW.EQ.ICROW ) THEN
618*
619                     IPW = NQ+1
620                     CALL DGERV2D( ICTXT, IPW, 1, WORK, IPW, IVROW,
621     $                             MYCOL )
622                     TAULOC = WORK( IPW )
623*
624                     IF( TAULOC.NE.ZERO ) THEN
625*
626*                       w := sub( C ) * v
627*
628                        IF( NQ.GT.0 ) THEN
629                           CALL DGEMV( 'No transpose', MP, NQ, ONE,
630     $                                 C( IOFFC ), LDC, WORK, 1, ZERO,
631     $                                 WORK( IPW ), 1 )
632                        ELSE
633                           CALL DLASET( 'All', MP, 1, ZERO, ZERO,
634     $                                  WORK( IPW ), MAX( 1, MP ) )
635                        END IF
636                        CALL DGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1,
637     $                                WORK( IPW ), MAX( 1, MP ), RDEST,
638     $                                ICCOL )
639*
640*                       sub( C ) := sub( C ) - w * v'
641*
642                        CALL DGER( MP, NQ, -TAULOC, WORK( IPW ), 1,
643     $                             WORK, 1, C( IOFFC ), LDC )
644                     END IF
645*
646                  END IF
647*
648               END IF
649*
650            ELSE
651*
652*              Transpose column vector V
653*
654               IPW = NQ+1
655               CALL PBDTRNV( ICTXT, 'Columnwise', 'Transpose', N,
656     $                       DESCV( MB_ ), ICOFF, V( IOFFV ), 1, ZERO,
657     $                       WORK, 1, IVROW, IVCOL, ICROW, ICCOL,
658     $                       WORK( IPW ) )
659*
660*              Perform the local computation within a process column
661*
662               IF( MYROW.EQ.ICROW ) THEN
663*
664                  IF( MYCOL.EQ.IVCOL ) THEN
665*
666                     CALL DGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1,
667     $                             TAU( JJV ), 1 )
668                     TAULOC = TAU( JJV )
669*
670                  ELSE
671*
672                     CALL DGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, TAULOC,
673     $                             1, MYROW, IVCOL )
674*
675                  END IF
676*
677                  IF( TAULOC.NE.ZERO ) THEN
678*
679*                    w := sub( C ) * v
680*
681                     IF( NQ.GT.0 ) THEN
682                        CALL DGEMV( 'No transpose', MP, NQ, ONE,
683     $                              C( IOFFC ), LDC, WORK, 1, ZERO,
684     $                              WORK( IPW ), 1 )
685                     ELSE
686                        CALL DLASET( 'All', MP, 1, ZERO, ZERO,
687     $                               WORK( IPW ), MAX( 1, MP ) )
688                     END IF
689                     CALL DGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1,
690     $                             WORK( IPW ), MAX( 1, MP ), RDEST,
691     $                             ICCOL )
692*
693*                    sub( C ) := sub( C ) - w * v'
694*
695                     CALL DGER( MP, NQ, -TAULOC, WORK( IPW ), 1, WORK,
696     $                          1, C( IOFFC ), LDC )
697                  END IF
698*
699               END IF
700*
701            END IF
702*
703         ELSE
704*
705*           sub( C ) is a proper distributed matrix
706*
707            IF( DESCV( M_ ).EQ.INCV ) THEN
708*
709*              Broadcast row vector V
710*
711               CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise',
712     $                         COLBTOP )
713               IF( MYROW.EQ.IVROW ) THEN
714*
715                  IPW = NQ+1
716                  IF( IOFFV.GT.0 )
717     $               CALL DCOPY( NQ, V( IOFFV ), LDV, WORK, 1 )
718                  WORK(IPW) = TAU( IIV )
719                  CALL DGEBS2D( ICTXT, 'Columnwise', COLBTOP, IPW, 1,
720     $                          WORK, IPW )
721                  TAULOC = TAU( IIV )
722*
723               ELSE
724*
725                  IPW = NQ+1
726                  CALL DGEBR2D( ICTXT, 'Columnwise', COLBTOP, IPW, 1,
727     $                          WORK, IPW, IVROW, MYCOL )
728                  TAULOC = WORK( IPW )
729*
730               END IF
731*
732               IF( TAULOC.NE.ZERO ) THEN
733*
734*                 w := sub( C ) * v
735*
736                  IF( NQ.GT.0 ) THEN
737                     CALL DGEMV( 'No Transpose', MP, NQ, ONE,
738     $                           C( IOFFC ), LDC, WORK, 1, ZERO,
739     $                           WORK( IPW ), 1 )
740                  ELSE
741                     CALL DLASET( 'All', MP, 1, ZERO, ZERO,
742     $                            WORK( IPW ), MAX( 1, MP ) )
743                  END IF
744                  CALL DGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1,
745     $                          WORK( IPW ), MAX( 1, MP ), RDEST,
746     $                          ICCOL )
747*
748*                 sub( C ) := sub( C ) - w * v'
749*
750                  IF( IOFFC.GT.0 )
751     $               CALL DGER( MP, NQ, -TAULOC, WORK( IPW ), 1, WORK,
752     $                          1, C( IOFFC ), LDC )
753               END IF
754*
755            ELSE
756*
757*              Transpose and broadcast column vector V
758*
759               IPW = NQ+1
760               CALL PBDTRNV( ICTXT, 'Columnwise', 'Transpose', N,
761     $                       DESCV( MB_ ), ICOFF, V( IOFFV ), 1, ZERO,
762     $                       WORK, 1, IVROW, IVCOL, -1, ICCOL,
763     $                       WORK( IPW ) )
764*
765*              Perform the local computation within a process column
766*
767               IF( MYCOL.EQ.IVCOL ) THEN
768*
769                  CALL DGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, TAU( JJV ),
770     $                          1 )
771                  TAULOC = TAU( JJV )
772*
773               ELSE
774*
775                  CALL DGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, TAULOC, 1,
776     $                          MYROW, IVCOL )
777*
778               END IF
779*
780               IF( TAULOC.NE.ZERO ) THEN
781*
782*                 w := sub( C ) * v
783*
784                  IF( NQ.GT.0 ) THEN
785                     CALL DGEMV( 'No transpose', MP, NQ, ONE,
786     $                           C( IOFFC ), LDC, WORK, 1, ZERO,
787     $                           WORK( IPW ), 1 )
788                  ELSE
789                     CALL DLASET( 'All', MP, 1, ZERO, ZERO, WORK( IPW ),
790     $                            MAX( 1, MP ) )
791                  END IF
792                  CALL DGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1,
793     $                          WORK( IPW ), MAX( 1, MP ), RDEST,
794     $                          ICCOL )
795*
796*                 sub( C ) := sub( C ) - w * v'
797*
798                  CALL DGER( MP, NQ, -TAULOC, WORK( IPW ), 1, WORK, 1,
799     $                       C( IOFFC ), LDC )
800               END IF
801*
802            END IF
803*
804         END IF
805*
806      END IF
807*
808      RETURN
809*
810*     End of PDLARF
811*
812      END
813