1      SUBROUTINE PSLARZ( SIDE, M, N, L, V, IV, JV, DESCV, INCV, TAU, C,
2     $                   IC, JC, DESCC, WORK )
3*
4*  -- ScaLAPACK auxiliary routine (version 1.7) --
5*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6*     and University of California, Berkeley.
7*     May 25, 2001
8*
9*     .. Scalar Arguments ..
10      CHARACTER          SIDE
11      INTEGER            IC, INCV, IV, JC, JV, L, M, N
12*     ..
13*     .. Array Arguments ..
14      INTEGER            DESCC( * ), DESCV( * )
15      REAL               C( * ), TAU( * ), V( * ), WORK( * )
16*     ..
17*
18*  Purpose
19*  =======
20*
21*  PSLARZ applies a real elementary reflector Q (or Q**T) to a real
22*  M-by-N distributed matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1), from
23*  either the left or the right. Q is represented in the form
24*
25*        Q = I - tau * v * v'
26*
27*  where tau is a real scalar and v is a real vector.
28*
29*  If tau = 0, then Q is taken to be the unit matrix.
30*
31*  Q is a product of k elementary reflectors as returned by PSTZRZF.
32*
33*  Notes
34*  =====
35*
36*  Each global data object is described by an associated description
37*  vector.  This vector stores the information required to establish
38*  the mapping between an object element and its corresponding process
39*  and memory location.
40*
41*  Let A be a generic term for any 2D block cyclicly distributed array.
42*  Such a global array has an associated description vector DESCA.
43*  In the following comments, the character _ should be read as
44*  "of the global array".
45*
46*  NOTATION        STORED IN      EXPLANATION
47*  --------------- -------------- --------------------------------------
48*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
49*                                 DTYPE_A = 1.
50*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
51*                                 the BLACS process grid A is distribu-
52*                                 ted over. The context itself is glo-
53*                                 bal, but the handle (the integer
54*                                 value) may vary.
55*  M_A    (global) DESCA( M_ )    The number of rows in the global
56*                                 array A.
57*  N_A    (global) DESCA( N_ )    The number of columns in the global
58*                                 array A.
59*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
60*                                 the rows of the array.
61*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
62*                                 the columns of the array.
63*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
64*                                 row of the array A is distributed.
65*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
66*                                 first column of the array A is
67*                                 distributed.
68*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
69*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
70*
71*  Let K be the number of rows or columns of a distributed matrix,
72*  and assume that its process grid has dimension p x q.
73*  LOCr( K ) denotes the number of elements of K that a process
74*  would receive if K were distributed over the p processes of its
75*  process column.
76*  Similarly, LOCc( K ) denotes the number of elements of K that a
77*  process would receive if K were distributed over the q processes of
78*  its process row.
79*  The values of LOCr() and LOCc() may be determined via a call to the
80*  ScaLAPACK tool function, NUMROC:
81*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
82*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
83*  An upper bound for these quantities may be computed by:
84*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
85*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
86*
87*  Because vectors may be viewed as a subclass of matrices, a
88*  distributed vector is considered to be a distributed matrix.
89*
90*  Restrictions
91*  ============
92*
93*  If SIDE = 'Left' and INCV = 1, then the row process having the first
94*  entry V(IV,JV) must also own C(IC+M-L,JC:JC+N-1). Moreover,
95*  MOD(IV-1,MB_V) must be equal to MOD(IC+N-L-1,MB_C), if INCV=M_V, only
96*  the last equality must be satisfied.
97*
98*  If SIDE = 'Right' and INCV = M_V then the column process having the
99*  first entry V(IV,JV) must also own C(IC:IC+M-1,JC+N-L) and
100*  MOD(JV-1,NB_V) must be equal to MOD(JC+N-L-1,NB_C), if INCV = 1 only
101*  the last equality must be satisfied.
102*
103*  Arguments
104*  =========
105*
106*  SIDE    (global input) CHARACTER
107*          = 'L': form  Q * sub( C ),
108*          = 'R': form  sub( C ) * Q, Q = Q**T.
109*
110*  M       (global input) INTEGER
111*          The number of rows to be operated on i.e the number of rows
112*          of the distributed submatrix sub( C ). M >= 0.
113*
114*  N       (global input) INTEGER
115*          The number of columns to be operated on i.e the number of
116*          columns of the distributed submatrix sub( C ). N >= 0.
117*
118*  L       (global input) INTEGER
119*          The columns of the distributed submatrix sub( A ) containing
120*          the meaningful part of the Householder reflectors.
121*          If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0.
122*
123*  V       (local input) REAL pointer into the local memory
124*          to an array of dimension (LLD_V,*) containing the local
125*          pieces of the distributed vectors V representing the
126*          Householder transformation Q,
127*             V(IV:IV+L-1,JV) if SIDE = 'L' and INCV = 1,
128*             V(IV,JV:JV+L-1) if SIDE = 'L' and INCV = M_V,
129*             V(IV:IV+L-1,JV) if SIDE = 'R' and INCV = 1,
130*             V(IV,JV:JV+L-1) if SIDE = 'R' and INCV = M_V,
131*
132*          The vector v in the representation of Q. V is not used if
133*          TAU = 0.
134*
135*  IV      (global input) INTEGER
136*          The row index in the global array V indicating the first
137*          row of sub( V ).
138*
139*  JV      (global input) INTEGER
140*          The column index in the global array V indicating the
141*          first column of sub( V ).
142*
143*  DESCV   (global and local input) INTEGER array of dimension DLEN_.
144*          The array descriptor for the distributed matrix V.
145*
146*  INCV    (global input) INTEGER
147*          The global increment for the elements of V. Only two values
148*          of INCV are supported in this version, namely 1 and M_V.
149*          INCV must not be zero.
150*
151*  TAU     (local input) REAL, array, dimension  LOCc(JV) if
152*          INCV = 1, and LOCr(IV) otherwise. This array contains the
153*          Householder scalars related to the Householder vectors.
154*          TAU is tied to the distributed matrix V.
155*
156*  C       (local input/local output) REAL pointer into the
157*          local memory to an array of dimension (LLD_C, LOCc(JC+N-1) ),
158*          containing the local pieces of sub( C ). On exit, sub( C )
159*          is overwritten by the Q * sub( C ) if SIDE = 'L', or
160*          sub( C ) * Q if SIDE = 'R'.
161*
162*  IC      (global input) INTEGER
163*          The row index in the global array C indicating the first
164*          row of sub( C ).
165*
166*  JC      (global input) INTEGER
167*          The column index in the global array C indicating the
168*          first column of sub( C ).
169*
170*  DESCC   (global and local input) INTEGER array of dimension DLEN_.
171*          The array descriptor for the distributed matrix C.
172*
173*  WORK    (local workspace) REAL array, dimension (LWORK)
174*          If INCV = 1,
175*            if SIDE = 'L',
176*              if IVCOL = ICCOL,
177*                LWORK >= NqC0
178*              else
179*                LWORK >= MpC0 + MAX( 1, NqC0 )
180*              end if
181*            else if SIDE = 'R',
182*              LWORK >= NqC0 + MAX( MAX( 1, MpC0 ), NUMROC( NUMROC(
183*                       N+ICOFFC,NB_V,0,0,NPCOL ),NB_V,0,0,LCMQ ) )
184*            end if
185*          else if INCV = M_V,
186*            if SIDE = 'L',
187*              LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( NUMROC(
188*                       M+IROFFC,MB_V,0,0,NPROW ),MB_V,0,0,LCMP ) )
189*            else if SIDE = 'R',
190*              if IVROW = ICROW,
191*                LWORK >= MpC0
192*              else
193*                LWORK >= NqC0 + MAX( 1, MpC0 )
194*              end if
195*            end if
196*          end if
197*
198*          where LCM is the least common multiple of NPROW and NPCOL and
199*          LCM = ILCM( NPROW, NPCOL ), LCMP = LCM / NPROW,
200*          LCMQ = LCM / NPCOL,
201*
202*          IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ),
203*          ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ),
204*          ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ),
205*          MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ),
206*          NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ),
207*
208*          ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions;
209*          MYROW, MYCOL, NPROW and NPCOL can be determined by calling
210*          the subroutine BLACS_GRIDINFO.
211*
212*  Alignment requirements
213*  ======================
214*
215*  The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1)
216*  must verify some alignment properties, namely the following
217*  expressions should be true:
218*
219*  MB_V = NB_V,
220*
221*  If INCV = 1,
222*    If SIDE = 'Left',
223*      ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW )
224*    If SIDE = 'Right',
225*      ( MB_V.EQ.NB_A .AND. MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC )
226*  else if INCV = M_V,
227*    If SIDE = 'Left',
228*      ( MB_V.EQ.NB_V .AND. MB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC )
229*    If SIDE = 'Right',
230*      ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL )
231*  end if
232*
233*  =====================================================================
234*
235*     .. Parameters ..
236      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
237     $                   LLD_, MB_, M_, NB_, N_, RSRC_
238      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
239     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
240     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
241      REAL               ONE, ZERO
242      PARAMETER          ( ONE  = 1.0E+0, ZERO = 0.0E+0 )
243*     ..
244*     .. Local Scalars ..
245      LOGICAL            CCBLCK, CRBLCK, LEFT
246      CHARACTER          COLBTOP, ROWBTOP
247      INTEGER            ICCOL1, ICCOL2, ICOFFC1, ICOFFC2, ICOFFV,
248     $                   ICROW1, ICROW2, ICTXT, IIC1, IIC2, IIV, IOFFC1,
249     $                   IOFFC2, IOFFV, IPW, IROFFC1, IROFFC2, IROFFV,
250     $                   IVCOL, IVROW, JJC1, JJC2, JJV, LDC, LDV, MPC2,
251     $                   MPV, MYCOL, MYROW, NCC, NCV, NPCOL, NPROW,
252     $                   NQC2, NQV, RDEST
253      REAL               TAULOC
254*     ..
255*     .. External Subroutines ..
256      EXTERNAL           BLACS_GRIDINFO, INFOG2L, PB_TOPGET, PBSTRNV,
257     $                   SAXPY, SCOPY, SGEBR2D, SGEBS2D,
258     $                   SGEMV, SGER, SGERV2D, SGESD2D,
259     $                   SGSUM2D, SLASET
260*     ..
261*     .. External Functions ..
262      LOGICAL            LSAME
263      INTEGER            NUMROC
264      EXTERNAL           LSAME, NUMROC
265*     ..
266*     .. Intrinsic Functions ..
267      INTRINSIC          MIN, MOD
268*     ..
269*     .. Executable Statements ..
270*
271*     Quick return if possible
272*
273      IF( M.LE.0 .OR. N.LE.0 )
274     $   RETURN
275*
276*     Get grid parameters.
277*
278      ICTXT = DESCC( CTXT_ )
279      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
280*
281*     Figure local indexes
282*
283      LEFT = LSAME( SIDE, 'L' )
284      CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, IIV, JJV,
285     $              IVROW, IVCOL )
286      IROFFV = MOD( IV-1, DESCV( NB_ ) )
287      MPV = NUMROC( L+IROFFV, DESCV( MB_ ), MYROW, IVROW, NPROW )
288      IF( MYROW.EQ.IVROW )
289     $   MPV = MPV - IROFFV
290      ICOFFV = MOD( JV-1, DESCV( NB_ ) )
291      NQV = NUMROC( L+ICOFFV, DESCV( NB_ ), MYCOL, IVCOL, NPCOL )
292      IF( MYCOL.EQ.IVCOL )
293     $   NQV = NQV - ICOFFV
294      LDV = DESCV( LLD_ )
295      NCV = NUMROC( DESCV( N_ ), DESCV( NB_ ), MYCOL, DESCV( CSRC_ ),
296     $              NPCOL )
297      LDV = DESCV( LLD_ )
298      IIV = MIN( IIV, LDV )
299      JJV = MIN( JJV, NCV )
300      IOFFV = IIV+(JJV-1)*LDV
301      NCC = NUMROC( DESCC( N_ ), DESCC( NB_ ), MYCOL, DESCC( CSRC_ ),
302     $              NPCOL )
303      CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL,
304     $              IIC1, JJC1, ICROW1, ICCOL1 )
305      IROFFC1 = MOD( IC-1, DESCC( MB_ ) )
306      ICOFFC1 = MOD( JC-1, DESCC( NB_ ) )
307      LDC = DESCC( LLD_ )
308      IIC1 = MIN( IIC1, LDC )
309      JJC1 = MIN( JJC1, MAX( 1, NCC ) )
310      IOFFC1 = IIC1 + ( JJC1-1 ) * LDC
311*
312      IF( LEFT ) THEN
313         CALL INFOG2L( IC+M-L, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL,
314     $                 IIC2, JJC2, ICROW2, ICCOL2 )
315         IROFFC2 = MOD( IC+M-L-1, DESCC( MB_ ) )
316         ICOFFC2 = MOD( JC-1, DESCC( NB_ ) )
317         NQC2 = NUMROC( N+ICOFFC2, DESCC( NB_ ), MYCOL, ICCOL2, NPCOL )
318         IF( MYCOL.EQ.ICCOL2 )
319     $      NQC2 = NQC2 - ICOFFC2
320      ELSE
321         CALL INFOG2L( IC, JC+N-L, DESCC, NPROW, NPCOL, MYROW, MYCOL,
322     $                 IIC2, JJC2, ICROW2, ICCOL2 )
323         IROFFC2 = MOD( IC-1, DESCC( MB_ ) )
324         MPC2 = NUMROC( M+IROFFC2, DESCC( MB_ ), MYROW, ICROW2, NPROW )
325         IF( MYROW.EQ.ICROW2 )
326     $      MPC2 = MPC2 - IROFFC2
327         ICOFFC2 = MOD( JC+N-L-1, DESCC( NB_ ) )
328      END IF
329      IIC2 = MIN( IIC2, LDC )
330      JJC2 = MIN( JJC2, NCC )
331      IOFFC2 = IIC2 + ( JJC2-1 ) * LDC
332*
333*     Is sub( C ) only distributed over a process row ?
334*
335      CRBLCK = ( M.LE.(DESCC( MB_ )-IROFFC1) )
336*
337*     Is sub( C ) only distributed over a process column ?
338*
339      CCBLCK = ( N.LE.(DESCC( NB_ )-ICOFFC1) )
340*
341      IF( LEFT ) THEN
342*
343         IF( CRBLCK ) THEN
344            RDEST = ICROW2
345         ELSE
346            RDEST = -1
347         END IF
348*
349         IF( CCBLCK ) THEN
350*
351*           sub( C ) is distributed over a process column
352*
353            IF( DESCV( M_ ).EQ.INCV ) THEN
354*
355*              Transpose row vector V (ICOFFV = IROFFC2)
356*
357               IPW = MPV+1
358               CALL PBSTRNV( ICTXT, 'Rowwise', 'Transpose', M,
359     $                       DESCV( NB_ ), IROFFC2, V( IOFFV ), LDV,
360     $                       ZERO,
361     $                       WORK, 1, IVROW, IVCOL, ICROW2, ICCOL2,
362     $                       WORK( IPW ) )
363*
364*              Perform the local computation within a process column
365*
366               IF( MYCOL.EQ.ICCOL2 ) THEN
367*
368                  IF( MYROW.EQ.IVROW ) THEN
369*
370                     CALL SGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1,
371     $                             TAU( IIV ), 1 )
372                     TAULOC = TAU( IIV )
373*
374                  ELSE
375*
376                     CALL SGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1,
377     $                             TAULOC, 1, IVROW, MYCOL )
378*
379                  END IF
380*
381                  IF( TAULOC.NE.ZERO ) THEN
382*
383*                    w := sub( C )' * v
384*
385                     IF( MPV.GT.0 ) THEN
386                        CALL SGEMV( 'Transpose', MPV, NQC2, ONE,
387     $                              C( IOFFC2 ), LDC, WORK, 1, ZERO,
388     $                              WORK( IPW ), 1 )
389                     ELSE
390                        CALL SLASET( 'All', NQC2, 1, ZERO, ZERO,
391     $                               WORK( IPW ), MAX( 1, NQC2 ) )
392                     END IF
393                     IF( MYROW.EQ.ICROW1 )
394     $                  CALL SAXPY( NQC2, ONE, C( IOFFC1 ), LDC,
395     $                              WORK( IPW ), MAX( 1, NQC2 ) )
396*
397                     CALL SGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1,
398     $                             WORK( IPW ), MAX( 1, NQC2 ), RDEST,
399     $                             MYCOL )
400*
401*                    sub( C ) := sub( C ) - v * w'
402*
403                     IF( MYROW.EQ.ICROW1 )
404     $                  CALL SAXPY( NQC2, -TAULOC, WORK( IPW ),
405     $                              MAX( 1, NQC2 ), C( IOFFC1 ), LDC )
406                     CALL SGER( MPV, NQC2, -TAULOC, WORK, 1,
407     $                          WORK( IPW ), 1, C( IOFFC2 ), LDC )
408                  END IF
409*
410               END IF
411*
412            ELSE
413*
414*              V is a column vector
415*
416               IF( IVCOL.EQ.ICCOL2 ) THEN
417*
418*                 Perform the local computation within a process column
419*
420                  IF( MYCOL.EQ.ICCOL2 ) THEN
421*
422                     TAULOC = TAU( JJV )
423*
424                     IF( TAULOC.NE.ZERO ) THEN
425*
426*                       w := sub( C )' * v
427*
428                        IF( MPV.GT.0 ) THEN
429                           CALL SGEMV( 'Transpose', MPV, NQC2, ONE,
430     $                                 C( IOFFC2 ), LDC, V( IOFFV ), 1,
431     $                                 ZERO, WORK, 1 )
432                        ELSE
433                           CALL SLASET( 'All', NQC2, 1, ZERO, ZERO,
434     $                                  WORK, MAX( 1, NQC2 ) )
435                        END IF
436                        IF( MYROW.EQ.ICROW1 )
437     $                     CALL SAXPY( NQC2, ONE, C( IOFFC1 ), LDC,
438     $                                 WORK, MAX( 1, NQC2 ) )
439*
440                        CALL SGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1,
441     $                                WORK, MAX( 1, NQC2 ), RDEST,
442     $                                MYCOL )
443*
444*                       sub( C ) := sub( C ) - v * w'
445*
446                        IF( MYROW.EQ.ICROW1 )
447     $                     CALL SAXPY( NQC2, -TAULOC, WORK,
448     $                                 MAX( 1, NQC2 ), C( IOFFC1 ),
449     $                                 LDC )
450                        CALL SGER( MPV, NQC2, -TAULOC, V( IOFFV ), 1,
451     $                             WORK, 1, C( IOFFC2 ), LDC )
452                     END IF
453*
454                  END IF
455*
456               ELSE
457*
458*                 Send V and TAU to the process column ICCOL2
459*
460                  IF( MYCOL.EQ.IVCOL ) THEN
461*
462                     IPW = MPV+1
463                     CALL SCOPY( MPV, V( IOFFV ), 1, WORK, 1 )
464                     WORK( IPW ) = TAU( JJV )
465                     CALL SGESD2D( ICTXT, IPW, 1, WORK, IPW, MYROW,
466     $                             ICCOL2 )
467*
468                  ELSE IF( MYCOL.EQ.ICCOL2 ) THEN
469*
470                     IPW = MPV+1
471                     CALL SGERV2D( ICTXT, IPW, 1, WORK, IPW, MYROW,
472     $                             IVCOL )
473                     TAULOC = WORK( IPW )
474*
475                     IF( TAULOC.NE.ZERO ) THEN
476*
477*                       w := sub( C )' * v
478*
479                        IF( MPV.GT.0 ) THEN
480                           CALL SGEMV( 'Transpose', MPV, NQC2, ONE,
481     $                                 C( IOFFC2 ), LDC, WORK, 1, ZERO,
482     $                                 WORK( IPW ), 1 )
483                        ELSE
484                           CALL SLASET( 'All', NQC2, 1, ZERO, ZERO,
485     $                                  WORK( IPW ), MAX( 1, NQC2 ) )
486                        END IF
487                        IF( MYROW.EQ.ICROW1 )
488     $                     CALL SAXPY( NQC2, ONE, C( IOFFC1 ), LDC,
489     $                                 WORK( IPW ), MAX( 1, NQC2 ) )
490*
491                        CALL SGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1,
492     $                                WORK( IPW ), MAX( 1, NQC2 ),
493     $                                RDEST, MYCOL )
494*
495*                       sub( C ) := sub( C ) - v * w'
496*
497                        IF( MYROW.EQ.ICROW1 )
498     $                     CALL SAXPY( NQC2, -TAULOC, WORK( IPW ),
499     $                                 MAX( 1, NQC2 ), C( IOFFC1 ),
500     $                                 LDC )
501                        CALL SGER( MPV, NQC2, -TAULOC, WORK, 1,
502     $                             WORK( IPW ), 1, C( IOFFC2 ), LDC )
503                     END IF
504*
505                  END IF
506*
507               END IF
508*
509            END IF
510*
511         ELSE
512*
513*           sub( C ) is a proper distributed matrix
514*
515            IF( DESCV( M_ ).EQ.INCV ) THEN
516*
517*              Transpose and broadcast row vector V (ICOFFV=IROFFC2)
518*
519               IPW = MPV+1
520               CALL PBSTRNV( ICTXT, 'Rowwise', 'Transpose', M,
521     $                       DESCV( NB_ ), IROFFC2, V( IOFFV ), LDV,
522     $                       ZERO,
523     $                       WORK, 1, IVROW, IVCOL, ICROW2, -1,
524     $                       WORK( IPW ) )
525*
526*              Perform the local computation within a process column
527*
528               IF( MYROW.EQ.IVROW ) THEN
529*
530                  CALL SGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1,
531     $                          TAU( IIV ), 1 )
532                  TAULOC = TAU( IIV )
533*
534               ELSE
535*
536                  CALL SGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, TAULOC,
537     $                          1, IVROW, MYCOL )
538*
539               END IF
540*
541               IF( TAULOC.NE.ZERO ) THEN
542*
543*                 w := sub( C )' * v
544*
545                  IF( MPV.GT.0 ) THEN
546                     CALL SGEMV( 'Transpose', MPV, NQC2, ONE,
547     $                           C( IOFFC2 ), LDC, WORK, 1, ZERO,
548     $                           WORK( IPW ), 1 )
549                  ELSE
550                     CALL SLASET( 'All', NQC2, 1, ZERO, ZERO,
551     $                            WORK( IPW ), MAX( 1, NQC2 ) )
552                  END IF
553                  IF( MYROW.EQ.ICROW1 )
554     $               CALL SAXPY( NQC2, ONE, C( IOFFC1 ), LDC,
555     $                           WORK( IPW ), MAX( 1, NQC2 ) )
556*
557                  CALL SGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1,
558     $                          WORK( IPW ), MAX( 1, NQC2 ), RDEST,
559     $                          MYCOL )
560*
561*                 sub( C ) := sub( C ) - v * w'
562*
563                  IF( MYROW.EQ.ICROW1 )
564     $               CALL SAXPY( NQC2, -TAULOC, WORK( IPW ),
565     $                           MAX( 1, NQC2 ), C( IOFFC1 ), LDC )
566                  CALL SGER( MPV, NQC2, -TAULOC, WORK, 1, WORK( IPW ),
567     $                       1, C( IOFFC2 ), LDC )
568               END IF
569*
570            ELSE
571*
572*              Broadcast column vector V
573*
574               CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP )
575               IF( MYCOL.EQ.IVCOL ) THEN
576*
577                  IPW = MPV+1
578                  CALL SCOPY( MPV, V( IOFFV ), 1, WORK, 1 )
579                  WORK( IPW ) = TAU( JJV )
580                  CALL SGEBS2D( ICTXT, 'Rowwise', ROWBTOP, IPW, 1,
581     $                          WORK, IPW )
582                  TAULOC = TAU( JJV )
583*
584               ELSE
585*
586                  IPW = MPV+1
587                  CALL SGEBR2D( ICTXT, 'Rowwise', ROWBTOP, IPW, 1, WORK,
588     $                          IPW, MYROW, IVCOL )
589                  TAULOC = WORK( IPW )
590*
591               END IF
592*
593               IF( TAULOC.NE.ZERO ) THEN
594*
595*                 w := sub( C )' * v
596*
597                  IF( MPV.GT.0 ) THEN
598                     CALL SGEMV( 'Transpose', MPV, NQC2, ONE,
599     $                           C( IOFFC2 ), LDC, WORK, 1, ZERO,
600     $                           WORK( IPW ), 1 )
601                  ELSE
602                     CALL SLASET( 'All', NQC2, 1, ZERO, ZERO,
603     $                            WORK( IPW ), MAX( 1, NQC2 ) )
604                  END IF
605                  IF( MYROW.EQ.ICROW1 )
606     $               CALL SAXPY( NQC2, ONE, C( IOFFC1 ), LDC,
607     $                           WORK( IPW ), MAX( 1, NQC2 ) )
608*
609                  CALL SGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1,
610     $                          WORK( IPW ), MAX( 1, NQC2 ), RDEST,
611     $                          MYCOL )
612*
613*                 sub( C ) := sub( C ) - v * w'
614*
615                  IF( MYROW.EQ.ICROW1 )
616     $               CALL SAXPY( NQC2, -TAULOC, WORK( IPW ),
617     $                           MAX( 1, NQC2 ), C( IOFFC1 ), LDC )
618                  CALL SGER( MPV, NQC2, -TAULOC, WORK, 1, WORK( IPW ),
619     $                       1, C( IOFFC2 ), LDC )
620               END IF
621*
622            END IF
623*
624         END IF
625*
626      ELSE
627*
628         IF( CCBLCK ) THEN
629            RDEST = MYROW
630         ELSE
631            RDEST = -1
632         END IF
633*
634         IF( CRBLCK ) THEN
635*
636*           sub( C ) is distributed over a process row
637*
638            IF( DESCV( M_ ).EQ.INCV ) THEN
639*
640*              V is a row vector
641*
642               IF( IVROW.EQ.ICROW2 ) THEN
643*
644*                 Perform the local computation within a process row
645*
646                  IF( MYROW.EQ.ICROW2 ) THEN
647*
648                     TAULOC = TAU( IIV )
649*
650                     IF( TAULOC.NE.ZERO ) THEN
651*
652*                       w := sub( C ) * v
653*
654                        IF( NQV.GT.0 ) THEN
655                           CALL SGEMV( 'No transpose', MPC2, NQV, ONE,
656     $                                 C( IOFFC2 ), LDC, V( IOFFV ),
657     $                                 LDV, ZERO, WORK, 1 )
658                        ELSE
659                           CALL SLASET( 'All', MPC2, 1, ZERO, ZERO,
660     $                                  WORK, MAX( 1, MPC2 ) )
661                        END IF
662                        IF( MYCOL.EQ.ICCOL1 )
663     $                     CALL SAXPY( MPC2, ONE, C( IOFFC1 ), 1,
664     $                                   WORK, 1 )
665*
666                        CALL SGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1,
667     $                                WORK, MAX( 1, MPC2 ), RDEST,
668     $                               ICCOL2 )
669*
670                        IF( MYCOL.EQ.ICCOL1 )
671     $                     CALL SAXPY( MPC2, -TAULOC, WORK, 1,
672     $                                 C( IOFFC1 ), 1 )
673*
674*                       sub( C ) := sub( C ) - w * v'
675*
676                        IF( MPC2.GT.0 .AND. NQV.GT.0 )
677     $                     CALL SGER( MPC2, NQV, -TAULOC, WORK, 1,
678     $                                V( IOFFV ), LDV, C( IOFFC2 ),
679     $                                LDC )
680                     END IF
681*
682                  END IF
683*
684               ELSE
685*
686*                 Send V and TAU to the process row ICROW2
687*
688                  IF( MYROW.EQ.IVROW ) THEN
689*
690                     IPW = NQV+1
691                     CALL SCOPY( NQV, V( IOFFV ), LDV, WORK, 1 )
692                     WORK( IPW ) = TAU( IIV )
693                     CALL SGESD2D( ICTXT, IPW, 1, WORK, IPW, ICROW2,
694     $                             MYCOL )
695*
696                  ELSE IF( MYROW.EQ.ICROW2 ) THEN
697*
698                     IPW = NQV+1
699                     CALL SGERV2D( ICTXT, IPW, 1, WORK, IPW, IVROW,
700     $                             MYCOL )
701                     TAULOC = WORK( IPW )
702*
703                     IF( TAULOC.NE.ZERO ) THEN
704*
705*                       w := sub( C ) * v
706*
707                        IF( NQV.GT.0 ) THEN
708                           CALL SGEMV( 'No transpose', MPC2, NQV, ONE,
709     $                                 C( IOFFC2 ), LDC, WORK, 1, ZERO,
710     $                                 WORK( IPW ), 1 )
711                        ELSE
712                           CALL SLASET( 'All', MPC2, 1, ZERO, ZERO,
713     $                                  WORK( IPW ), MAX( 1, MPC2 ) )
714                        END IF
715                        IF( MYCOL.EQ.ICCOL1 )
716     $                     CALL SAXPY( MPC2, ONE, C( IOFFC1 ), 1,
717     $                                   WORK( IPW ), 1 )
718                        CALL SGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1,
719     $                                WORK( IPW ), MAX( 1, MPC2 ),
720     $                                RDEST, ICCOL2 )
721                        IF( MYCOL.EQ.ICCOL1 )
722     $                     CALL SAXPY( MPC2, -TAULOC, WORK( IPW ), 1,
723     $                                 C( IOFFC1 ), 1 )
724*
725*                       sub( C ) := sub( C ) - w * v'
726*
727                        CALL SGER( MPC2, NQV, -TAULOC, WORK( IPW ), 1,
728     $                             WORK, 1, C( IOFFC2 ), LDC )
729                     END IF
730*
731                  END IF
732*
733               END IF
734*
735            ELSE
736*
737*              Transpose column vector V (IROFFV = ICOFFC2)
738*
739               IPW = NQV+1
740               CALL PBSTRNV( ICTXT, 'Columnwise', 'Transpose', N,
741     $                       DESCV( MB_ ), ICOFFC2, V( IOFFV ), 1, ZERO,
742     $                       WORK, 1, IVROW, IVCOL, ICROW2, ICCOL2,
743     $                       WORK( IPW ) )
744*
745*              Perform the local computation within a process column
746*
747               IF( MYROW.EQ.ICROW2 ) THEN
748*
749                  IF( MYCOL.EQ.IVCOL ) THEN
750*
751                     CALL SGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1,
752     $                             TAU( JJV ), 1 )
753                     TAULOC = TAU( JJV )
754*
755                  ELSE
756*
757                     CALL SGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, TAULOC,
758     $                             1, MYROW, IVCOL )
759*
760                  END IF
761*
762                  IF( TAULOC.NE.ZERO ) THEN
763*
764*                    w := sub( C ) * v
765*
766                     IF( NQV.GT.0 ) THEN
767                        CALL SGEMV( 'No transpose', MPC2, NQV, ONE,
768     $                              C( IOFFC2 ), LDC, WORK, 1, ZERO,
769     $                              WORK( IPW ), 1 )
770                     ELSE
771                        CALL SLASET( 'All', MPC2, 1, ZERO, ZERO,
772     $                               WORK( IPW ), MAX( 1, MPC2 ) )
773                     END IF
774                     IF( MYCOL.EQ.ICCOL1 )
775     $                  CALL SAXPY( MPC2, ONE, C( IOFFC1 ), 1,
776     $                              WORK( IPW ), 1 )
777                     CALL SGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1,
778     $                             WORK( IPW ), MAX( 1, MPC2 ), RDEST,
779     $                             ICCOL2 )
780                     IF( MYCOL.EQ.ICCOL1 )
781     $                  CALL SAXPY( MPC2, -TAULOC, WORK( IPW ), 1,
782     $                              C( IOFFC1 ), 1 )
783*
784*                    sub( C ) := sub( C ) - w * v'
785*
786                     CALL SGER( MPC2, NQV, -TAULOC, WORK( IPW ), 1,
787     $                          WORK, 1, C( IOFFC2 ), LDC )
788                  END IF
789*
790               END IF
791*
792            END IF
793*
794         ELSE
795*
796*           sub( C ) is a proper distributed matrix
797*
798            IF( DESCV( M_ ).EQ.INCV ) THEN
799*
800*              Broadcast row vector V
801*
802               CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise',
803     $                         COLBTOP )
804               IF( MYROW.EQ.IVROW ) THEN
805*
806                  IPW = NQV+1
807                  CALL SCOPY( NQV, V( IOFFV ), LDV, WORK, 1 )
808                  WORK( IPW ) = TAU( IIV )
809                  CALL SGEBS2D( ICTXT, 'Columnwise', COLBTOP, IPW, 1,
810     $                          WORK, IPW )
811                  TAULOC = TAU( IIV )
812*
813               ELSE
814*
815                  IPW = NQV+1
816                  CALL SGEBR2D( ICTXT, 'Columnwise', COLBTOP, IPW, 1,
817     $                          WORK, IPW, IVROW, MYCOL )
818                  TAULOC = WORK( IPW )
819*
820               END IF
821*
822               IF( TAULOC.NE.ZERO ) THEN
823*
824*                 w := sub( C ) * v
825*
826                  IF( NQV.GT.0 ) THEN
827                     CALL SGEMV( 'No Transpose', MPC2, NQV, ONE,
828     $                           C( IOFFC2 ), LDC, WORK, 1, ZERO,
829     $                           WORK( IPW ), 1 )
830                  ELSE
831                     CALL SLASET( 'All', MPC2, 1, ZERO, ZERO,
832     $                            WORK( IPW ), MAX( 1, MPC2 ) )
833                  END IF
834                  IF( MYCOL.EQ.ICCOL1 )
835     $               CALL SAXPY( MPC2, ONE, C( IOFFC1 ), 1,
836     $                           WORK( IPW ), 1 )
837*
838                  CALL SGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1,
839     $                          WORK( IPW ), MAX( 1, MPC2 ), RDEST,
840     $                          ICCOL2 )
841                  IF( MYCOL.EQ.ICCOL1 )
842     $               CALL SAXPY( MPC2, -TAULOC, WORK( IPW ), 1,
843     $                           C( IOFFC1 ), 1 )
844*
845*                 sub( C ) := sub( C ) - w * v'
846*
847                  CALL SGER( MPC2, NQV, -TAULOC, WORK( IPW ), 1, WORK,
848     $                       1, C( IOFFC2 ), LDC )
849               END IF
850*
851            ELSE
852*
853*              Transpose and broadcast column vector V (ICOFFC2=IROFFV)
854*
855               IPW = NQV+1
856               CALL PBSTRNV( ICTXT, 'Columnwise', 'Transpose', N,
857     $                       DESCV( MB_ ), ICOFFC2, V( IOFFV ), 1, ZERO,
858     $                       WORK, 1, IVROW, IVCOL, -1, ICCOL2,
859     $                       WORK( IPW ) )
860*
861*              Perform the local computation within a process column
862*
863               IF( MYCOL.EQ.IVCOL ) THEN
864*
865                  CALL SGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, TAU( JJV ),
866     $                          1 )
867                  TAULOC = TAU( JJV )
868*
869               ELSE
870*
871                  CALL SGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, TAULOC, 1,
872     $                          MYROW, IVCOL )
873*
874               END IF
875*
876               IF( TAULOC.NE.ZERO ) THEN
877*
878*                 w := sub( C ) * v
879*
880                  IF( NQV.GT.0 ) THEN
881                     CALL SGEMV( 'No transpose', MPC2, NQV, ONE,
882     $                           C( IOFFC2 ), LDC, WORK, 1, ZERO,
883     $                           WORK( IPW ), 1 )
884                  ELSE
885                     CALL SLASET( 'All', MPC2, 1, ZERO, ZERO,
886     $                            WORK( IPW ), MAX( 1, MPC2 ) )
887                  END IF
888                  IF( MYCOL.EQ.ICCOL1 )
889     $               CALL SAXPY( MPC2, ONE, C( IOFFC1 ), 1,
890     $                           WORK( IPW ), 1 )
891                  CALL SGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1,
892     $                          WORK( IPW ), MAX( 1, MPC2 ), RDEST,
893     $                          ICCOL2 )
894                  IF( MYCOL.EQ.ICCOL1 )
895     $               CALL SAXPY( MPC2, -TAULOC, WORK( IPW ), 1,
896     $                           C( IOFFC1 ), 1 )
897*
898*                 sub( C ) := sub( C ) - w * v'
899*
900                  CALL SGER( MPC2, NQV, -TAULOC, WORK( IPW ), 1, WORK,
901     $                       1, C( IOFFC2 ), LDC )
902               END IF
903*
904            END IF
905*
906         END IF
907*
908      END IF
909*
910      RETURN
911*
912*     End of PSLARZ
913*
914      END
915