1      SUBROUTINE PDLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, IV,
2     $                    JV, DESCV, T, C, IC, JC, DESCC, WORK )
3*
4*  -- ScaLAPACK auxiliary routine (version 2.0.2) --
5*     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
6*     May 1 2012
7*
8*     .. Scalar Arguments ..
9      CHARACTER          SIDE, TRANS, DIRECT, STOREV
10      INTEGER            IC, IV, JC, JV, K, M, N
11*     ..
12*     .. Array Arguments ..
13      INTEGER            DESCC( * ), DESCV( * )
14      DOUBLE PRECISION   C( * ), T( * ), V( * ), WORK( * )
15*     ..
16*
17*  Purpose
18*  =======
19*
20*  PDLARFB applies a real block reflector Q or its transpose Q**T to a
21*  real distributed M-by-N matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1)
22*  from the left or the right.
23*
24*  Notes
25*  =====
26*
27*  Each global data object is described by an associated description
28*  vector.  This vector stores the information required to establish
29*  the mapping between an object element and its corresponding process
30*  and memory location.
31*
32*  Let A be a generic term for any 2D block cyclicly distributed array.
33*  Such a global array has an associated description vector DESCA.
34*  In the following comments, the character _ should be read as
35*  "of the global array".
36*
37*  NOTATION        STORED IN      EXPLANATION
38*  --------------- -------------- --------------------------------------
39*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
40*                                 DTYPE_A = 1.
41*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
42*                                 the BLACS process grid A is distribu-
43*                                 ted over. The context itself is glo-
44*                                 bal, but the handle (the integer
45*                                 value) may vary.
46*  M_A    (global) DESCA( M_ )    The number of rows in the global
47*                                 array A.
48*  N_A    (global) DESCA( N_ )    The number of columns in the global
49*                                 array A.
50*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
51*                                 the rows of the array.
52*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
53*                                 the columns of the array.
54*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
55*                                 row of the array A is distributed.
56*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
57*                                 first column of the array A is
58*                                 distributed.
59*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
60*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
61*
62*  Let K be the number of rows or columns of a distributed matrix,
63*  and assume that its process grid has dimension p x q.
64*  LOCr( K ) denotes the number of elements of K that a process
65*  would receive if K were distributed over the p processes of its
66*  process column.
67*  Similarly, LOCc( K ) denotes the number of elements of K that a
68*  process would receive if K were distributed over the q processes of
69*  its process row.
70*  The values of LOCr() and LOCc() may be determined via a call to the
71*  ScaLAPACK tool function, NUMROC:
72*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
73*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
74*  An upper bound for these quantities may be computed by:
75*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
76*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
77*
78*  Arguments
79*  =========
80*
81*  SIDE    (global input) CHARACTER
82*          = 'L': apply Q or Q**T from the Left;
83*          = 'R': apply Q or Q**T from the Right.
84*
85*  TRANS   (global input) CHARACTER
86*          = 'N':  No transpose, apply Q;
87*          = 'T':  Transpose, apply Q**T.
88*
89*  DIRECT  (global input) CHARACTER
90*          Indicates how Q is formed from a product of elementary
91*          reflectors
92*          = 'F': Q = H(1) H(2) . . . H(k) (Forward)
93*          = 'B': Q = H(k) . . . H(2) H(1) (Backward)
94*
95*  STOREV  (global input) CHARACTER
96*          Indicates how the vectors which define the elementary
97*          reflectors are stored:
98*          = 'C': Columnwise
99*          = 'R': Rowwise
100*
101*  M       (global input) INTEGER
102*          The number of rows to be operated on i.e the number of rows
103*          of the distributed submatrix sub( C ). M >= 0.
104*
105*  N       (global input) INTEGER
106*          The number of columns to be operated on i.e the number of
107*          columns of the distributed submatrix sub( C ). N >= 0.
108*
109*  K       (global input) INTEGER
110*          The order of the matrix T (= the number of elementary
111*          reflectors whose product defines the block reflector).
112*
113*  V       (local input) DOUBLE PRECISION pointer into the local memory
114*          to an array of dimension ( LLD_V, LOCc(JV+K-1) ) if
115*          STOREV = 'C', ( LLD_V, LOCc(JV+M-1)) if STOREV = 'R' and
116*          SIDE = 'L', ( LLD_V, LOCc(JV+N-1) ) if STOREV = 'R' and
117*          SIDE = 'R'. It contains the local pieces of the distributed
118*          vectors V representing the Householder transformation.
119*          See further details.
120*          If STOREV = 'C' and SIDE = 'L', LLD_V >= MAX(1,LOCr(IV+M-1));
121*          if STOREV = 'C' and SIDE = 'R', LLD_V >= MAX(1,LOCr(IV+N-1));
122*          if STOREV = 'R', LLD_V >= LOCr(IV+K-1).
123*
124*  IV      (global input) INTEGER
125*          The row index in the global array V indicating the first
126*          row of sub( V ).
127*
128*  JV      (global input) INTEGER
129*          The column index in the global array V indicating the
130*          first column of sub( V ).
131*
132*  DESCV   (global and local input) INTEGER array of dimension DLEN_.
133*          The array descriptor for the distributed matrix V.
134*
135*  T       (local input) DOUBLE PRECISION array, dimension MB_V by MB_V
136*          if STOREV = 'R' and NB_V by NB_V if STOREV = 'C'. The trian-
137*          gular matrix T in the representation of the block reflector.
138*
139*  C       (local input/local output) DOUBLE PRECISION pointer into the
140*          local memory to an array of dimension (LLD_C,LOCc(JC+N-1)).
141*          On entry, the M-by-N distributed matrix sub( C ). On exit,
142*          sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) or
143*          sub( C )*Q or sub( C )*Q'.
144*
145*  IC      (global input) INTEGER
146*          The row index in the global array C indicating the first
147*          row of sub( C ).
148*
149*  JC      (global input) INTEGER
150*          The column index in the global array C indicating the
151*          first column of sub( C ).
152*
153*  DESCC   (global and local input) INTEGER array of dimension DLEN_.
154*          The array descriptor for the distributed matrix C.
155*
156*  WORK    (local workspace) DOUBLE PRECISION array, dimension (LWORK)
157*          If STOREV = 'C',
158*            if SIDE = 'L',
159*              LWORK >= ( NqC0 + MpC0 ) * K
160*            else if SIDE = 'R',
161*              LWORK >= ( NqC0 + MAX( NpV0 + NUMROC( NUMROC( N+ICOFFC,
162*                         NB_V, 0, 0, NPCOL ), NB_V, 0, 0, LCMQ ),
163*                         MpC0 ) ) * K
164*            end if
165*          else if STOREV = 'R',
166*            if SIDE = 'L',
167*              LWORK >= ( MpC0 + MAX( MqV0 + NUMROC( NUMROC( M+IROFFC,
168*                         MB_V, 0, 0, NPROW ), MB_V, 0, 0, LCMP ),
169*                         NqC0 ) ) * K
170*            else if SIDE = 'R',
171*              LWORK >= ( MpC0 + NqC0 ) * K
172*            end if
173*          end if
174*
175*          where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ),
176*
177*          IROFFV = MOD( IV-1, MB_V ), ICOFFV = MOD( JV-1, NB_V ),
178*          IVROW = INDXG2P( IV, MB_V, MYROW, RSRC_V, NPROW ),
179*          IVCOL = INDXG2P( JV, NB_V, MYCOL, CSRC_V, NPCOL ),
180*          MqV0 = NUMROC( M+ICOFFV, NB_V, MYCOL, IVCOL, NPCOL ),
181*          NpV0 = NUMROC( N+IROFFV, MB_V, MYROW, IVROW, NPROW ),
182*
183*          IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ),
184*          ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ),
185*          ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ),
186*          MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ),
187*          NpC0 = NUMROC( N+ICOFFC, MB_C, MYROW, ICROW, NPROW ),
188*          NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ),
189*
190*          ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions;
191*          MYROW, MYCOL, NPROW and NPCOL can be determined by calling
192*          the subroutine BLACS_GRIDINFO.
193*
194*  Alignment requirements
195*  ======================
196*
197*  The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1)
198*  must verify some alignment properties, namely the following
199*  expressions should be true:
200*
201*  If STOREV = 'Columnwise'
202*    If SIDE = 'Left',
203*      ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW )
204*    If SIDE = 'Right',
205*      ( MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC )
206*  else if STOREV = 'Rowwise'
207*    If SIDE = 'Left',
208*      ( NB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC )
209*    If SIDE = 'Right',
210*      ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL )
211*  end if
212*
213*  =====================================================================
214*
215*     .. Parameters ..
216      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
217     $                   LLD_, MB_, M_, NB_, N_, RSRC_
218      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
219     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
220     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
221      DOUBLE PRECISION   ONE, ZERO
222      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
223*     ..
224*     .. Local Scalars ..
225      LOGICAL            FORWARD
226      CHARACTER          COLBTOP, ROWBTOP, TRANST, UPLO
227      INTEGER            HEIGHT, IBASE, ICCOL, ICOFFC, ICOFFV, ICROW,
228     $                   ICTXT, II, IIBEG, IIC, IIEND, IINXT, IIV,
229     $                   ILASTCOL, ILASTROW, ILEFT, IOFF, IOFFC, IOFFV,
230     $                   IPT, IPV, IPW, IPW1, IRIGHT, IROFFC, IROFFV,
231     $                   ITOP, IVCOL, IVROW, JJ, JJBEG, JJC, JJEND,
232     $                   JJNXT, JJV, KP, KQ, LDC, LDV, LV, LW, MBV, MPC,
233     $                   MPC0, MQV, MQV0, MYCOL, MYDIST, MYROW, NBV,
234     $                   NPV, NPV0, NPCOL, NPROW, NQC, NQC0, WIDE
235*     ..
236*     .. External Subroutines ..
237      EXTERNAL           BLACS_GRIDINFO, DGEBR2D, DGEBS2D,DGEMM,
238     $                   DGSUM2D, DLAMOV, DLASET, DTRBR2D,
239     $                   DTRBS2D, DTRMM, INFOG1L, INFOG2L, PB_TOPGET,
240     $                   PBDTRAN
241*     ..
242*     .. Intrinsic Functions ..
243      INTRINSIC          MAX, MIN, MOD
244*     ..
245*     .. External Functions ..
246      LOGICAL            LSAME
247      INTEGER            ICEIL, NUMROC
248      EXTERNAL           ICEIL, LSAME, NUMROC
249*     ..
250*     .. Executable Statements ..
251*
252*     Quick return if possible
253*
254      IF( M.LE.0 .OR. N.LE.0 .OR. K.LE.0 )
255     $   RETURN
256*
257*     Get grid parameters
258*
259      ICTXT = DESCC( CTXT_ )
260      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
261*
262      IF( LSAME( TRANS, 'N' ) ) THEN
263          TRANST = 'T'
264      ELSE
265          TRANST = 'N'
266      END IF
267      FORWARD = LSAME( DIRECT, 'F' )
268      IF( FORWARD ) THEN
269         UPLO = 'U'
270      ELSE
271         UPLO = 'L'
272      END IF
273*
274      CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, IIV, JJV,
275     $              IVROW, IVCOL )
276      CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, IIC, JJC,
277     $              ICROW, ICCOL )
278      LDC = DESCC( LLD_ )
279      LDV = DESCV( LLD_ )
280      IIC = MIN( IIC, LDC )
281      IIV = MIN( IIV, LDV )
282      IROFFC = MOD( IC-1, DESCC( MB_ ) )
283      ICOFFC = MOD( JC-1, DESCC( NB_ ) )
284      MBV = DESCV( MB_ )
285      NBV = DESCV( NB_ )
286      IROFFV = MOD( IV-1, MBV )
287      ICOFFV = MOD( JV-1, NBV )
288      MPC = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW )
289      NQC = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL )
290      IF( MYCOL.EQ.ICCOL )
291     $   NQC = NQC - ICOFFC
292      IF( MYROW.EQ.ICROW )
293     $   MPC = MPC - IROFFC
294      JJC = MIN( JJC, MAX( 1, JJC+NQC-1 ) )
295      JJV = MIN( JJV, MAX( 1, NUMROC( DESCV( N_ ), NBV, MYCOL,
296     $                                DESCV( CSRC_ ), NPCOL ) ) )
297      IOFFC = IIC + ( JJC-1 ) * LDC
298      IOFFV = IIV + ( JJV-1 ) * LDV
299*
300      IF( LSAME( STOREV, 'C' ) ) THEN
301*
302*        V is stored columnwise
303*
304         IF( LSAME( SIDE, 'L' ) ) THEN
305*
306*           Form  Q*sub( C )  or  Q'*sub( C  )
307*
308*           Locally V( IOFFV ) is MPV x K, C( IOFFC ) is MPC x NQC
309*           WORK( IPV ) is MPC x K = V( IOFFV ), MPC = MPV
310*           WORK( IPW ) is NQC x K = C( IOFFC )' * V( IOFFV )
311*
312            IPV = 1
313            IPW = IPV + MPC * K
314            LV = MAX( 1, MPC )
315            LW = MAX( 1, NQC )
316*
317*           Broadcast V to the other process columns.
318*
319            CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP )
320            IF( MYCOL.EQ.IVCOL ) THEN
321               CALL DGEBS2D( ICTXT, 'Rowwise', ROWBTOP, MPC, K,
322     $                       V( IOFFV ), LDV )
323               IF( MYROW.EQ.IVROW )
324     $            CALL DTRBS2D( ICTXT, 'Rowwise', ROWBTOP, UPLO,
325     $                          'Non unit', K, K, T, NBV )
326               CALL DLAMOV( 'All', MPC, K, V( IOFFV ), LDV, WORK( IPV ),
327     $                      LV )
328            ELSE
329               CALL DGEBR2D( ICTXT, 'Rowwise', ROWBTOP, MPC, K,
330     $                       WORK( IPV ), LV, MYROW, IVCOL )
331               IF( MYROW.EQ.IVROW )
332     $            CALL DTRBR2D( ICTXT, 'Rowwise', ROWBTOP, UPLO,
333     $                          'Non unit', K, K, T, NBV, MYROW, IVCOL )
334            END IF
335*
336            IF( FORWARD ) THEN
337*
338*              WORK(IPV) = ( V1 ) where V1 is unit lower triangular,
339*                          ( V2 ) zeroes upper triangular part of V1
340*
341               MYDIST = MOD( MYROW-IVROW+NPROW, NPROW )
342               ITOP = MAX( 0, MYDIST*MBV - IROFFV )
343               IIBEG = IIV
344               IIEND = IIBEG + MPC - 1
345               IINXT = MIN( ICEIL( IIBEG, MBV )*MBV, IIEND )
346*
347   10          CONTINUE
348               IF( K-ITOP .GT.0 ) THEN
349                  CALL DLASET( 'Upper', IINXT-IIBEG+1, K-ITOP, ZERO,
350     $                         ONE, WORK( IPV+IIBEG-IIV+ITOP*LV ), LV )
351                  MYDIST = MYDIST + NPROW
352                  ITOP = MYDIST * MBV - IROFFV
353                  IIBEG = IINXT + 1
354                  IINXT = MIN( IINXT+MBV, IIEND )
355                  GO TO 10
356               END IF
357*
358            ELSE
359*
360*              WORK(IPV) = ( V1 ) where V2 is unit upper triangular,
361*                          ( V2 ) zeroes lower triangular part of V2
362*
363               JJ = JJV
364               IOFF = MOD( IV+M-K-1, MBV )
365               CALL INFOG1L( IV+M-K, MBV, NPROW, MYROW, DESCV( RSRC_ ),
366     $                       II, ILASTROW )
367               KP = NUMROC( K+IOFF, MBV, MYROW, ILASTROW, NPROW )
368               IF( MYROW.EQ.ILASTROW )
369     $            KP = KP - IOFF
370               MYDIST = MOD( MYROW-ILASTROW+NPROW, NPROW )
371               ITOP = MYDIST * MBV - IOFF
372               IBASE = MIN( ITOP+MBV, K )
373               ITOP = MIN( MAX( 0, ITOP ), K )
374*
375   20          CONTINUE
376               IF( JJ.LE.( JJV+K-1 ) ) THEN
377                  HEIGHT = IBASE - ITOP
378                  CALL DLASET( 'All', KP, ITOP-JJ+JJV, ZERO, ZERO,
379     $                         WORK( IPV+II-IIV+(JJ-JJV)*LV ), LV )
380                  CALL DLASET( 'Lower', KP, HEIGHT, ZERO, ONE,
381     $                         WORK( IPV+II-IIV+ITOP*LV ), LV )
382                  KP = MAX( 0, KP - HEIGHT )
383                  II = II + HEIGHT
384                  JJ = JJV + IBASE
385                  MYDIST = MYDIST + NPROW
386                  ITOP = MYDIST * MBV - IOFF
387                  IBASE = MIN( ITOP + MBV, K )
388                  ITOP = MIN( ITOP, K )
389                  GO TO 20
390               END IF
391*
392            END IF
393*
394*           WORK( IPW ) = C( IOFFC )' * V  (NQC x MPC x K) -> NQC x K
395*
396            IF( MPC.GT.0 ) THEN
397               CALL DGEMM( 'Transpose', 'No transpose', NQC, K, MPC,
398     $                    ONE, C( IOFFC ), LDC, WORK( IPV ), LV, ZERO,
399     $                    WORK( IPW ), LW )
400            ELSE
401               CALL DLASET( 'All', NQC, K, ZERO, ZERO, WORK( IPW ), LW )
402            END IF
403*
404            CALL DGSUM2D( ICTXT, 'Columnwise', ' ', NQC, K, WORK( IPW ),
405     $                    LW, IVROW, MYCOL )
406*
407            IF( MYROW.EQ.IVROW ) THEN
408*
409*              WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T
410*
411               CALL DTRMM( 'Right', UPLO, TRANST, 'Non unit', NQC, K,
412     $                     ONE, T, NBV, WORK( IPW ), LW )
413               CALL DGEBS2D( ICTXT, 'Columnwise', ' ', NQC, K,
414     $                       WORK( IPW ), LW )
415            ELSE
416               CALL DGEBR2D( ICTXT, 'Columnwise', ' ', NQC, K,
417     $                       WORK( IPW ), LW, IVROW, MYCOL )
418            END IF
419*
420*               C            C      -     V       *     W'
421*           C( IOFFC ) = C( IOFFC ) - WORK( IPV ) * WORK( IPW )'
422*                        MPC x NQC    MPC x K         K x NQC
423*
424            CALL DGEMM( 'No transpose', 'Transpose', MPC, NQC, K, -ONE,
425     $                  WORK( IPV ), LV, WORK( IPW ), LW, ONE,
426     $                  C( IOFFC ), LDC )
427*
428         ELSE
429*
430*           Form sub( C )*Q or sub( C )*Q'
431*
432*           ICOFFC = IROFFV is required by the current transposition
433*           routine PBDTRAN
434*
435            NPV0 = NUMROC( N+IROFFV, MBV, MYROW, IVROW, NPROW )
436            IF( MYROW.EQ.IVROW ) THEN
437               NPV = NPV0 - IROFFV
438            ELSE
439               NPV = NPV0
440            END IF
441            IF( MYCOL.EQ.ICCOL ) THEN
442               NQC0 = NQC + ICOFFC
443            ELSE
444               NQC0 = NQC
445            END IF
446*
447*           Locally V( IOFFV ) is NPV x K C( IOFFC ) is MPC x NQC
448*           WORK( IPV ) is K x NQC0 = [ . V( IOFFV ) ]'
449*           WORK( IPW ) is NPV0 x K = [ . V( IOFFV )' ]'
450*           WORK( IPT ) is the workspace for PBDTRAN
451*
452            IPV = 1
453            IPW = IPV + K * NQC0
454            IPT = IPW + NPV0 * K
455            LV = MAX( 1, K )
456            LW = MAX( 1, NPV0 )
457*
458            IF( MYCOL.EQ.IVCOL ) THEN
459               IF( MYROW.EQ.IVROW ) THEN
460                  CALL DLASET( 'All', IROFFV, K, ZERO, ZERO,
461     $                         WORK( IPW ), LW )
462                  IPW1 = IPW + IROFFV
463                  CALL DLAMOV( 'All', NPV, K, V( IOFFV ), LDV,
464     $                         WORK( IPW1 ), LW )
465               ELSE
466                  IPW1 = IPW
467                  CALL DLAMOV( 'All', NPV, K, V( IOFFV ), LDV,
468     $                         WORK( IPW1 ), LW )
469               END IF
470*
471               IF( FORWARD ) THEN
472*
473*                 WORK(IPW) = ( . V1' V2' )' where V1 is unit lower
474*                 triangular, zeroes upper triangular part of V1
475*
476                  MYDIST = MOD( MYROW-IVROW+NPROW, NPROW )
477                  ITOP = MAX( 0, MYDIST*MBV - IROFFV )
478                  IIBEG = IIV
479                  IIEND = IIBEG + NPV - 1
480                  IINXT = MIN( ICEIL( IIBEG, MBV )*MBV, IIEND )
481*
482   30             CONTINUE
483                  IF( ( K-ITOP ).GT.0 ) THEN
484                     CALL DLASET( 'Upper', IINXT-IIBEG+1, K-ITOP, ZERO,
485     $                            ONE, WORK( IPW1+IIBEG-IIV+ITOP*LW ),
486     $                            LW )
487                     MYDIST = MYDIST + NPROW
488                     ITOP = MYDIST * MBV - IROFFV
489                     IIBEG = IINXT + 1
490                     IINXT = MIN( IINXT+MBV, IIEND )
491                     GO TO 30
492                  END IF
493*
494               ELSE
495*
496*                 WORK( IPW ) = ( . V1' V2' )' where V2 is unit upper
497*                 triangular, zeroes lower triangular part of V2.
498*
499                  JJ = JJV
500                  CALL INFOG1L( IV+N-K, MBV, NPROW, MYROW,
501     $                          DESCV( RSRC_ ), II, ILASTROW )
502                  IOFF = MOD( IV+N-K-1, MBV )
503                  KP = NUMROC( K+IOFF, MBV, MYROW, ILASTROW, NPROW )
504                  IF( MYROW.EQ.ILASTROW )
505     $               KP = KP - IOFF
506                  MYDIST = MOD( MYROW-ILASTROW+NPROW, NPROW )
507                  ITOP = MYDIST * MBV - IOFF
508                  IBASE = MIN( ITOP+MBV, K )
509                  ITOP = MIN( MAX( 0, ITOP ), K )
510*
511   40             CONTINUE
512                  IF( JJ.LE.( JJV+K-1 ) ) THEN
513                     HEIGHT = IBASE - ITOP
514                     CALL DLASET( 'All', KP, ITOP-JJ+JJV, ZERO, ZERO,
515     $                            WORK( IPW1+II-IIV+(JJ-JJV)*LW ), LW )
516                     CALL DLASET( 'Lower', KP, HEIGHT, ZERO, ONE,
517     $                            WORK( IPW1+II-IIV+ITOP*LW ), LW )
518                     KP = MAX( 0, KP - HEIGHT )
519                     II = II + HEIGHT
520                     JJ = JJV + IBASE
521                     MYDIST = MYDIST + NPROW
522                     ITOP = MYDIST * MBV - IOFF
523                     IBASE = MIN( ITOP + MBV, K )
524                     ITOP = MIN( ITOP, K )
525                     GO TO 40
526                  END IF
527               END IF
528            END IF
529*
530            CALL PBDTRAN( ICTXT, 'Columnwise', 'Transpose', N+IROFFV, K,
531     $                    MBV, WORK( IPW ), LW, ZERO, WORK( IPV ), LV,
532     $                    IVROW, IVCOL, -1, ICCOL, WORK( IPT ) )
533*
534*           WORK( IPV ) = ( . V' ) -> WORK( IPV ) = V' is K x NQC
535*
536            IF( MYCOL.EQ.ICCOL )
537     $         IPV = IPV + ICOFFC * LV
538*
539*           WORK( IPW ) becomes MPC x K = C( IOFFC ) * V
540*           WORK( IPW ) = C( IOFFC ) * V  (MPC x NQC x K) -> MPC x K
541*
542            LW = MAX( 1, MPC )
543*
544            IF( NQC.GT.0 ) THEN
545               CALL DGEMM( 'No transpose', 'Transpose', MPC, K, NQC,
546     $                     ONE, C( IOFFC ), LDC, WORK( IPV ), LV, ZERO,
547     $                     WORK( IPW ), LW )
548            ELSE
549               CALL DLASET( 'All', MPC, K, ZERO, ZERO, WORK( IPW ), LW )
550            END IF
551*
552            CALL DGSUM2D( ICTXT, 'Rowwise', ' ', MPC, K, WORK( IPW ),
553     $                    LW, MYROW, IVCOL )
554*
555*           WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T
556*
557            IF( MYCOL.EQ.IVCOL ) THEN
558               IF( MYROW.EQ.IVROW ) THEN
559*
560*                 Broadcast the block reflector to the other rows.
561*
562                  CALL DTRBS2D( ICTXT, 'Columnwise', ' ', UPLO,
563     $                          'Non unit', K, K, T, NBV )
564               ELSE
565                  CALL DTRBR2D( ICTXT, 'Columnwise', ' ', UPLO,
566     $                          'Non unit', K, K, T, NBV, IVROW, MYCOL )
567               END IF
568               CALL DTRMM( 'Right', UPLO, TRANS, 'Non unit', MPC, K,
569     $                     ONE, T, NBV, WORK( IPW ), LW )
570*
571               CALL DGEBS2D( ICTXT, 'Rowwise', ' ', MPC, K, WORK( IPW ),
572     $                       LW )
573            ELSE
574               CALL DGEBR2D( ICTXT, 'Rowwise', ' ', MPC, K, WORK( IPW ),
575     $                       LW, MYROW, IVCOL )
576            END IF
577*
578*               C            C      -     W       *     V'
579*           C( IOFFC ) = C( IOFFC ) - WORK( IPW ) * WORK( IPV )
580*                        MPC x NQC    MPC x K         K x NQC
581*
582            CALL DGEMM( 'No transpose', 'No transpose', MPC, NQC, K,
583     $                  -ONE, WORK( IPW ), LW, WORK( IPV ), LV, ONE,
584     $                  C( IOFFC ), LDC )
585         END IF
586*
587      ELSE
588*
589*        V is stored rowwise
590*
591         IF( LSAME( SIDE, 'L' ) ) THEN
592*
593*           Form Q*sub( C ) or Q'*sub( C )
594*
595*           IROFFC = ICOFFV is required by the current transposition
596*           routine PBDTRAN
597*
598            MQV0 = NUMROC( M+ICOFFV, NBV, MYCOL, IVCOL, NPCOL )
599            IF( MYCOL.EQ.IVCOL ) THEN
600               MQV = MQV0 - ICOFFV
601            ELSE
602               MQV = MQV0
603            END IF
604            IF( MYROW.EQ.ICROW ) THEN
605               MPC0 = MPC + IROFFC
606            ELSE
607               MPC0 = MPC
608            END IF
609*
610*           Locally V( IOFFV ) is K x MQV, C( IOFFC ) is MPC x NQC
611*           WORK( IPV ) is MPC0 x K = [ . V( IOFFV ) ]'
612*           WORK( IPW ) is K x MQV0 = [ . V( IOFFV ) ]
613*           WORK( IPT ) is the workspace for PBDTRAN
614*
615            IPV = 1
616            IPW = IPV + MPC0 * K
617            IPT = IPW + K * MQV0
618            LV = MAX( 1, MPC0 )
619            LW = MAX( 1, K )
620*
621            IF( MYROW.EQ.IVROW ) THEN
622               IF( MYCOL.EQ.IVCOL ) THEN
623                  CALL DLASET( 'All', K, ICOFFV, ZERO, ZERO,
624     $                         WORK( IPW ), LW )
625                  IPW1 = IPW + ICOFFV * LW
626                  CALL DLAMOV( 'All', K, MQV, V( IOFFV ), LDV,
627     $                         WORK( IPW1 ), LW )
628               ELSE
629                  IPW1 = IPW
630                  CALL DLAMOV( 'All', K, MQV, V( IOFFV ), LDV,
631     $                         WORK( IPW1 ), LW )
632               END IF
633*
634               IF( FORWARD ) THEN
635*
636*                 WORK( IPW ) = ( . V1 V2 ) where V1 is unit upper
637*                 triangular, zeroes lower triangular part of V1
638*
639                  MYDIST = MOD( MYCOL-IVCOL+NPCOL, NPCOL )
640                  ILEFT = MAX( 0, MYDIST * NBV - ICOFFV )
641                  JJBEG = JJV
642                  JJEND = JJV + MQV - 1
643                  JJNXT = MIN( ICEIL( JJBEG, NBV ) * NBV, JJEND )
644*
645   50             CONTINUE
646                  IF( ( K-ILEFT ).GT.0 ) THEN
647                     CALL DLASET( 'Lower', K-ILEFT, JJNXT-JJBEG+1, ZERO,
648     $                            ONE,
649     $                            WORK( IPW1+ILEFT+(JJBEG-JJV)*LW ),
650     $                            LW )
651                     MYDIST = MYDIST + NPCOL
652                     ILEFT = MYDIST * NBV - ICOFFV
653                     JJBEG = JJNXT + 1
654                     JJNXT = MIN( JJNXT+NBV, JJEND )
655                     GO TO 50
656                  END IF
657*
658               ELSE
659*
660*                 WORK( IPW ) = ( . V1 V2 ) where V2 is unit lower
661*                 triangular, zeroes upper triangular part of V2.
662*
663                  II = IIV
664                  CALL INFOG1L( JV+M-K, NBV, NPCOL, MYCOL,
665     $                          DESCV( CSRC_ ), JJ, ILASTCOL )
666                  IOFF = MOD( JV+M-K-1, NBV )
667                  KQ = NUMROC( K+IOFF, NBV, MYCOL, ILASTCOL, NPCOL )
668                  IF( MYCOL.EQ.ILASTCOL )
669     $               KQ = KQ - IOFF
670                  MYDIST = MOD( MYCOL-ILASTCOL+NPCOL, NPCOL )
671                  ILEFT = MYDIST * NBV - IOFF
672                  IRIGHT = MIN( ILEFT+NBV, K )
673                  ILEFT = MIN( MAX( 0, ILEFT ), K )
674*
675   60             CONTINUE
676                  IF( II.LE.( IIV+K-1 ) ) THEN
677                     WIDE = IRIGHT - ILEFT
678                     CALL DLASET( 'All', ILEFT-II+IIV, KQ, ZERO, ZERO,
679     $                            WORK( IPW1+II-IIV+(JJ-JJV)*LW ), LW )
680                     CALL DLASET( 'Upper', WIDE, KQ, ZERO, ONE,
681     $                            WORK( IPW1+ILEFT+(JJ-JJV)*LW ), LW )
682                     KQ = MAX( 0, KQ - WIDE )
683                     II = IIV + IRIGHT
684                     JJ = JJ + WIDE
685                     MYDIST = MYDIST + NPCOL
686                     ILEFT = MYDIST * NBV - IOFF
687                     IRIGHT = MIN( ILEFT + NBV, K )
688                     ILEFT = MIN( ILEFT, K )
689                     GO TO 60
690                  END IF
691               END IF
692            END IF
693*
694*           WORK( IPV ) = WORK( IPW )' (replicated) is MPC0 x K
695*
696            CALL PBDTRAN( ICTXT, 'Rowwise', 'Transpose', K, M+ICOFFV,
697     $                    NBV, WORK( IPW ), LW, ZERO, WORK( IPV ), LV,
698     $                    IVROW, IVCOL, ICROW, -1, WORK( IPT ) )
699*
700*           WORK( IPV ) = ( . V )' -> WORK( IPV ) = V' is MPC x K
701*
702            IF( MYROW.EQ.ICROW )
703     $         IPV = IPV + IROFFC
704*
705*           WORK( IPW ) becomes NQC x K = C( IOFFC )' * V'
706*           WORK( IPW ) = C( IOFFC )' * V'  (NQC x MPC x K) -> NQC x K
707*
708            LW = MAX( 1, NQC )
709*
710            IF( MPC.GT.0 ) THEN
711               CALL DGEMM( 'Transpose', 'No transpose', NQC, K, MPC,
712     $                     ONE, C( IOFFC ), LDC, WORK( IPV ), LV, ZERO,
713     $                     WORK( IPW ), LW )
714            ELSE
715               CALL DLASET( 'All', NQC, K, ZERO, ZERO, WORK( IPW ), LW )
716            END IF
717*
718            CALL DGSUM2D( ICTXT, 'Columnwise', ' ', NQC, K, WORK( IPW ),
719     $                    LW, IVROW, MYCOL )
720*
721*           WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T
722*
723            IF( MYROW.EQ.IVROW ) THEN
724               IF( MYCOL.EQ.IVCOL ) THEN
725*
726*                 Broadcast the block reflector to the other columns.
727*
728                  CALL DTRBS2D( ICTXT, 'Rowwise', ' ', UPLO, 'Non unit',
729     $                          K, K, T, MBV )
730               ELSE
731                  CALL DTRBR2D( ICTXT, 'Rowwise', ' ', UPLO, 'Non unit',
732     $                          K, K, T, MBV, MYROW, IVCOL )
733               END IF
734               CALL DTRMM( 'Right', UPLO, TRANST, 'Non unit', NQC, K,
735     $                     ONE, T, MBV, WORK( IPW ), LW )
736*
737               CALL DGEBS2D( ICTXT, 'Columnwise', ' ', NQC, K,
738     $                       WORK( IPW ), LW )
739            ELSE
740               CALL DGEBR2D( ICTXT, 'Columnwise', ' ', NQC, K,
741     $                       WORK( IPW ), LW, IVROW, MYCOL )
742            END IF
743*
744*               C            C      -     V'      *     W'
745*           C( IOFFC ) = C( IOFFC ) - WORK( IPV ) * WORK( IPW )'
746*                        MPC x NQC    MPC x K         K x NQC
747*
748            CALL DGEMM( 'No transpose', 'Transpose', MPC, NQC, K, -ONE,
749     $                  WORK( IPV ), LV, WORK( IPW ), LW, ONE,
750     $                  C( IOFFC ), LDC )
751*
752         ELSE
753*
754*           Form Q*sub( C ) or Q'*sub( C )
755*
756*           Locally V( IOFFV ) is K x NQV, C( IOFFC ) is MPC x NQC
757*           WORK( IPV ) is K x NQV = V( IOFFV ), NQV = NQC
758*           WORK( IPW ) is MPC x K = C( IOFFC ) * V( IOFFV )'
759*
760            IPV = 1
761            IPW = IPV + K * NQC
762            LV = MAX( 1, K )
763            LW = MAX( 1, MPC )
764*
765*           Broadcast V to the other process rows.
766*
767            CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP )
768            IF( MYROW.EQ.IVROW ) THEN
769               CALL DGEBS2D( ICTXT, 'Columnwise', COLBTOP, K, NQC,
770     $                       V( IOFFV ), LDV )
771               IF( MYCOL.EQ.IVCOL )
772     $            CALL DTRBS2D( ICTXT, 'Columnwise', COLBTOP, UPLO,
773     $                          'Non unit', K, K, T, MBV )
774               CALL DLAMOV( 'All', K, NQC, V( IOFFV ), LDV, WORK( IPV ),
775     $                      LV )
776            ELSE
777               CALL DGEBR2D( ICTXT, 'Columnwise', COLBTOP, K, NQC,
778     $                       WORK( IPV ), LV, IVROW, MYCOL )
779               IF( MYCOL.EQ.IVCOL )
780     $            CALL DTRBR2D( ICTXT, 'Columnwise', COLBTOP, UPLO,
781     $                          'Non unit', K, K, T, MBV, IVROW, MYCOL )
782            END IF
783*
784            IF( FORWARD ) THEN
785*
786*              WORK(IPW) = ( V1 V2 ) where V1 is unit upper
787*              triangular, zeroes lower triangular part of V1
788*
789               MYDIST = MOD( MYCOL-IVCOL+NPCOL, NPCOL )
790               ILEFT = MAX( 0, MYDIST * NBV - ICOFFV )
791               JJBEG = JJV
792               JJEND = JJV + NQC - 1
793               JJNXT = MIN( ICEIL( JJBEG, NBV ) * NBV, JJEND )
794*
795   70          CONTINUE
796               IF( ( K-ILEFT ).GT.0 ) THEN
797                  CALL DLASET( 'Lower', K-ILEFT, JJNXT-JJBEG+1, ZERO,
798     $                            ONE, WORK( IPV+ILEFT+(JJBEG-JJV)*LV ),
799     $                            LV )
800                  MYDIST = MYDIST + NPCOL
801                  ILEFT = MYDIST * NBV - ICOFFV
802                  JJBEG = JJNXT + 1
803                  JJNXT = MIN( JJNXT+NBV, JJEND )
804                  GO TO 70
805               END IF
806*
807            ELSE
808*
809*              WORK( IPW ) = ( . V1 V2 ) where V2 is unit lower
810*              triangular, zeroes upper triangular part of V2.
811*
812               II = IIV
813               CALL INFOG1L( JV+N-K, NBV, NPCOL, MYCOL, DESCV( CSRC_ ),
814     $                       JJ, ILASTCOL )
815               IOFF = MOD( JV+N-K-1, NBV )
816               KQ = NUMROC( K+IOFF, NBV, MYCOL, ILASTCOL, NPCOL )
817               IF( MYCOL.EQ.ILASTCOL )
818     $            KQ = KQ - IOFF
819               MYDIST = MOD( MYCOL-ILASTCOL+NPCOL, NPCOL )
820               ILEFT = MYDIST * NBV - IOFF
821               IRIGHT = MIN( ILEFT+NBV, K )
822               ILEFT = MIN( MAX( 0, ILEFT ), K )
823*
824   80          CONTINUE
825               IF( II.LE.( IIV+K-1 ) ) THEN
826                  WIDE = IRIGHT - ILEFT
827                  CALL DLASET( 'All', ILEFT-II+IIV, KQ, ZERO, ZERO,
828     $                         WORK( IPV+II-IIV+(JJ-JJV)*LV ), LV )
829                  CALL DLASET( 'Upper', WIDE, KQ, ZERO, ONE,
830     $                         WORK( IPV+ILEFT+(JJ-JJV)*LV ), LV )
831                  KQ = MAX( 0, KQ - WIDE )
832                  II = IIV + IRIGHT
833                  JJ = JJ + WIDE
834                  MYDIST = MYDIST + NPCOL
835                  ILEFT = MYDIST * NBV - IOFF
836                  IRIGHT = MIN( ILEFT + NBV, K )
837                  ILEFT = MIN( ILEFT, K )
838                  GO TO 80
839               END IF
840*
841            END IF
842*
843*           WORK( IPV ) is K x NQC = V = V( IOFFV )
844*           WORK( IPW ) = C( IOFFC ) * V'  (MPC x NQC x K) -> MPC x K
845*
846            IF( NQC.GT.0 ) THEN
847               CALL DGEMM( 'No Transpose', 'Transpose', MPC, K, NQC,
848     $                    ONE, C( IOFFC ), LDC, WORK( IPV ), LV, ZERO,
849     $                    WORK( IPW ), LW )
850            ELSE
851               CALL DLASET( 'All', MPC, K, ZERO, ZERO, WORK( IPW ), LW )
852            END IF
853*
854            CALL DGSUM2D( ICTXT, 'Rowwise', ' ', MPC, K, WORK( IPW ),
855     $                    LW, MYROW, IVCOL )
856*
857*           WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T
858*
859            IF( MYCOL.EQ.IVCOL ) THEN
860               CALL DTRMM( 'Right', UPLO, TRANS, 'Non unit', MPC, K,
861     $                     ONE, T, MBV, WORK( IPW ), LW )
862               CALL DGEBS2D( ICTXT, 'Rowwise', ' ', MPC, K, WORK( IPW ),
863     $                       LW )
864            ELSE
865               CALL DGEBR2D( ICTXT, 'Rowwise', ' ', MPC, K, WORK( IPW ),
866     $                       LW, MYROW, IVCOL )
867            END IF
868*
869*               C            C      -     W       *     V
870*           C( IOFFC ) = C( IOFFC ) - WORK( IPW ) * WORK( IPV )
871*                        MPC x NQC    MPC x K         K x NQC
872*
873            CALL DGEMM( 'No transpose', 'No transpose', MPC, NQC, K,
874     $                  -ONE, WORK( IPW ), LW, WORK( IPV ), LV, ONE,
875     $                  C( IOFFC ), LDC )
876*
877         END IF
878*
879      END IF
880*
881      RETURN
882*
883*     End of PDLARFB
884*
885      END
886