1      SUBROUTINE PZLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, IV,
2     $                    JV, DESCV, T, C, IC, JC, DESCC, WORK )
3*
4*  -- ScaLAPACK routine (version 2.0.2) --
5*     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
6*     May 1 2012
7*
8*     .. Scalar Arguments ..
9      CHARACTER          SIDE, TRANS, DIRECT, STOREV
10      INTEGER            IC, IV, JC, JV, K, M, N
11*     ..
12*     .. Array Arguments ..
13      INTEGER            DESCC( * ), DESCV( * )
14      COMPLEX*16         C( * ), T( * ), V( * ), WORK( * )
15*     ..
16*
17*  Purpose
18*  =======
19*
20*  PZLARFB applies a complex block reflector Q or its conjugate
21*  transpose Q**H to a complex M-by-N distributed matrix sub( C )
22*  denoting C(IC:IC+M-1,JC:JC+N-1), from the left or the right.
23*
24*  Notes
25*  =====
26*
27*  Each global data object is described by an associated description
28*  vector.  This vector stores the information required to establish
29*  the mapping between an object element and its corresponding process
30*  and memory location.
31*
32*  Let A be a generic term for any 2D block cyclicly distributed array.
33*  Such a global array has an associated description vector DESCA.
34*  In the following comments, the character _ should be read as
35*  "of the global array".
36*
37*  NOTATION        STORED IN      EXPLANATION
38*  --------------- -------------- --------------------------------------
39*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
40*                                 DTYPE_A = 1.
41*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
42*                                 the BLACS process grid A is distribu-
43*                                 ted over. The context itself is glo-
44*                                 bal, but the handle (the integer
45*                                 value) may vary.
46*  M_A    (global) DESCA( M_ )    The number of rows in the global
47*                                 array A.
48*  N_A    (global) DESCA( N_ )    The number of columns in the global
49*                                 array A.
50*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
51*                                 the rows of the array.
52*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
53*                                 the columns of the array.
54*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
55*                                 row of the array A is distributed.
56*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
57*                                 first column of the array A is
58*                                 distributed.
59*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
60*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
61*
62*  Let K be the number of rows or columns of a distributed matrix,
63*  and assume that its process grid has dimension p x q.
64*  LOCr( K ) denotes the number of elements of K that a process
65*  would receive if K were distributed over the p processes of its
66*  process column.
67*  Similarly, LOCc( K ) denotes the number of elements of K that a
68*  process would receive if K were distributed over the q processes of
69*  its process row.
70*  The values of LOCr() and LOCc() may be determined via a call to the
71*  ScaLAPACK tool function, NUMROC:
72*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
73*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
74*  An upper bound for these quantities may be computed by:
75*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
76*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
77*
78*  Arguments
79*  =========
80*
81*  SIDE    (global input) CHARACTER
82*          = 'L': apply Q or Q**H from the Left;
83*          = 'R': apply Q or Q**H from the Right.
84*
85*  TRANS   (global input) CHARACTER
86*          = 'N':  No transpose, apply Q;
87*          = 'C':  Conjugate transpose, apply Q**H.
88*
89*  DIRECT  (global input) CHARACTER
90*          Indicates how Q is formed from a product of elementary
91*          reflectors
92*          = 'F': Q = H(1) H(2) . . . H(k) (Forward)
93*          = 'B': Q = H(k) . . . H(2) H(1) (Backward)
94*
95*  STOREV  (global input) CHARACTER
96*          Indicates how the vectors which define the elementary
97*          reflectors are stored:
98*          = 'C': Columnwise
99*          = 'R': Rowwise
100*
101*  M       (global input) INTEGER
102*          The number of rows to be operated on i.e the number of rows
103*          of the distributed submatrix sub( C ). M >= 0.
104*
105*  N       (global input) INTEGER
106*          The number of columns to be operated on i.e the number of
107*          columns of the distributed submatrix sub( C ). N >= 0.
108*
109*  K       (global input) INTEGER
110*          The order of the matrix T (= the number of elementary
111*          reflectors whose product defines the block reflector).
112*
113*  V       (local input) COMPLEX*16 pointer into the local memory
114*          to an array of dimension ( LLD_V, LOCc(JV+K-1) ) if
115*          STOREV = 'C', ( LLD_V, LOCc(JV+M-1)) if STOREV = 'R' and
116*          SIDE = 'L', ( LLD_V, LOCc(JV+N-1) ) if STOREV = 'R' and
117*          SIDE = 'R'. It contains the local pieces of the distributed
118*          vectors V representing the Householder transformation.
119*          See further details.
120*          If STOREV = 'C' and SIDE = 'L', LLD_V >= MAX(1,LOCr(IV+M-1));
121*          if STOREV = 'C' and SIDE = 'R', LLD_V >= MAX(1,LOCr(IV+N-1));
122*          if STOREV = 'R', LLD_V >= LOCr(IV+K-1).
123*
124*  IV      (global input) INTEGER
125*          The row index in the global array V indicating the first
126*          row of sub( V ).
127*
128*  JV      (global input) INTEGER
129*          The column index in the global array V indicating the
130*          first column of sub( V ).
131*
132*  DESCV   (global and local input) INTEGER array of dimension DLEN_.
133*          The array descriptor for the distributed matrix V.
134*
135*  T       (local input) COMPLEX*16 array, dimension MB_V by MB_V
136*          if STOREV = 'R' and NB_V by NB_V if STOREV = 'C'. The trian-
137*          gular matrix T in the representation of the block reflector.
138*
139*  C       (local input/local output) COMPLEX*16 pointer into the
140*          local memory to an array of dimension (LLD_C,LOCc(JC+N-1)).
141*          On entry, the M-by-N distributed matrix sub( C ). On exit,
142*          sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) or
143*          sub( C )*Q or sub( C )*Q'.
144*
145*  IC      (global input) INTEGER
146*          The row index in the global array C indicating the first
147*          row of sub( C ).
148*
149*  JC      (global input) INTEGER
150*          The column index in the global array C indicating the
151*          first column of sub( C ).
152*
153*  DESCC   (global and local input) INTEGER array of dimension DLEN_.
154*          The array descriptor for the distributed matrix C.
155*
156*  WORK    (local workspace) COMPLEX*16 array, dimension (LWORK)
157*          If STOREV = 'C',
158*            if SIDE = 'L',
159*              LWORK >= ( NqC0 + MpC0 ) * K
160*            else if SIDE = 'R',
161*              LWORK >= ( NqC0 + MAX( NpV0 + NUMROC( NUMROC( N+ICOFFC,
162*                         NB_V, 0, 0, NPCOL ), NB_V, 0, 0, LCMQ ),
163*                         MpC0 ) ) * K
164*            end if
165*          else if STOREV = 'R',
166*            if SIDE = 'L',
167*              LWORK >= ( MpC0 + MAX( MqV0 + NUMROC( NUMROC( M+IROFFC,
168*                         MB_V, 0, 0, NPROW ), MB_V, 0, 0, LCMP ),
169*                         NqC0 ) ) * K
170*            else if SIDE = 'R',
171*              LWORK >= ( MpC0 + NqC0 ) * K
172*            end if
173*          end if
174*
175*          where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ),
176*
177*          IROFFV = MOD( IV-1, MB_V ), ICOFFV = MOD( JV-1, NB_V ),
178*          IVROW = INDXG2P( IV, MB_V, MYROW, RSRC_V, NPROW ),
179*          IVCOL = INDXG2P( JV, NB_V, MYCOL, CSRC_V, NPCOL ),
180*          MqV0 = NUMROC( M+ICOFFV, NB_V, MYCOL, IVCOL, NPCOL ),
181*          NpV0 = NUMROC( N+IROFFV, MB_V, MYROW, IVROW, NPROW ),
182*
183*          IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ),
184*          ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ),
185*          ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ),
186*          MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ),
187*          NpC0 = NUMROC( N+ICOFFC, MB_C, MYROW, ICROW, NPROW ),
188*          NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ),
189*
190*          ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions;
191*          MYROW, MYCOL, NPROW and NPCOL can be determined by calling
192*          the subroutine BLACS_GRIDINFO.
193*
194*  Alignment requirements
195*  ======================
196*
197*  The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1)
198*  must verify some alignment properties, namely the following
199*  expressions should be true:
200*
201*  If STOREV = 'Columnwise'
202*    If SIDE = 'Left',
203*      ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW )
204*    If SIDE = 'Right',
205*      ( MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC )
206*  else if STOREV = 'Rowwise'
207*    If SIDE = 'Left',
208*      ( NB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC )
209*    If SIDE = 'Right',
210*      ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL )
211*  end if
212*
213*  =====================================================================
214*
215*     .. Parameters ..
216      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
217     $                   LLD_, MB_, M_, NB_, N_, RSRC_
218      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
219     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
220     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
221      COMPLEX*16         ONE, ZERO
222      PARAMETER          ( ONE  = ( 1.0D+0, 0.0D+0 ),
223     $                     ZERO = ( 0.0D+0, 0.0D+0 ) )
224*     ..
225*     .. Local Scalars ..
226      LOGICAL            FORWARD
227      CHARACTER          COLBTOP, ROWBTOP, TRANST, UPLO
228      INTEGER            HEIGHT, IBASE, ICCOL, ICOFFC, ICOFFV, ICROW,
229     $                   ICTXT, II, IIBEG, IIC, IIEND, IINXT, IIV,
230     $                   ILASTCOL, ILASTROW, ILEFT, IOFF, IOFFC, IOFFV,
231     $                   IPT, IPV, IPW, IPW1, IRIGHT, IROFFC, IROFFV,
232     $                   ITOP, IVCOL, IVROW, JJ, JJBEG, JJC, JJEND,
233     $                   JJNXT, JJV, KP, KQ, LDC, LDV, LV, LW, MBV, MPC,
234     $                   MPC0, MQV, MQV0, MYCOL, MYDIST, MYROW, NBV,
235     $                   NPV, NPV0, NPCOL, NPROW, NQC, NQC0, WIDE
236*     ..
237*     .. External Subroutines ..
238      EXTERNAL           BLACS_GRIDINFO, INFOG1L, INFOG2L, PB_TOPGET,
239     $                   PBZTRAN, ZGEBR2D, ZGEBS2D, ZGEMM,
240     $                   ZGSUM2D, ZLAMOV, ZLASET, ZTRBR2D,
241     $                   ZTRBS2D, ZTRMM
242*     ..
243*     .. Intrinsic Functions ..
244      INTRINSIC          MAX, MIN, MOD
245*     ..
246*     .. External Functions ..
247      LOGICAL            LSAME
248      INTEGER            ICEIL, NUMROC
249      EXTERNAL           ICEIL, LSAME, NUMROC
250*     ..
251*     .. Executable Statements ..
252*
253*     Quick return if possible
254*
255      IF( M.LE.0 .OR. N.LE.0 .OR. K.LE.0 )
256     $   RETURN
257*
258*     Get grid parameters
259*
260      ICTXT = DESCC( CTXT_ )
261      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
262*
263      IF( LSAME( TRANS, 'N' ) ) THEN
264          TRANST = 'C'
265      ELSE
266          TRANST = 'N'
267      END IF
268      FORWARD = LSAME( DIRECT, 'F' )
269      IF( FORWARD ) THEN
270         UPLO = 'U'
271      ELSE
272         UPLO = 'L'
273      END IF
274*
275      CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, IIV, JJV,
276     $              IVROW, IVCOL )
277      CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, IIC, JJC,
278     $              ICROW, ICCOL )
279      LDC = DESCC( LLD_ )
280      LDV = DESCV( LLD_ )
281      IIC = MIN( IIC, LDC )
282      IIV = MIN( IIV, LDV )
283      IROFFC = MOD( IC-1, DESCC( MB_ ) )
284      ICOFFC = MOD( JC-1, DESCC( NB_ ) )
285      MBV = DESCV( MB_ )
286      NBV = DESCV( NB_ )
287      IROFFV = MOD( IV-1, MBV )
288      ICOFFV = MOD( JV-1, NBV )
289      MPC = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW )
290      NQC = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL )
291      IF( MYCOL.EQ.ICCOL )
292     $   NQC = NQC - ICOFFC
293      IF( MYROW.EQ.ICROW )
294     $   MPC = MPC - IROFFC
295      JJC = MIN( JJC, MAX( 1, JJC+NQC-1 ) )
296      JJV = MIN( JJV, MAX( 1, NUMROC( DESCV( N_ ), NBV, MYCOL,
297     $                                DESCV( CSRC_ ), NPCOL ) ) )
298      IOFFC = IIC + ( JJC-1 ) * LDC
299      IOFFV = IIV + ( JJV-1 ) * LDV
300*
301      IF( LSAME( STOREV, 'C' ) ) THEN
302*
303*        V is stored columnwise
304*
305         IF( LSAME( SIDE, 'L' ) ) THEN
306*
307*           Form  Q*sub( C )  or  Q'*sub( C  )
308*
309*           Locally V( IOFFV ) is MPV x K, C( IOFFC ) is MPC x NQC
310*           WORK( IPV ) is MPC x K = V( IOFFV ), MPC = MPV
311*           WORK( IPW ) is NQC x K = C( IOFFC )' * V( IOFFV )
312*
313            IPV = 1
314            IPW = IPV + MPC * K
315            LV = MAX( 1, MPC )
316            LW = MAX( 1, NQC )
317*
318*           Broadcast V to the other process columns.
319*
320            CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP )
321            IF( MYCOL.EQ.IVCOL ) THEN
322               CALL ZGEBS2D( ICTXT, 'Rowwise', ROWBTOP, MPC, K,
323     $                       V( IOFFV ), LDV )
324               IF( MYROW.EQ.IVROW )
325     $            CALL ZTRBS2D( ICTXT, 'Rowwise', ROWBTOP, UPLO,
326     $                          'Non unit', K, K, T, NBV )
327               CALL ZLAMOV( 'All', MPC, K, V( IOFFV ), LDV, WORK( IPV ),
328     $                      LV )
329            ELSE
330               CALL ZGEBR2D( ICTXT, 'Rowwise', ROWBTOP, MPC, K,
331     $                       WORK( IPV ), LV, MYROW, IVCOL )
332               IF( MYROW.EQ.IVROW )
333     $            CALL ZTRBR2D( ICTXT, 'Rowwise', ROWBTOP, UPLO,
334     $                          'Non unit', K, K, T, NBV, MYROW, IVCOL )
335            END IF
336*
337            IF( FORWARD ) THEN
338*
339*              WORK(IPV) = ( V1 ) where V1 is unit lower triangular,
340*                          ( V2 ) zeroes upper triangular part of V1
341*
342               MYDIST = MOD( MYROW-IVROW+NPROW, NPROW )
343               ITOP = MAX( 0, MYDIST*MBV - IROFFV )
344               IIBEG = IIV
345               IIEND = IIBEG + MPC - 1
346               IINXT = MIN( ICEIL( IIBEG, MBV )*MBV, IIEND )
347*
348   10          CONTINUE
349               IF( K-ITOP .GT.0 ) THEN
350                  CALL ZLASET( 'Upper', IINXT-IIBEG+1, K-ITOP, ZERO,
351     $                         ONE, WORK( IPV+IIBEG-IIV+ITOP*LV ), LV )
352                  MYDIST = MYDIST + NPROW
353                  ITOP = MYDIST * MBV - IROFFV
354                  IIBEG = IINXT + 1
355                  IINXT = MIN( IINXT+MBV, IIEND )
356                  GO TO 10
357               END IF
358*
359            ELSE
360*
361*              WORK(IPV) = ( V1 ) where V2 is unit upper triangular,
362*                          ( V2 ) zeroes lower triangular part of V2
363*
364               JJ = JJV
365               IOFF = MOD( IV+M-K-1, MBV )
366               CALL INFOG1L( IV+M-K, MBV, NPROW, MYROW, DESCV( RSRC_ ),
367     $                       II, ILASTROW )
368               KP = NUMROC( K+IOFF, MBV, MYROW, ILASTROW, NPROW )
369               IF( MYROW.EQ.ILASTROW )
370     $            KP = KP - IOFF
371               MYDIST = MOD( MYROW-ILASTROW+NPROW, NPROW )
372               ITOP = MYDIST * MBV - IOFF
373               IBASE = MIN( ITOP+MBV, K )
374               ITOP = MIN( MAX( 0, ITOP ), K )
375*
376   20          CONTINUE
377               IF( JJ.LE.( JJV+K-1 ) ) THEN
378                  HEIGHT = IBASE - ITOP
379                  CALL ZLASET( 'All', KP, ITOP-JJ+JJV, ZERO, ZERO,
380     $                         WORK( IPV+II-IIV+(JJ-JJV)*LV ), LV )
381                  CALL ZLASET( 'Lower', KP, HEIGHT, ZERO, ONE,
382     $                         WORK( IPV+II-IIV+ITOP*LV ), LV )
383                  KP = MAX( 0, KP - HEIGHT )
384                  II = II + HEIGHT
385                  JJ = JJV + IBASE
386                  MYDIST = MYDIST + NPROW
387                  ITOP = MYDIST * MBV - IOFF
388                  IBASE = MIN( ITOP + MBV, K )
389                  ITOP = MIN( ITOP, K )
390                  GO TO 20
391               END IF
392*
393            END IF
394*
395*           WORK( IPW ) = C( IOFFC )' * V  (NQC x MPC x K) -> NQC x K
396*
397            IF( MPC.GT.0 ) THEN
398               CALL ZGEMM( 'Conjugate transpose', 'No transpose', NQC,
399     $                    K, MPC, ONE, C( IOFFC ), LDC, WORK( IPV ), LV,
400     $                    ZERO, WORK( IPW ), LW )
401            ELSE
402               CALL ZLASET( 'All', NQC, K, ZERO, ZERO, WORK( IPW ), LW )
403            END IF
404*
405            CALL ZGSUM2D( ICTXT, 'Columnwise', ' ', NQC, K, WORK( IPW ),
406     $                    LW, IVROW, MYCOL )
407*
408            IF( MYROW.EQ.IVROW ) THEN
409*
410*              WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T
411*
412               CALL ZTRMM( 'Right', UPLO, TRANST, 'Non unit', NQC, K,
413     $                     ONE, T, NBV, WORK( IPW ), LW )
414               CALL ZGEBS2D( ICTXT, 'Columnwise', ' ', NQC, K,
415     $                       WORK( IPW ), LW )
416            ELSE
417               CALL ZGEBR2D( ICTXT, 'Columnwise', ' ', NQC, K,
418     $                       WORK( IPW ), LW, IVROW, MYCOL )
419            END IF
420*
421*               C            C      -     V       *     W'
422*           C( IOFFC ) = C( IOFFC ) - WORK( IPV ) * WORK( IPW )'
423*                        MPC x NQC    MPC x K         K x NQC
424*
425            CALL ZGEMM( 'No transpose', 'Conjugate transpose', MPC, NQC,
426     $                  K, -ONE, WORK( IPV ), LV, WORK( IPW ), LW, ONE,
427     $                  C( IOFFC ), LDC )
428*
429         ELSE
430*
431*           Form sub( C )*Q or sub( C )*Q'
432*
433*           ICOFFC = IROFFV is required by the current transposition
434*           routine PBZTRAN
435*
436            NPV0 = NUMROC( N+IROFFV, MBV, MYROW, IVROW, NPROW )
437            IF( MYROW.EQ.IVROW ) THEN
438               NPV = NPV0 - IROFFV
439            ELSE
440               NPV = NPV0
441            END IF
442            IF( MYCOL.EQ.ICCOL ) THEN
443               NQC0 = NQC + ICOFFC
444            ELSE
445               NQC0 = NQC
446            END IF
447*
448*           Locally V( IOFFV ) is NPV x K C( IOFFC ) is MPC x NQC
449*           WORK( IPV ) is K x NQC0 = [ . V( IOFFV ) ]'
450*           WORK( IPW ) is NPV0 x K = [ . V( IOFFV )' ]'
451*           WORK( IPT ) is the workspace for PBZTRAN
452*
453            IPV = 1
454            IPW = IPV + K * NQC0
455            IPT = IPW + NPV0 * K
456            LV = MAX( 1, K )
457            LW = MAX( 1, NPV0 )
458*
459            IF( MYCOL.EQ.IVCOL ) THEN
460               IF( MYROW.EQ.IVROW ) THEN
461                  CALL ZLASET( 'All', IROFFV, K, ZERO, ZERO,
462     $                         WORK( IPW ), LW )
463                  IPW1 = IPW + IROFFV
464                  CALL ZLAMOV( 'All', NPV, K, V( IOFFV ), LDV,
465     $                         WORK( IPW1 ), LW )
466               ELSE
467                  IPW1 = IPW
468                  CALL ZLAMOV( 'All', NPV, K, V( IOFFV ), LDV,
469     $                         WORK( IPW1 ), LW )
470               END IF
471*
472               IF( FORWARD ) THEN
473*
474*                 WORK(IPW) = ( . V1' V2' )' where V1 is unit lower
475*                 triangular, zeroes upper triangular part of V1
476*
477                  MYDIST = MOD( MYROW-IVROW+NPROW, NPROW )
478                  ITOP = MAX( 0, MYDIST*MBV - IROFFV )
479                  IIBEG = IIV
480                  IIEND = IIBEG + NPV - 1
481                  IINXT = MIN( ICEIL( IIBEG, MBV )*MBV, IIEND )
482*
483   30             CONTINUE
484                  IF( ( K-ITOP ).GT.0 ) THEN
485                     CALL ZLASET( 'Upper', IINXT-IIBEG+1, K-ITOP, ZERO,
486     $                            ONE, WORK( IPW1+IIBEG-IIV+ITOP*LW ),
487     $                            LW )
488                     MYDIST = MYDIST + NPROW
489                     ITOP = MYDIST * MBV - IROFFV
490                     IIBEG = IINXT + 1
491                     IINXT = MIN( IINXT+MBV, IIEND )
492                     GO TO 30
493                  END IF
494*
495               ELSE
496*
497*                 WORK( IPW ) = ( . V1' V2' )' where V2 is unit upper
498*                 triangular, zeroes lower triangular part of V2.
499*
500                  JJ = JJV
501                  CALL INFOG1L( IV+N-K, MBV, NPROW, MYROW,
502     $                          DESCV( RSRC_ ), II, ILASTROW )
503                  IOFF = MOD( IV+N-K-1, MBV )
504                  KP = NUMROC( K+IOFF, MBV, MYROW, ILASTROW, NPROW )
505                  IF( MYROW.EQ.ILASTROW )
506     $               KP = KP - IOFF
507                  MYDIST = MOD( MYROW-ILASTROW+NPROW, NPROW )
508                  ITOP = MYDIST * MBV - IOFF
509                  IBASE = MIN( ITOP+MBV, K )
510                  ITOP = MIN( MAX( 0, ITOP ), K )
511*
512   40             CONTINUE
513                  IF( JJ.LE.( JJV+K-1 ) ) THEN
514                     HEIGHT = IBASE - ITOP
515                     CALL ZLASET( 'All', KP, ITOP-JJ+JJV, ZERO, ZERO,
516     $                            WORK( IPW1+II-IIV+(JJ-JJV)*LW ), LW )
517                     CALL ZLASET( 'Lower', KP, HEIGHT, ZERO, ONE,
518     $                            WORK( IPW1+II-IIV+ITOP*LW ), LW )
519                     KP = MAX( 0, KP - HEIGHT )
520                     II = II + HEIGHT
521                     JJ = JJV + IBASE
522                     MYDIST = MYDIST + NPROW
523                     ITOP = MYDIST * MBV - IOFF
524                     IBASE = MIN( ITOP + MBV, K )
525                     ITOP = MIN( ITOP, K )
526                     GO TO 40
527                  END IF
528               END IF
529            END IF
530*
531            CALL PBZTRAN( ICTXT, 'Columnwise', 'Conjugate transpose',
532     $                    N+IROFFV, K, MBV, WORK( IPW ), LW, ZERO,
533     $                    WORK( IPV ), LV, IVROW, IVCOL, -1, ICCOL,
534     $                    WORK( IPT ) )
535*
536*           WORK( IPV ) = ( . V' ) -> WORK( IPV ) = V' is K x NQC
537*
538            IF( MYCOL.EQ.ICCOL )
539     $         IPV = IPV + ICOFFC * LV
540*
541*           WORK( IPW ) becomes MPC x K = C( IOFFC ) * V
542*           WORK( IPW ) = C( IOFFC ) * V  (MPC x NQC x K) -> MPC x K
543*
544            LW = MAX( 1, MPC )
545*
546            IF( NQC.GT.0 ) THEN
547               CALL ZGEMM( 'No transpose', 'Conjugate transpose', MPC,
548     $                     K, NQC, ONE, C( IOFFC ), LDC, WORK( IPV ),
549     $                     LV, ZERO, WORK( IPW ), LW )
550            ELSE
551               CALL ZLASET( 'All', MPC, K, ZERO, ZERO, WORK( IPW ), LW )
552            END IF
553*
554            CALL ZGSUM2D( ICTXT, 'Rowwise', ' ', MPC, K, WORK( IPW ),
555     $                    LW, MYROW, IVCOL )
556*
557*           WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T
558*
559            IF( MYCOL.EQ.IVCOL ) THEN
560               IF( MYROW.EQ.IVROW ) THEN
561*
562*                 Broadcast the block reflector to the other rows.
563*
564                  CALL ZTRBS2D( ICTXT, 'Columnwise', ' ', UPLO,
565     $                          'Non unit', K, K, T, NBV )
566               ELSE
567                  CALL ZTRBR2D( ICTXT, 'Columnwise', ' ', UPLO,
568     $                          'Non unit', K, K, T, NBV, IVROW, MYCOL )
569               END IF
570               CALL ZTRMM( 'Right', UPLO, TRANS, 'Non unit', MPC, K,
571     $                     ONE, T, NBV, WORK( IPW ), LW )
572*
573               CALL ZGEBS2D( ICTXT, 'Rowwise', ' ', MPC, K, WORK( IPW ),
574     $                       LW )
575            ELSE
576               CALL ZGEBR2D( ICTXT, 'Rowwise', ' ', MPC, K, WORK( IPW ),
577     $                       LW, MYROW, IVCOL )
578            END IF
579*
580*               C            C      -     W       *     V'
581*           C( IOFFC ) = C( IOFFC ) - WORK( IPW ) * WORK( IPV )
582*                        MPC x NQC    MPC x K         K x NQC
583*
584            CALL ZGEMM( 'No transpose', 'No transpose', MPC, NQC, K,
585     $                  -ONE, WORK( IPW ), LW, WORK( IPV ), LV, ONE,
586     $                  C( IOFFC ), LDC )
587         END IF
588*
589      ELSE
590*
591*        V is stored rowwise
592*
593         IF( LSAME( SIDE, 'L' ) ) THEN
594*
595*           Form Q*sub( C ) or Q'*sub( C )
596*
597*           IROFFC = ICOFFV is required by the current transposition
598*           routine PBZTRAN
599*
600            MQV0 = NUMROC( M+ICOFFV, NBV, MYCOL, IVCOL, NPCOL )
601            IF( MYCOL.EQ.IVCOL ) THEN
602               MQV = MQV0 - ICOFFV
603            ELSE
604               MQV = MQV0
605            END IF
606            IF( MYROW.EQ.ICROW ) THEN
607               MPC0 = MPC + IROFFC
608            ELSE
609               MPC0 = MPC
610            END IF
611*
612*           Locally V( IOFFV ) is K x MQV, C( IOFFC ) is MPC x NQC
613*           WORK( IPV ) is MPC0 x K = [ . V( IOFFV ) ]'
614*           WORK( IPW ) is K x MQV0 = [ . V( IOFFV ) ]
615*           WORK( IPT ) is the workspace for PBZTRAN
616*
617            IPV = 1
618            IPW = IPV + MPC0 * K
619            IPT = IPW + K * MQV0
620            LV = MAX( 1, MPC0 )
621            LW = MAX( 1, K )
622*
623            IF( MYROW.EQ.IVROW ) THEN
624               IF( MYCOL.EQ.IVCOL ) THEN
625                  CALL ZLASET( 'All', K, ICOFFV, ZERO, ZERO,
626     $                         WORK( IPW ), LW )
627                  IPW1 = IPW + ICOFFV * LW
628                  CALL ZLAMOV( 'All', K, MQV, V( IOFFV ), LDV,
629     $                         WORK( IPW1 ), LW )
630               ELSE
631                  IPW1 = IPW
632                  CALL ZLAMOV( 'All', K, MQV, V( IOFFV ), LDV,
633     $                         WORK( IPW1 ), LW )
634               END IF
635*
636               IF( FORWARD ) THEN
637*
638*                 WORK( IPW ) = ( . V1 V2 ) where V1 is unit upper
639*                 triangular, zeroes lower triangular part of V1
640*
641                  MYDIST = MOD( MYCOL-IVCOL+NPCOL, NPCOL )
642                  ILEFT = MAX( 0, MYDIST * NBV - ICOFFV )
643                  JJBEG = JJV
644                  JJEND = JJV + MQV - 1
645                  JJNXT = MIN( ICEIL( JJBEG, NBV ) * NBV, JJEND )
646*
647   50             CONTINUE
648                  IF( ( K-ILEFT ).GT.0 ) THEN
649                     CALL ZLASET( 'Lower', K-ILEFT, JJNXT-JJBEG+1, ZERO,
650     $                            ONE,
651     $                            WORK( IPW1+ILEFT+(JJBEG-JJV)*LW ),
652     $                            LW )
653                     MYDIST = MYDIST + NPCOL
654                     ILEFT = MYDIST * NBV - ICOFFV
655                     JJBEG = JJNXT + 1
656                     JJNXT = MIN( JJNXT+NBV, JJEND )
657                     GO TO 50
658                  END IF
659*
660               ELSE
661*
662*                 WORK( IPW ) = ( . V1 V2 ) where V2 is unit lower
663*                 triangular, zeroes upper triangular part of V2.
664*
665                  II = IIV
666                  CALL INFOG1L( JV+M-K, NBV, NPCOL, MYCOL,
667     $                          DESCV( CSRC_ ), JJ, ILASTCOL )
668                  IOFF = MOD( JV+M-K-1, NBV )
669                  KQ = NUMROC( K+IOFF, NBV, MYCOL, ILASTCOL, NPCOL )
670                  IF( MYCOL.EQ.ILASTCOL )
671     $               KQ = KQ - IOFF
672                  MYDIST = MOD( MYCOL-ILASTCOL+NPCOL, NPCOL )
673                  ILEFT = MYDIST * NBV - IOFF
674                  IRIGHT = MIN( ILEFT+NBV, K )
675                  ILEFT = MIN( MAX( 0, ILEFT ), K )
676*
677   60             CONTINUE
678                  IF( II.LE.( IIV+K-1 ) ) THEN
679                     WIDE = IRIGHT - ILEFT
680                     CALL ZLASET( 'All', ILEFT-II+IIV, KQ, ZERO, ZERO,
681     $                            WORK( IPW1+II-IIV+(JJ-JJV)*LW ), LW )
682                     CALL ZLASET( 'Upper', WIDE, KQ, ZERO, ONE,
683     $                            WORK( IPW1+ILEFT+(JJ-JJV)*LW ), LW )
684                     KQ = MAX( 0, KQ - WIDE )
685                     II = IIV + IRIGHT
686                     JJ = JJ + WIDE
687                     MYDIST = MYDIST + NPCOL
688                     ILEFT = MYDIST * NBV - IOFF
689                     IRIGHT = MIN( ILEFT + NBV, K )
690                     ILEFT = MIN( ILEFT, K )
691                     GO TO 60
692                  END IF
693               END IF
694            END IF
695*
696*           WORK( IPV ) = WORK( IPW )' (replicated) is MPC0 x K
697*
698            CALL PBZTRAN( ICTXT, 'Rowwise', 'Conjugate transpose', K,
699     $                    M+ICOFFV, NBV, WORK( IPW ), LW, ZERO,
700     $                    WORK( IPV ), LV, IVROW, IVCOL, ICROW, -1,
701     $                    WORK( IPT ) )
702*
703*           WORK( IPV ) = ( . V )' -> WORK( IPV ) = V' is MPC x K
704*
705            IF( MYROW.EQ.ICROW )
706     $         IPV = IPV + IROFFC
707*
708*           WORK( IPW ) becomes NQC x K = C( IOFFC )' * V'
709*           WORK( IPW ) = C( IOFFC )' * V'  (NQC x MPC x K) -> NQC x K
710*
711            LW = MAX( 1, NQC )
712*
713            IF( MPC.GT.0 ) THEN
714               CALL ZGEMM( 'Conjugate transpose', 'No transpose', NQC,
715     $                     K, MPC, ONE, C( IOFFC ), LDC, WORK( IPV ),
716     $                     LV, ZERO, WORK( IPW ), LW )
717            ELSE
718               CALL ZLASET( 'All', NQC, K, ZERO, ZERO, WORK( IPW ), LW )
719            END IF
720*
721            CALL ZGSUM2D( ICTXT, 'Columnwise', ' ', NQC, K, WORK( IPW ),
722     $                    LW, IVROW, MYCOL )
723*
724*           WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T
725*
726            IF( MYROW.EQ.IVROW ) THEN
727               IF( MYCOL.EQ.IVCOL ) THEN
728*
729*                 Broadcast the block reflector to the other columns.
730*
731                  CALL ZTRBS2D( ICTXT, 'Rowwise', ' ', UPLO, 'Non unit',
732     $                          K, K, T, MBV )
733               ELSE
734                  CALL ZTRBR2D( ICTXT, 'Rowwise', ' ', UPLO, 'Non unit',
735     $                          K, K, T, MBV, MYROW, IVCOL )
736               END IF
737               CALL ZTRMM( 'Right', UPLO, TRANST, 'Non unit', NQC, K,
738     $                     ONE, T, MBV, WORK( IPW ), LW )
739*
740               CALL ZGEBS2D( ICTXT, 'Columnwise', ' ', NQC, K,
741     $                       WORK( IPW ), LW )
742            ELSE
743               CALL ZGEBR2D( ICTXT, 'Columnwise', ' ', NQC, K,
744     $                       WORK( IPW ), LW, IVROW, MYCOL )
745            END IF
746*
747*               C            C      -     V'      *     W'
748*           C( IOFFC ) = C( IOFFC ) - WORK( IPV ) * WORK( IPW )'
749*                        MPC x NQC    MPC x K         K x NQC
750*
751            CALL ZGEMM( 'No transpose', 'Conjugate transpose', MPC, NQC,
752     $                  K, -ONE, WORK( IPV ), LV, WORK( IPW ), LW, ONE,
753     $                  C( IOFFC ), LDC )
754*
755         ELSE
756*
757*           Form Q*sub( C ) or Q'*sub( C )
758*
759*           Locally V( IOFFV ) is K x NQV, C( IOFFC ) is MPC x NQC
760*           WORK( IPV ) is K x NQV = V( IOFFV ), NQV = NQC
761*           WORK( IPW ) is MPC x K = C( IOFFC ) * V( IOFFV )'
762*
763            IPV = 1
764            IPW = IPV + K * NQC
765            LV = MAX( 1, K )
766            LW = MAX( 1, MPC )
767*
768*           Broadcast V to the other process rows.
769*
770            CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP )
771            IF( MYROW.EQ.IVROW ) THEN
772               CALL ZGEBS2D( ICTXT, 'Columnwise', COLBTOP, K, NQC,
773     $                       V( IOFFV ), LDV )
774               IF( MYCOL.EQ.IVCOL )
775     $            CALL ZTRBS2D( ICTXT, 'Columnwise', COLBTOP, UPLO,
776     $                          'Non unit', K, K, T, MBV )
777               CALL ZLAMOV( 'All', K, NQC, V( IOFFV ), LDV, WORK( IPV ),
778     $                      LV )
779            ELSE
780               CALL ZGEBR2D( ICTXT, 'Columnwise', COLBTOP, K, NQC,
781     $                       WORK( IPV ), LV, IVROW, MYCOL )
782               IF( MYCOL.EQ.IVCOL )
783     $            CALL ZTRBR2D( ICTXT, 'Columnwise', COLBTOP, UPLO,
784     $                          'Non unit', K, K, T, MBV, IVROW, MYCOL )
785            END IF
786*
787            IF( FORWARD ) THEN
788*
789*              WORK(IPW) = ( V1 V2 ) where V1 is unit upper
790*              triangular, zeroes lower triangular part of V1
791*
792               MYDIST = MOD( MYCOL-IVCOL+NPCOL, NPCOL )
793               ILEFT = MAX( 0, MYDIST * NBV - ICOFFV )
794               JJBEG = JJV
795               JJEND = JJV + NQC - 1
796               JJNXT = MIN( ICEIL( JJBEG, NBV ) * NBV, JJEND )
797*
798   70          CONTINUE
799               IF( ( K-ILEFT ).GT.0 ) THEN
800                  CALL ZLASET( 'Lower', K-ILEFT, JJNXT-JJBEG+1, ZERO,
801     $                            ONE, WORK( IPV+ILEFT+(JJBEG-JJV)*LV ),
802     $                            LV )
803                  MYDIST = MYDIST + NPCOL
804                  ILEFT = MYDIST * NBV - ICOFFV
805                  JJBEG = JJNXT + 1
806                  JJNXT = MIN( JJNXT+NBV, JJEND )
807                  GO TO 70
808               END IF
809*
810            ELSE
811*
812*              WORK( IPW ) = ( . V1 V2 ) where V2 is unit lower
813*              triangular, zeroes upper triangular part of V2.
814*
815               II = IIV
816               CALL INFOG1L( JV+N-K, NBV, NPCOL, MYCOL, DESCV( CSRC_ ),
817     $                       JJ, ILASTCOL )
818               IOFF = MOD( JV+N-K-1, NBV )
819               KQ = NUMROC( K+IOFF, NBV, MYCOL, ILASTCOL, NPCOL )
820               IF( MYCOL.EQ.ILASTCOL )
821     $            KQ = KQ - IOFF
822               MYDIST = MOD( MYCOL-ILASTCOL+NPCOL, NPCOL )
823               ILEFT = MYDIST * NBV - IOFF
824               IRIGHT = MIN( ILEFT+NBV, K )
825               ILEFT = MIN( MAX( 0, ILEFT ), K )
826*
827   80          CONTINUE
828               IF( II.LE.( IIV+K-1 ) ) THEN
829                  WIDE = IRIGHT - ILEFT
830                  CALL ZLASET( 'All', ILEFT-II+IIV, KQ, ZERO, ZERO,
831     $                         WORK( IPV+II-IIV+(JJ-JJV)*LV ), LV )
832                  CALL ZLASET( 'Upper', WIDE, KQ, ZERO, ONE,
833     $                         WORK( IPV+ILEFT+(JJ-JJV)*LV ), LV )
834                  KQ = MAX( 0, KQ - WIDE )
835                  II = IIV + IRIGHT
836                  JJ = JJ + WIDE
837                  MYDIST = MYDIST + NPCOL
838                  ILEFT = MYDIST * NBV - IOFF
839                  IRIGHT = MIN( ILEFT + NBV, K )
840                  ILEFT = MIN( ILEFT, K )
841                  GO TO 80
842               END IF
843*
844            END IF
845*
846*           WORK( IPV ) is K x NQC = V = V( IOFFV )
847*           WORK( IPW ) = C( IOFFC ) * V'  (MPC x NQC x K) -> MPC x K
848*
849            IF( NQC.GT.0 ) THEN
850               CALL ZGEMM( 'No transpose', 'Conjugate transpose', MPC,
851     $                    K, NQC, ONE, C( IOFFC ), LDC, WORK( IPV ),
852     $                    LV, ZERO, WORK( IPW ), LW )
853            ELSE
854               CALL ZLASET( 'All', MPC, K, ZERO, ZERO, WORK( IPW ), LW )
855            END IF
856*
857            CALL ZGSUM2D( ICTXT, 'Rowwise', ' ', MPC, K, WORK( IPW ),
858     $                    LW, MYROW, IVCOL )
859*
860*           WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T
861*
862            IF( MYCOL.EQ.IVCOL ) THEN
863               CALL ZTRMM( 'Right', UPLO, TRANS, 'Non unit', MPC, K,
864     $                     ONE, T, MBV, WORK( IPW ), LW )
865               CALL ZGEBS2D( ICTXT, 'Rowwise', ' ', MPC, K, WORK( IPW ),
866     $                       LW )
867            ELSE
868               CALL ZGEBR2D( ICTXT, 'Rowwise', ' ', MPC, K, WORK( IPW ),
869     $                       LW, MYROW, IVCOL )
870            END IF
871*
872*               C            C      -     W       *     V
873*           C( IOFFC ) = C( IOFFC ) - WORK( IPW ) * WORK( IPV )
874*                        MPC x NQC    MPC x K         K x NQC
875*
876            CALL ZGEMM( 'No transpose', 'No transpose', MPC, NQC, K,
877     $                  -ONE, WORK( IPW ), LW, WORK( IPV ), LV, ONE,
878     $                  C( IOFFC ), LDC )
879*
880         END IF
881*
882      END IF
883*
884      RETURN
885*
886*     End of PZLARFB
887*
888      END
889