1      SUBROUTINE PBDTRNV( ICONTXT, XDIST, TRANS, N, NB, NZ, X, INCX,
2     $                    BETA, Y, INCY, IXROW, IXCOL, IYROW, IYCOL,
3     $                    WORK )
4*
5*  -- PB-BLAS routine (version 2.1) --
6*     University of Tennessee, Knoxville, Oak Ridge National Laboratory.
7*     April 28, 1996
8*
9*     Jaeyoung Choi, Oak Ridge National Laboratory
10*     Jack Dongarra, University of Tennessee and Oak Ridge National Lab.
11*     David Walker,  Oak Ridge National Laboratory
12*
13*     .. Scalar Arguments ..
14      CHARACTER*1        TRANS, XDIST
15      INTEGER            ICONTXT, INCX, INCY, IXCOL, IXROW, IYCOL,
16     $                   IYROW, N, NB, NZ
17      DOUBLE PRECISION   BETA
18*     ..
19*     .. Array Arguments ..
20      DOUBLE PRECISION   WORK( * ), X( * ), Y( * )
21*     ..
22*
23*  Purpose
24*  =======
25*
26*  PBDTRNV transposes a column vector to row vector, or a row vector to
27*  column vector by reallocating data distribution.
28*
29*     Y := X'
30*
31*  where X and Y are N vectors.
32*
33*  Parameters
34*  ==========
35*
36*  ICONTXT (input) INTEGER
37*          ICONTXT is the BLACS mechanism for partitioning communication
38*          space.  A defining property of a context is that a message in
39*          a context cannot be sent or received in another context.  The
40*          BLACS context includes the definition of a grid, and each
41*          process' coordinates in it.
42*
43*  XDIST   (input) CHARACTER*1
44*          XDIST specifies whether X is a column vector or a row vector,
45*
46*            XDIST = 'C',  X is a column vector (distributed columnwise)
47*            XDIST = 'R',  X is a row vector    (distributed rowwise)
48*
49*  TRANS   (input) CHARACTER*1
50*          TRANS specifies whether the transposed format is transpose
51*          or conjugate transpose.  If the vectors X and Y are real,
52*          the argument is ignored.
53*
54*             TRANS = 'T',  transpose
55*             TRANS = 'C',  conjugate transpose
56*
57*  N       (input) INTEGER
58*          N specifies the (global) number of the vector X and the
59*          vector Y.  N >= 0.
60*
61*  NB      (input) INTEGER
62*          NB specifies the block size of vectors X and Y.  NB >= 0.
63*
64*  NZ      (input) INTEGER
65*          NZ is the column offset to specify the column distance from
66*          the beginning of the block to the first element of the
67*          vector X, and the row offset to the first element of the
68*          vector Y if XDIST = 'C'.
69*          Otherwise, it is row offset to specify the row distance
70*          from the beginning of the block to the first element of the
71*          vector X, and the column offset to the first element of the
72*          vector Y.  0 < NZ <= NB.
73*
74*  X       (input) DOUBLE PRECISION array of dimension  at least
75*          ( 1 + (Np-1) * abs(INCX)) in IXCOL if XDIST = 'C', or
76*          ( 1 + (Nq-1) * abs(INCX)) in IXROW if XDIST = 'R'.
77*          The incremented array X must contain the vector X.
78*
79*  INCX    (input) INTEGER
80*          INCX specifies the increment for the elements of X.
81*          INCX <> 0.
82*
83*  BETA    (input) DOUBLE PRECISION
84*          BETA specifies scaler beta.
85*
86*  Y       (input/output) DOUBLE PRECISION array of dimension at least
87*          ( 1 + (Nq-1) * abs(INCY)) in IYROW if XDIST = 'C', or
88*          ( 1 + (Np-1) * abs(INCY)) in IYCOL if XDIST = 'R', or
89*          The incremented array Y must contain the vector Y.
90*          Y will not be referenced if beta is zero.
91*
92*  INCY    (input) INTEGER
93*          INCY specifies the increment for the elements of Y.
94*          INCY <> 0.
95*
96*  IXROW   (input) INTEGER
97*          IXROW specifies a row of the process template, which holds
98*          the first element of the vector X. If X is a row vector and
99*          all rows of processes have a copy of X, then set IXROW = -1.
100*
101*  IXCOL   (input) INTEGER
102*          IXCOL specifies  a column of the process template,
103*          which holds the first element of the vector X.  If  X is  a
104*          column block and all columns of processes have a copy of X,
105*          then set IXCOL = -1.
106*
107*  IYROW   (input) INTEGER
108*          IYROW specifies the current row process which holds the
109*          first element of the vector Y, which is transposed of X.
110*          If X  is a column vector and the transposed  row vector Y is
111*          distributed all rows of processes, set IYROW = -1.
112*
113*  IYCOL   (input) INTEGER
114*          IYCOL specifies  the current column process  which holds
115*          the first element of the vector Y, which is transposed of Y.
116*          If X is a row block and the transposed column vector Y is
117*          distributed all columns of processes, set IYCOL = -1.
118*
119*  WORK    (workspace) DOUBLE PRECISION array of dimension Size(WORK).
120*          It needs extra working space of x**T or x**H.
121*
122*  Parameters Details
123*  ==================
124*
125*  Nx      It is a local portion  of N owned by a process, where x is
126*          replaced by  either p (=NPROW) or q (=NPCOL)).  The value is
127*          determined by N, NB, NZ, x, and MI, where NB is a block size,
128*          NZ is a offset from the beginning of the block,  and MI is a
129*          row or column position  in a process template. Nx is equal
130*          to  or less than Nx0 = CEIL( N+NZ, NB*x ) * NB.
131*
132*  Communication Scheme
133*  ====================
134*
135*  The communication scheme of the routine is set to '1-tree', which is
136*  fan-out.  (For details, see BLACS user's guide.)
137*
138*  Memory Requirement of WORK
139*  ==========================
140*
141*  NN   = N + NZ
142*  Npb  = CEIL( NN, NB*NPROW )
143*  Nqb  = CEIL( NN, NB*NPCOL )
144*  LCMP = LCM / NPROW
145*  LCMQ = LCM / NPCOL
146*
147*   (1) XDIST = 'C'
148*     (a) IXCOL != -1
149*         Size(WORK) = CEIL(Nqb,LCMQ)*NB
150*     (b) IXCOL = -1
151*         Size(WORK) = CEIL(Nqb,LCMQ)*NB * MIN(LCMQ,CEIL(NN,NB))
152*
153*   (2) XDIST = 'R'
154*     (a) IXROW != -1
155*         Size(WORK) = CEIL(Npb,LCMP)*NB
156*     (b) IXROW = -1
157*         Size(WORK) = CEIL(Npb,LCMP)*NB * MIN(LCMP,CEIL(NN,NB))
158*
159*  Notes
160*  -----
161*  More precise space can be computed as
162*
163*  CEIL(Npb,LCMP)*NB => NUMROC( NUMROC(NN,NB,0,0,NPROW), NB, 0, 0, LCMP)
164*  CEIL(Nqb,LCMQ)*NB => NUMROC( NUMROC(NN,NB,0,0,NPCOL), NB, 0, 0, LCMQ)
165*
166*  =====================================================================
167*
168*     .. Parameters ..
169      DOUBLE PRECISION   ONE, ZERO
170      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
171*     ..
172*     .. Local Scalars ..
173      LOGICAL            COLFORM, ROWFORM
174      INTEGER            I, IDEX, IGD, INFO, JDEX, JYCOL, JYROW, JZ, KZ,
175     $                   LCM, LCMP, LCMQ, MCCOL, MCROW, MRCOL, MRROW,
176     $                   MYCOL, MYROW, NN, NP, NP0, NP1, NPCOL, NPROW,
177     $                   NQ, NQ0, NQ1
178      DOUBLE PRECISION   TBETA
179*     ..
180*     .. External Functions ..
181      LOGICAL            LSAME
182      INTEGER            ILCM, ICEIL, NUMROC
183      EXTERNAL           LSAME, ILCM, ICEIL, NUMROC
184*     ..
185*     .. External Subroutines ..
186      EXTERNAL           BLACS_GRIDINFO, DGEBR2D, DGEBS2D, DGERV2D,
187     $                   DGESD2D, PBDTR2A1, PBDTR2B1, PBDTRGET,
188     $                   PBDTRST1, PBDVECADD, PXERBLA
189*     ..
190*     .. Intrinsic Functions ..
191      INTRINSIC          MAX, MIN, MOD
192*     ..
193*     .. Executable Statements ..
194*
195*     Quick return if possible.
196*
197      IF( N.EQ.0 ) RETURN
198*
199      CALL BLACS_GRIDINFO( ICONTXT, NPROW, NPCOL, MYROW, MYCOL )
200*
201      COLFORM = LSAME( XDIST, 'C' )
202      ROWFORM = LSAME( XDIST, 'R' )
203*
204*     Test the input parameters.
205*
206      INFO = 0
207      IF( ( .NOT.COLFORM ) .AND. ( .NOT.ROWFORM ) ) THEN
208         INFO = 2
209      ELSE IF( N   .LT.0                          ) THEN
210         INFO = 4
211      ELSE IF( NB  .LT.1                          ) THEN
212         INFO = 5
213      ELSE IF( NZ  .LT.0 .OR. NZ.GE.NB            ) THEN
214         INFO = 6
215      ELSE IF( INCX.EQ.0                          ) THEN
216         INFO = 8
217      ELSE IF( INCY.EQ.0                          ) THEN
218         INFO = 11
219      ELSE IF( IXROW.LT.-1 .OR. IXROW.GE.NPROW .OR.
220     $       ( IXROW.EQ.-1 .AND. COLFORM )        ) THEN
221         INFO = 12
222      ELSE IF( IXCOL.LT.-1 .OR. IXCOL.GE.NPCOL .OR.
223     $       ( IXCOL.EQ.-1 .AND. ROWFORM )        ) THEN
224         INFO = 13
225      ELSE IF( IYROW.LT.-1 .OR. IYROW.GE.NPROW .OR.
226     $       ( IYROW.EQ.-1 .AND. ROWFORM )        ) THEN
227         INFO = 14
228      ELSE IF( IYCOL.LT.-1 .OR. IYCOL.GE.NPCOL .OR.
229     $       ( IYCOL.EQ.-1 .AND. COLFORM )        ) THEN
230         INFO = 15
231      END IF
232*
233   10 CONTINUE
234      IF( INFO.NE.0 ) THEN
235         CALL PXERBLA( ICONTXT, 'PBDTRNV ', INFO )
236         RETURN
237      END IF
238*
239*     Start the operations.
240*
241*     LCM : the least common multiple of NPROW and NPCOL
242*
243      LCM  = ILCM( NPROW, NPCOL )
244      LCMP = LCM   / NPROW
245      LCMQ = LCM   / NPCOL
246      IGD  = NPCOL / LCMP
247      NN   = N + NZ
248*
249*     When x is a column vector
250*
251      IF( COLFORM ) THEN
252*
253*       Form  y <== x'  ( x is a column vector )
254*
255*                                        ||
256*                                        ||
257*            _____________               ||
258*            -----(y)-----      <==     (x)
259*                                        ||
260*                                        ||
261*                                        ||
262*
263        IF(      IXROW.LT.0  .OR. IXROW.GE.NPROW ) THEN
264          INFO = 12
265        ELSE IF( IXCOL.LT.-1 .OR. IXCOL.GE.NPCOL ) THEN
266          INFO = 13
267        ELSE IF( IYROW.LT.-1 .OR. IYROW.GE.NPROW ) THEN
268          INFO = 14
269        ELSE IF( IYCOL.LT.0  .OR. IYCOL.GE.NPCOL ) THEN
270          INFO = 15
271        END IF
272        IF( INFO.NE.0 ) GO TO 10
273*
274*       MRROW : row relative position in template from IXROW
275*       MRCOL : column relative position in template from IYCOL
276*
277        MRROW = MOD( NPROW+MYROW-IXROW, NPROW )
278        MRCOL = MOD( NPCOL+MYCOL-IYCOL, NPCOL )
279        JYROW = IYROW
280        IF( IYROW.EQ.-1 ) JYROW = IXROW
281*
282        NP  = NUMROC( NN, NB, MYROW, IXROW, NPROW )
283        IF( MRROW.EQ.0 ) NP = NP - NZ
284        NQ  = NUMROC( NN, NB, MYCOL, IYCOL, NPCOL )
285        IF( MRCOL.EQ.0 ) NQ = NQ - NZ
286        NQ0 = NUMROC( NUMROC(NN, NB, 0, 0, NPCOL), NB, 0, 0, LCMQ )
287*
288*       When a column process of IXCOL has a column block A,
289*
290        IF( IXCOL .GE. 0 ) THEN
291          TBETA = ZERO
292          IF( MYROW.EQ.JYROW ) TBETA = BETA
293          KZ = NZ
294*
295          DO 20 I = 0, MIN( LCM, ICEIL(NN,NB) ) - 1
296            MCROW = MOD( MOD(I, NPROW) + IXROW, NPROW )
297            MCCOL = MOD( MOD(I, NPCOL) + IYCOL, NPCOL )
298            IF( LCMQ.EQ.1 )  NQ0 = NUMROC( NN, NB, I, 0, NPCOL )
299            JDEX  = (I/NPCOL) * NB
300            IF( MRCOL.EQ.0 ) JDEX = MAX(0, JDEX-NZ)
301*
302*           A source node copies the blocks to WORK, and send it
303*
304            IF( MYROW.EQ.MCROW .AND. MYCOL.EQ.IXCOL ) THEN
305*
306*             The source node is a destination node
307*
308              IDEX = (I/NPROW) * NB
309              IF( MRROW.EQ.0 ) IDEX = MAX( 0, IDEX-NZ )
310              IF( MYROW.EQ.JYROW .AND. MYCOL.EQ.MCCOL ) THEN
311                CALL PBDTR2B1( ICONTXT, TRANS, NP-IDEX, NB, KZ,
312     $                          X(IDEX*INCX+1), INCX, TBETA,
313     $                          Y(JDEX*INCY+1), INCY, LCMP, LCMQ )
314*
315*             The source node sends blocks to a destination node
316*
317              ELSE
318                CALL PBDTR2B1( ICONTXT, TRANS, NP-IDEX, NB, KZ,
319     $                         X(IDEX*INCX+1), INCX, ZERO, WORK, 1,
320     $                         LCMP, 1 )
321                CALL DGESD2D( ICONTXT, 1, NQ0-KZ, WORK, 1,
322     $                        JYROW, MCCOL )
323              END IF
324*
325*           A destination node receives the copied vector
326*
327            ELSE IF( MYROW.EQ.JYROW .AND. MYCOL.EQ.MCCOL ) THEN
328              IF( LCMQ.EQ.1 .AND. TBETA.EQ.ZERO ) THEN
329                CALL DGERV2D( ICONTXT, 1, NQ0-KZ, Y, INCY,
330     $                        MCROW, IXCOL )
331              ELSE
332                CALL DGERV2D( ICONTXT, 1, NQ0-KZ, WORK, 1,
333     $                        MCROW, IXCOL )
334                CALL PBDTR2A1( ICONTXT, NQ-JDEX, NB, KZ, WORK, 1, TBETA,
335     $                         Y(JDEX*INCY+1), INCY, LCMQ*NB )
336              END IF
337            END IF
338            KZ = 0
339   20     CONTINUE
340*
341*         Broadcast a row block of WORK in each column of template
342*
343          IF( IYROW.EQ.-1 ) THEN
344            IF( MYROW.EQ.JYROW ) THEN
345              CALL DGEBS2D( ICONTXT, 'Col', '1-tree', 1, NQ, Y, INCY )
346            ELSE
347              CALL DGEBR2D( ICONTXT, 'Col', '1-tree', 1, NQ, Y, INCY,
348     $                     JYROW, MYCOL )
349             END IF
350          END IF
351*
352*       When all column procesors have a copy of the column block A,
353*
354        ELSE
355          IF( LCMQ.EQ.1 ) NQ0 = NQ
356*
357*         Processors, which have diagonal blocks of X, copy them to
358*         WORK array in transposed form
359*
360          KZ = 0
361          IF( MRROW.EQ.0 ) KZ = NZ
362          JZ = 0
363          IF( MRROW.EQ.0 .AND. MYCOL.EQ.IYCOL ) JZ = NZ
364*
365          DO 30 I = 0, LCMP - 1
366            IF( MRCOL.EQ.MOD(NPROW*I+MRROW, NPCOL) ) THEN
367              IDEX = MAX( 0, I*NB-KZ )
368              IF( LCMQ.EQ.1 .AND. (IYROW.EQ.-1.OR.IYROW.EQ.MYROW) ) THEN
369                 CALL PBDTR2B1( ICONTXT, TRANS, NP-IDEX, NB, JZ,
370     $                          X(IDEX*INCX+1), INCX, BETA, Y, INCY,
371     $                          LCMP, 1 )
372              ELSE
373                 CALL PBDTR2B1( ICONTXT, TRANS, NP-IDEX, NB, JZ,
374     $                          X(IDEX*INCX+1), INCX, ZERO, WORK, 1,
375     $                          LCMP, 1 )
376              END IF
377            END IF
378   30     CONTINUE
379*
380*         Get diagonal blocks of A for each column of the template
381*
382          MCROW = MOD( MOD(MRCOL, NPROW) + IXROW, NPROW )
383          IF( LCMQ.GT.1 ) THEN
384            MCCOL = MOD( NPCOL+MYCOL-IYCOL, NPCOL )
385            CALL PBDTRGET( ICONTXT, 'Row', 1, NQ0, ICEIL( NN, NB ),
386     $                     WORK, 1, MCROW, MCCOL, IGD, MYROW, MYCOL,
387     $                     NPROW, NPCOL )
388          END IF
389*
390*         Broadcast a row block of WORK in every row of template
391*
392          IF( IYROW.EQ.-1 ) THEN
393            IF( MYROW.EQ.MCROW ) THEN
394              IF( LCMQ.GT.1 ) THEN
395                KZ = 0
396                IF( MYCOL.EQ.IYCOL ) KZ = NZ
397                CALL PBDTRST1( ICONTXT, 'Row', NQ, NB, KZ, WORK, 1,
398     $                         BETA, Y, INCY, LCMP, LCMQ, NQ0 )
399              END IF
400              CALL DGEBS2D( ICONTXT, 'Col', '1-tree', 1, NQ, Y, INCY )
401            ELSE
402              CALL DGEBR2D( ICONTXT, 'Col', '1-tree', 1, NQ, Y, INCY,
403     $                      MCROW, MYCOL )
404            END IF
405*
406*         Send a row block of WORK to the destination row
407*
408          ELSE
409            IF( LCMQ.EQ.1 ) THEN
410              IF( MYROW.EQ.MCROW ) THEN
411                IF( MYROW.NE.IYROW )
412     $            CALL DGESD2D( ICONTXT, 1, NQ0, WORK, 1, IYROW, MYCOL )
413              ELSE IF( MYROW.EQ.IYROW ) THEN
414                IF( BETA.EQ.ZERO ) THEN
415                  CALL DGERV2D( ICONTXT, 1, NQ0, Y, INCY, MCROW, MYCOL )
416                ELSE
417                  CALL DGERV2D( ICONTXT, 1, NQ0, WORK, 1, MCROW, MYCOL )
418                  CALL PBDVECADD( ICONTXT, 'G', NQ0, ONE, WORK, 1,
419     $                            BETA, Y, INCY )
420                END IF
421              END IF
422*
423            ELSE
424              NQ1 = NQ0 * MIN( LCMQ, MAX( 0, ICEIL(NN,NB)-MCCOL ) )
425              IF( MYROW.EQ.MCROW ) THEN
426                IF( MYROW.NE.IYROW )
427     $            CALL DGESD2D( ICONTXT, 1, NQ1, WORK, 1, IYROW, MYCOL )
428              ELSE IF( MYROW.EQ.IYROW ) THEN
429                CALL DGERV2D( ICONTXT, 1, NQ1, WORK, 1, MCROW, MYCOL )
430              END IF
431*
432              IF( MYROW.EQ.IYROW ) THEN
433                KZ = 0
434                IF( MYCOL.EQ.IYCOL ) KZ = NZ
435                CALL PBDTRST1( ICONTXT, 'Row', NQ, NB, KZ, WORK, 1,
436     $                         BETA, Y, INCY, LCMP, LCMQ, NQ0 )
437              END IF
438            END IF
439          END IF
440        END IF
441*
442*     When x is a row vector
443*
444      ELSE
445*
446*       Form  y <== x'  ( x is a row block )
447*
448*           ||
449*           ||
450*           ||               _____________
451*          (y)      <==      -----(x)-----
452*           ||
453*           ||
454*           ||
455*
456        IF(      IXROW.LT.-1 .OR. IXROW.GE.NPROW ) THEN
457          INFO = 12
458        ELSE IF( IXCOL.LT.0  .OR. IXCOL.GE.NPCOL ) THEN
459          INFO = 13
460        ELSE IF( IYROW.LT.0  .OR. IYROW.GE.NPROW ) THEN
461          INFO = 14
462        ELSE IF( IYCOL.LT.-1 .OR. IYCOL.GE.NPCOL ) THEN
463          INFO = 15
464        END IF
465        IF( INFO.NE.0 ) GO TO 10
466*
467*       MRROW : row relative position in template from IYROW
468*       MRCOL : column relative position in template from IXCOL
469*
470        MRROW = MOD( NPROW+MYROW-IYROW, NPROW )
471        MRCOL = MOD( NPCOL+MYCOL-IXCOL, NPCOL )
472        JYCOL = IYCOL
473        IF( IYCOL.EQ.-1 ) JYCOL = IXCOL
474*
475        NP  = NUMROC( NN, NB, MYROW, IYROW, NPROW )
476        IF( MRROW.EQ.0 ) NP = NP - NZ
477        NQ  = NUMROC( NN, NB, MYCOL, IXCOL, NPCOL )
478        IF( MRCOL.EQ.0 ) NQ = NQ - NZ
479        NP0 = NUMROC( NUMROC(NN, NB, 0, 0, NPROW), NB, 0, 0, LCMP )
480*
481*       When a row process of IXROW has a row block A,
482*
483        IF( IXROW .GE. 0 ) THEN
484          TBETA = ZERO
485          IF( MYCOL.EQ.JYCOL ) TBETA = BETA
486          KZ = NZ
487*
488          DO 40 I = 0, MIN( LCM, ICEIL(NN,NB) ) - 1
489            MCROW = MOD( MOD(I, NPROW) + IYROW, NPROW )
490            MCCOL = MOD( MOD(I, NPCOL) + IXCOL, NPCOL )
491            IF( LCMP.EQ.1 ) NP0 = NUMROC( NN, NB, I, 0, NPROW )
492            JDEX  = (I/NPROW) * NB
493            IF( MRROW.EQ.0 ) JDEX = MAX(0, JDEX-NZ)
494*
495*           A source node copies the blocks to WORK, and send it
496*
497            IF( MYROW.EQ.IXROW .AND. MYCOL.EQ.MCCOL ) THEN
498*
499*             The source node is a destination node
500*
501              IDEX = (I/NPCOL) * NB
502              IF( MRCOL.EQ.0 ) IDEX = MAX( 0, IDEX-NZ )
503              IF( MYROW.EQ.MCROW .AND. MYCOL.EQ.JYCOL ) THEN
504                CALL PBDTR2B1( ICONTXT, TRANS, NQ-IDEX, NB, KZ,
505     $                         X(IDEX*INCX+1), INCX, TBETA,
506     $                         Y(JDEX*INCY+1), INCY, LCMQ, LCMP )
507*
508*             The source node sends blocks to a destination node
509*
510              ELSE
511                CALL PBDTR2B1( ICONTXT, TRANS, NQ-IDEX, NB, KZ,
512     $                         X(IDEX*INCX+1), INCX, ZERO, WORK, 1,
513     $                         LCMQ, 1 )
514                CALL DGESD2D( ICONTXT, 1, NP0-KZ, WORK, 1,
515     $                        MCROW, JYCOL )
516              END IF
517*
518*           A destination node receives the copied blocks
519*
520            ELSE IF( MYROW.EQ.MCROW .AND. MYCOL.EQ.JYCOL ) THEN
521              IF( LCMP.EQ.1 .AND. TBETA.EQ.ZERO ) THEN
522                CALL DGERV2D( ICONTXT, 1, NP0-KZ, Y, INCY,
523     $                        IXROW, MCCOL )
524              ELSE
525                CALL DGERV2D( ICONTXT, 1, NP0-KZ, WORK, 1,
526     $                        IXROW, MCCOL )
527                CALL PBDTR2A1( ICONTXT, NP-JDEX, NB, KZ, WORK, 1, TBETA,
528     $                         Y(JDEX*INCY+1), INCY, LCMP*NB )
529              END IF
530            END IF
531            KZ = 0
532   40     CONTINUE
533*
534*         Broadcast a column vector Y in each row of template
535*
536          IF( IYCOL.EQ.-1 ) THEN
537            IF( MYCOL.EQ.JYCOL ) THEN
538              CALL DGEBS2D( ICONTXT, 'Row', '1-tree', 1, NP, Y, INCY )
539            ELSE
540              CALL DGEBR2D( ICONTXT, 'Row', '1-tree', 1, NP, Y, INCY,
541     $                      MYROW, JYCOL )
542            END IF
543          END IF
544*
545*       When all row procesors have a copy of the row block A,
546*
547        ELSE
548          IF( LCMP.EQ.1 ) NP0 = NP
549*
550*         Processors, which have diagonal blocks of A, copy them to
551*         WORK array in transposed form
552*
553          KZ = 0
554          IF( MRCOL.EQ.0 ) KZ = NZ
555          JZ = 0
556          IF( MRCOL.EQ.0 .AND. MYROW.EQ.IYROW ) JZ = NZ
557*
558          DO 50 I = 0, LCMQ-1
559            IF( MRROW.EQ.MOD(NPCOL*I+MRCOL, NPROW) ) THEN
560              IDEX = MAX( 0, I*NB-KZ )
561              IF( LCMP.EQ.1 .AND. (IYCOL.EQ.-1.OR.IYCOL.EQ.MYCOL) ) THEN
562                CALL PBDTR2B1( ICONTXT, TRANS, NQ-IDEX, NB, JZ,
563     $                          X(IDEX*INCX+1), INCX, BETA, Y, INCY,
564     $                          LCMQ, 1 )
565              ELSE
566                CALL PBDTR2B1( ICONTXT, TRANS, NQ-IDEX, NB, JZ,
567     $                         X(IDEX*INCX+1), INCX, ZERO, WORK, 1,
568     $                         LCMQ, 1 )
569              END IF
570            END IF
571   50     CONTINUE
572*
573*         Get diagonal blocks of A for each row of the template
574*
575          MCCOL = MOD( MOD(MRROW, NPCOL) + IXCOL, NPCOL )
576          IF( LCMP.GT.1 ) THEN
577            MCROW = MOD( NPROW+MYROW-IYROW, NPROW )
578            CALL PBDTRGET( ICONTXT, 'Col', 1, NP0, ICEIL( NN, NB ),
579     $                     WORK, 1, MCROW, MCCOL, IGD, MYROW, MYCOL,
580     $                     NPROW, NPCOL )
581          END IF
582*
583*         Broadcast a column block of WORK in every column of template
584*
585          IF( IYCOL.EQ.-1 ) THEN
586            IF( MYCOL.EQ.MCCOL ) THEN
587              IF( LCMP.GT.1 ) THEN
588                KZ = 0
589                IF( MYROW.EQ.IYROW ) KZ = NZ
590                CALL PBDTRST1( ICONTXT, 'Col', NP, NB, KZ, WORK, 1,
591     $                         BETA, Y, INCY, LCMP, LCMQ, NP0 )
592              END IF
593              CALL DGEBS2D( ICONTXT, 'Row', '1-tree', 1, NP, Y, INCY )
594            ELSE
595              CALL DGEBR2D( ICONTXT, 'Row', '1-tree', 1, NP, Y, INCY,
596     $                      MYROW, MCCOL )
597            END IF
598*
599*         Send a column block of WORK to the destination column
600*
601          ELSE
602            IF( LCMP.EQ.1 ) THEN
603              IF( MYCOL.EQ.MCCOL ) THEN
604                IF( MYCOL.NE.IYCOL )
605     $            CALL DGESD2D( ICONTXT, 1, NP, WORK, 1, MYROW, IYCOL )
606              ELSE IF( MYCOL.EQ.IYCOL ) THEN
607                IF( BETA.EQ.ZERO ) THEN
608                  CALL DGERV2D( ICONTXT, 1, NP, Y, INCY, MYROW, MCCOL )
609                ELSE
610                  CALL DGERV2D( ICONTXT, 1, NP, WORK, 1, MYROW, MCCOL )
611                  CALL PBDVECADD( ICONTXT, 'G', NP, ONE, WORK, 1, BETA,
612     $                            Y, INCY )
613                END IF
614              END IF
615*
616            ELSE
617              NP1 = NP0 * MIN( LCMP, MAX( 0, ICEIL(NN,NB)-MCROW ) )
618              IF( MYCOL.EQ.MCCOL ) THEN
619                IF( MYCOL.NE.IYCOL )
620     $            CALL DGESD2D( ICONTXT, 1, NP1, WORK, 1, MYROW, IYCOL )
621              ELSE IF( MYCOL.EQ.IYCOL ) THEN
622                CALL DGERV2D( ICONTXT, 1, NP1, WORK, 1, MYROW, MCCOL )
623              END IF
624*
625              IF( MYCOL.EQ.IYCOL ) THEN
626                KZ = 0
627                IF( MYROW.EQ.IYROW ) KZ = NZ
628                CALL PBDTRST1( ICONTXT, 'Col', NP, NB, KZ, WORK, 1,
629     $                         BETA, Y, INCY, LCMP, LCMQ, NP0 )
630              END IF
631            END IF
632          END IF
633        END IF
634      END IF
635*
636      RETURN
637*
638*     End of PBDTRNV
639*
640      END
641*
642*=======================================================================
643*     SUBROUTINE PBDTR2A1
644*=======================================================================
645*
646      SUBROUTINE PBDTR2A1( ICONTXT, N, NB, NZ, X, INCX, BETA, Y, INCY,
647     $                     INTV )
648*
649*  -- PB-BLAS routine (version 2.1) --
650*     University of Tennessee, Knoxville, Oak Ridge National Laboratory.
651*     April 28, 1996
652*
653*     .. Scalar Arguments ..
654      INTEGER              ICONTXT, N, NB, NZ, INCX, INCY, INTV
655      DOUBLE PRECISION     BETA
656*     ..
657*     .. Array Arguments ..
658      DOUBLE PRECISION     X( * ), Y( * )
659*     ..
660*
661*  Purpose
662*  =======
663*
664*     y <== x
665*     y is a scattered vector, copied from a condensed vector x.
666*
667*     ..
668*     .. Intrinsic Functions ..
669      INTRINSIC            MIN
670*     ..
671*     .. External Functions ..
672      INTEGER              ICEIL
673      EXTERNAL             ICEIL
674*     ..
675*     .. External Subroutines ..
676      EXTERNAL             PBDVECADD
677*     ..
678*     .. Parameters ..
679      DOUBLE PRECISION     ONE
680      PARAMETER          ( ONE = 1.0D+0 )
681*     ..
682*     .. Local Variables ..
683      INTEGER              IX, IY, JZ, K, ITER
684*
685      IX = 0
686      IY = 0
687      JZ = NZ
688      ITER = ICEIL( N+NZ, INTV )
689*
690      IF( ITER.GT.1 ) THEN
691         CALL PBDVECADD( ICONTXT, 'G', NB-JZ, ONE, X(IX*INCX+1), INCX,
692     $                   BETA, Y(IY*INCY+1), INCY )
693         IX = IX + NB   - JZ
694         IY = IY + INTV - JZ
695         JZ = 0
696*
697         DO 10 K = 2, ITER-1
698            CALL PBDVECADD( ICONTXT, 'G', NB, ONE, X(IX*INCX+1), INCX,
699     $                      BETA, Y(IY*INCY+1), INCY )
700            IX = IX + NB
701            IY = IY + INTV
702   10    CONTINUE
703      END IF
704*
705      CALL PBDVECADD( ICONTXT, 'G', MIN( N-IY, NB-JZ ), ONE,
706     $                X(IX*INCX+1), INCX, BETA, Y(IY*INCY+1), INCY )
707*
708      RETURN
709*
710*     End of PBDTR2A1
711*
712      END
713*
714*=======================================================================
715*     SUBROUTINE PBDTR2B1
716*=======================================================================
717*
718      SUBROUTINE PBDTR2B1( ICONTXT, TRANS, N, NB, NZ, X, INCX, BETA, Y,
719     $                     INCY, JINX, JINY )
720*
721*  -- PB-BLAS routine (version 2.1) --
722*     University of Tennessee, Knoxville, Oak Ridge National Laboratory.
723*     April 28, 1996
724*
725*     .. Scalar Arguments ..
726      CHARACTER*1          TRANS
727      INTEGER              ICONTXT, N, NB, NZ, INCX, INCY, JINX, JINY
728      DOUBLE PRECISION     BETA
729*     ..
730*     .. Array Arguments ..
731      DOUBLE PRECISION     X( * ), Y( * )
732*     ..
733*
734*  Purpose
735*  =======
736*
737*     y <== x + beta * y
738*     y is a condensed vector, copied from a scattered vector x
739*
740*     ..
741*     .. Intrinsic Functions ..
742      INTRINSIC            MIN
743*     ..
744*     .. External Functions ..
745      INTEGER              ICEIL
746      EXTERNAL             ICEIL
747*     ..
748*     .. External Subroutines ..
749      EXTERNAL             PBDVECADD
750*     ..
751*     .. Parameters ..
752      DOUBLE PRECISION     ONE
753      PARAMETER          ( ONE = 1.0D+0 )
754*     ..
755*     .. Local Variables ..
756      INTEGER              IX, IY, JZ, K, ITER, LENX, LENY
757*
758      IF( JINX.EQ.1 .AND. JINY.EQ.1 ) THEN
759         CALL PBDVECADD( ICONTXT, TRANS, N, ONE, X, INCX, BETA,
760     $                   Y, INCY )
761*
762      ELSE
763         IX   = 0
764         IY   = 0
765         JZ   = NZ
766         LENX = NB * JINX
767         LENY = NB * JINY
768         ITER = ICEIL( N+NZ, LENX )
769*
770         IF( ITER.GT.1 ) THEN
771            CALL PBDVECADD( ICONTXT, TRANS, NB-JZ, ONE, X(IX*INCX+1),
772     $                      INCX, BETA, Y(IY*INCY+1), INCY )
773            IX = IX + LENX - JZ
774            IY = IY + LENY - JZ
775            JZ = 0
776*
777            DO 10 K = 2, ITER-1
778               CALL PBDVECADD( ICONTXT, TRANS, NB, ONE, X(IX*INCX+1),
779     $                         INCX, BETA, Y(IY*INCY+1), INCY )
780               IX = IX + LENX
781               IY = IY + LENY
782   10       CONTINUE
783         END IF
784*
785         CALL PBDVECADD( ICONTXT, TRANS, MIN( N-IX, NB-JZ ), ONE,
786     $                   X(IX*INCX+1), INCX, BETA, Y(IY*INCY+1), INCY )
787      END IF
788*
789      RETURN
790*
791*     End of PBDTR2B1
792*
793      END
794