1 /* ---------------------------------------------------------------------
2 *
3 *  -- PBLAS routine (version 2.0) --
4 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 *     and University of California, Berkeley.
6 *     April 1, 1998
7 *
8 *  ---------------------------------------------------------------------
9 */
10 /*
11 *  Include files
12 */
13 #include "pblas.h"
14 #include "PBpblas.h"
15 #include "PBtools.h"
16 #include "PBblacs.h"
17 #include "PBblas.h"
18 
19 #ifdef __STDC__
pdtrmm_(F_CHAR_T SIDE,F_CHAR_T UPLO,F_CHAR_T TRANS,F_CHAR_T DIAG,int * M,int * N,double * ALPHA,double * A,int * IA,int * JA,int * DESCA,double * B,int * IB,int * JB,int * DESCB)20 void pdtrmm_( F_CHAR_T SIDE, F_CHAR_T UPLO, F_CHAR_T TRANS, F_CHAR_T DIAG,
21               int * M, int * N, double * ALPHA,
22               double * A, int * IA, int * JA, int * DESCA,
23               double * B, int * IB, int * JB, int * DESCB )
24 #else
25 void pdtrmm_( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA,
26               A, IA, JA, DESCA, B, IB, JB, DESCB )
27 /*
28 *  .. Scalar Arguments ..
29 */
30    F_CHAR_T       DIAG, SIDE, TRANS, UPLO;
31    int            * IA, * IB, * JA, * JB, * M, * N;
32    double         * ALPHA;
33 /*
34 *  .. Array Arguments ..
35 */
36    int            * DESCA, * DESCB;
37    double         * A, * B;
38 #endif
39 {
40 /*
41 *  Purpose
42 *  =======
43 *
44 *  PDTRMM  performs one of the matrix-matrix operations
45 *
46 *     sub( B ) := alpha * op( sub( A ) ) * sub( B ),
47 *
48 *     or
49 *
50 *     sub( B ) := alpha * sub( B ) * op( sub( A ) ),
51 *
52 *  where
53 *
54 *     sub( A ) denotes A(IA:IA+M-1,JA:JA+M-1)  if SIDE = 'L',
55 *                      A(IA:IA+N-1,JA:JA+N-1)  if SIDE = 'R', and,
56 *
57 *     sub( B ) denotes B(IB:IB+M-1,JB:JB+N-1).
58 *
59 *  Alpha  is a scalar,  sub( B )  is an m by n submatrix,  sub( A ) is a
60 *  unit, or non-unit, upper or lower triangular submatrix and op( X ) is
61 *  one of
62 *
63 *     op( X ) = X   or   op( X ) = X'.
64 *
65 *  Notes
66 *  =====
67 *
68 *  A description  vector  is associated with each 2D block-cyclicly dis-
69 *  tributed matrix.  This  vector  stores  the  information  required to
70 *  establish the  mapping  between a  matrix entry and its corresponding
71 *  process and memory location.
72 *
73 *  In  the  following  comments,   the character _  should  be  read  as
74 *  "of  the  distributed  matrix".  Let  A  be a generic term for any 2D
75 *  block cyclicly distributed matrix.  Its description vector is DESC_A:
76 *
77 *  NOTATION         STORED IN       EXPLANATION
78 *  ---------------- --------------- ------------------------------------
79 *  DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
80 *  CTXT_A  (global) DESCA[ CTXT_  ] The BLACS context handle, indicating
81 *                                   the NPROW x NPCOL BLACS process grid
82 *                                   A  is  distributed over. The context
83 *                                   itself  is  global,  but  the handle
84 *                                   (the integer value) may vary.
85 *  M_A     (global) DESCA[ M_     ] The  number of rows in the distribu-
86 *                                   ted matrix A, M_A >= 0.
87 *  N_A     (global) DESCA[ N_     ] The number of columns in the distri-
88 *                                   buted matrix A, N_A >= 0.
89 *  IMB_A   (global) DESCA[ IMB_   ] The number of rows of the upper left
90 *                                   block of the matrix A, IMB_A > 0.
91 *  INB_A   (global) DESCA[ INB_   ] The  number  of columns of the upper
92 *                                   left   block   of   the  matrix   A,
93 *                                   INB_A > 0.
94 *  MB_A    (global) DESCA[ MB_    ] The blocking factor used to  distri-
95 *                                   bute the last  M_A-IMB_A  rows of A,
96 *                                   MB_A > 0.
97 *  NB_A    (global) DESCA[ NB_    ] The blocking factor used to  distri-
98 *                                   bute the last  N_A-INB_A  columns of
99 *                                   A, NB_A > 0.
100 *  RSRC_A  (global) DESCA[ RSRC_  ] The process row over which the first
101 *                                   row of the matrix  A is distributed,
102 *                                   NPROW > RSRC_A >= 0.
103 *  CSRC_A  (global) DESCA[ CSRC_  ] The  process column  over  which the
104 *                                   first column of  A  is  distributed.
105 *                                   NPCOL > CSRC_A >= 0.
106 *  LLD_A   (local)  DESCA[ LLD_   ] The  leading dimension  of the local
107 *                                   array  storing  the  local blocks of
108 *                                   the distributed matrix A,
109 *                                   IF( Lc( 1, N_A ) > 0 )
110 *                                      LLD_A >= MAX( 1, Lr( 1, M_A ) )
111 *                                   ELSE
112 *                                      LLD_A >= 1.
113 *
114 *  Let K be the number of  rows of a matrix A starting at the global in-
115 *  dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
116 *  that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
117 *  receive if these K rows were distributed over NPROW processes.  If  K
118 *  is the number of columns of a matrix  A  starting at the global index
119 *  JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number  of co-
120 *  lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would  receive if
121 *  these K columns were distributed over NPCOL processes.
122 *
123 *  The values of Lr() and Lc() may be determined via a call to the func-
124 *  tion PB_Cnumroc:
125 *  Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
126 *  Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
127 *
128 *  Arguments
129 *  =========
130 *
131 *  SIDE    (global input) CHARACTER*1
132 *          On entry,  SIDE  specifies whether  op( sub( A ) ) multiplies
133 *          sub( B ) from the left or right as follows:
134 *
135 *          SIDE = 'L' or 'l'  sub( B ) := alpha*op( sub( A ) )*sub( B ),
136 *
137 *          SIDE = 'R' or 'r'  sub( B ) := alpha*sub( B )*op( sub( A ) ).
138 *
139 *  UPLO    (global input) CHARACTER*1
140 *          On entry,  UPLO  specifies whether the submatrix  sub( A ) is
141 *          an upper or lower triangular submatrix as follows:
142 *
143 *             UPLO = 'U' or 'u'   sub( A ) is an upper triangular
144 *                                 submatrix,
145 *
146 *             UPLO = 'L' or 'l'   sub( A ) is a  lower triangular
147 *                                 submatrix.
148 *
149 *  TRANSA  (global input) CHARACTER*1
150 *          On entry,  TRANSA  specifies the form of op( sub( A ) ) to be
151 *          used in the matrix multiplication as follows:
152 *
153 *             TRANSA = 'N' or 'n'   op( sub( A ) ) = sub( A ),
154 *
155 *             TRANSA = 'T' or 't'   op( sub( A ) ) = sub( A )',
156 *
157 *             TRANSA = 'C' or 'c'   op( sub( A ) ) = sub( A )'.
158 *
159 *  DIAG    (global input) CHARACTER*1
160 *          On entry,  DIAG  specifies  whether or not  sub( A )  is unit
161 *          triangular as follows:
162 *
163 *             DIAG = 'U' or 'u'  sub( A )  is  assumed to be unit trian-
164 *                                gular,
165 *
166 *             DIAG = 'N' or 'n'  sub( A ) is not assumed to be unit tri-
167 *                                angular.
168 *
169 *  M       (global input) INTEGER
170 *          On entry,  M  specifies the number of rows of  the  submatrix
171 *          sub( B ). M  must be at least zero.
172 *
173 *  N       (global input) INTEGER
174 *          On entry, N  specifies the number of columns of the submatrix
175 *          sub( B ). N  must be at least zero.
176 *
177 *  ALPHA   (global input) DOUBLE PRECISION
178 *          On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
179 *          supplied  as  zero  then  the  local entries of  the array  B
180 *          corresponding to the entries of the submatrix  sub( B )  need
181 *          not be set on input.
182 *
183 *  A       (local input) DOUBLE PRECISION array
184 *          On entry, A is an array of dimension (LLD_A, Ka), where Ka is
185 *          at least Lc( 1, JA+M-1 ) when  SIDE = 'L' or 'l'   and  is at
186 *          least Lc( 1, JA+N-1 ) otherwise.  Before  entry,  this  array
187 *          contains the local entries of the matrix A.
188 *          Before entry with  UPLO = 'U' or 'u', this array contains the
189 *          local entries corresponding to  the entries of the upper tri-
190 *          angular submatrix  sub( A ), and the local entries correspon-
191 *          ding to the entries of the strictly lower triangular part  of
192 *          the submatrix  sub( A )  are not referenced.
193 *          Before entry with  UPLO = 'L' or 'l', this array contains the
194 *          local entries corresponding to  the entries of the lower tri-
195 *          angular submatrix  sub( A ), and the local entries correspon-
196 *          ding to the entries of the strictly upper triangular part  of
197 *          the submatrix  sub( A )  are not referenced.
198 *          Note  that  when DIAG = 'U' or 'u', the local entries corres-
199 *          ponding to the  diagonal elements  of the submatrix  sub( A )
200 *          are not referenced either, but are assumed to be unity.
201 *
202 *  IA      (global input) INTEGER
203 *          On entry, IA  specifies A's global row index, which points to
204 *          the beginning of the submatrix sub( A ).
205 *
206 *  JA      (global input) INTEGER
207 *          On entry, JA  specifies A's global column index, which points
208 *          to the beginning of the submatrix sub( A ).
209 *
210 *  DESCA   (global and local input) INTEGER array
211 *          On entry, DESCA  is an integer array of dimension DLEN_. This
212 *          is the array descriptor for the matrix A.
213 *
214 *  B       (local input/local output) DOUBLE PRECISION array
215 *          On entry, B is an array of dimension (LLD_B, Kb), where Kb is
216 *          at least Lc( 1, JB+N-1 ).  Before  entry, this array contains
217 *          the local entries of the matrix  B.
218 *          On exit, the local entries of this array corresponding to the
219 *          to  the entries of the submatrix sub( B ) are  overwritten by
220 *          the local entries of the m by n transformed submatrix.
221 *
222 *  IB      (global input) INTEGER
223 *          On entry, IB  specifies B's global row index, which points to
224 *          the beginning of the submatrix sub( B ).
225 *
226 *  JB      (global input) INTEGER
227 *          On entry, JB  specifies B's global column index, which points
228 *          to the beginning of the submatrix sub( B ).
229 *
230 *  DESCB   (global and local input) INTEGER array
231 *          On entry, DESCB  is an integer array of dimension DLEN_. This
232 *          is the array descriptor for the matrix B.
233 *
234 *  -- Written on April 1, 1998 by
235 *     Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
236 *
237 *  ---------------------------------------------------------------------
238 */
239 /*
240 *  .. Local Scalars ..
241 */
242    char           DiagA, DirB, OpC, OpR, SideOp, TopC, TopR, TranOp, UploA,
243                   Var, ctop, ctopsave, rtop, rtopsave;
244    int            Ai, Aj, Bi, Bj, ChooseAB, ForceTop, ctxt, info, lside, mycol,
245                   myrow, nb, notran, nounit, npcol, nprow, upper;
246    double         ABestL, ABestR, Best, tmp1, tmp2, tmp3, tmp4;
247    PBTYP_T        * type;
248 /*
249 *  .. Local Arrays ..
250 */
251    int            Ad[DLEN_], Bd[DLEN_];
252 /* ..
253 *  .. Executable Statements ..
254 *
255 */
256    lside  = ( ( SideOp = Mupcase( F2C_CHAR( SIDE  )[0] ) ) ==   CLEFT );
257    upper  = ( ( UploA  = Mupcase( F2C_CHAR( UPLO  )[0] ) ) ==  CUPPER );
258    notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN );
259    nounit = ( ( DiagA  = Mupcase( F2C_CHAR( DIAG  )[0] ) ) == CNOUNIT );
260    PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad );
261    PB_CargFtoC( *IB, *JB, DESCB, &Bi, &Bj, Bd );
262 #ifndef NO_ARGCHK
263 /*
264 *  Test the input parameters
265 */
266    Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
267    if( !( info = ( ( nprow == -1 ) ? -( 1101 + CTXT_ ) : 0 ) ) )
268    {
269       if( ( !lside ) && ( SideOp != CRIGHT ) )
270       {
271          PB_Cwarn( ctxt, __LINE__, "PDTRMM", "Illegal SIDE = %c\n", SideOp );
272          info = -1;
273       }
274       else if( ( !upper ) && ( UploA != CLOWER ) )
275       {
276          PB_Cwarn( ctxt, __LINE__, "PDTRMM", "Illegal UPLO = %c\n", UploA );
277          info = -2;
278       }
279       else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) )
280       {
281          PB_Cwarn( ctxt, __LINE__, "PDTRMM", "Illegal TRANS = %c\n", TranOp );
282          info = -3;
283       }
284       if( ( !nounit ) && ( DiagA != CUNIT ) )
285       {
286          PB_Cwarn( ctxt, __LINE__, "PDTRMM",
287                    "Illegal DIAG = %c\n", DiagA );
288          info = -4;
289       }
290       if( lside )
291          PB_Cchkmat( ctxt, "PDTRMM", "A", *M, 5, *M, 5, Ai, Aj, Ad, 11,
292                      &info );
293       else
294          PB_Cchkmat( ctxt, "PDTRMM", "A", *N, 6, *N, 6, Ai, Aj, Ad, 11,
295                      &info );
296       PB_Cchkmat(    ctxt, "PDTRMM", "B", *M, 5, *N, 6, Bi, Bj, Bd, 15,
297                      &info );
298    }
299    if( info ) { PB_Cabort( ctxt, "PDTRMM", info ); return; }
300 #endif
301 /*
302 *  Quick return if possible
303 */
304    if( *M == 0 || *N == 0 ) return;
305 /*
306 *  Get type structure
307 */
308    type = PB_Cdtypeset();
309 /*
310 *  And when alpha is zero
311 */
312    if( ALPHA[REAL_PART] == ZERO )
313    {
314       PB_Cplapad( type, ALL, NOCONJG, *M, *N, type->zero, type->zero,
315                   ((char *) B), Bi, Bj, Bd );
316       return;
317    }
318 /*
319 *  Start the operations
320 */
321 #ifdef NO_ARGCHK
322    Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
323 #endif
324 /*
325 *  Algorithm selection is based on approximation of the communication volume
326 *  for distributed and aligned operands.
327 *
328 *  ABestR, ABestL : both operands sub( A ) and sub( B ) are communicated
329 *                   ( N >> M when SIDE is left and M >> N otherwise )
330 *  Best           : only sub( B ) is communicated
331 *                   ( M >> N when SIDE is left and N >> M otherwise )
332 */
333    if( lside )
334    {
335       if( notran )
336       {
337          tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp4 = DNROC( *N, Bd[NB_], npcol );
338          ABestR = (double)(*M) *
339            ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 / TWO ) +
340              ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp4 ) );
341          tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp2 = DNROC( *M, Ad[NB_], npcol );
342                                              tmp4 = DNROC( *M, Bd[MB_], nprow );
343          Best = (double)(*N) *
344                 ( CBRATIO * ( npcol == 1 ? ZERO : tmp1 ) +
345                   ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ) );
346          ChooseAB = ( ( 1.1 * ABestR ) <= Best );
347       }
348       else
349       {
350          tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp2 = DNROC( *M, Ad[NB_], npcol );
351          tmp4 = DNROC( *N, Bd[NB_], npcol );
352          ABestL = (double)(*M) *
353            ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 / TWO ) +
354              CBRATIO *
355              ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp4 ) );
356          ABestR = (double)(*M) *
357            ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 / TWO ) +
358              ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp4       ) +
359              MAX( tmp2, tmp1 ) / TWO );
360          tmp1 = DNROC( *M, Ad[MB_], nprow ); tmp2 = DNROC( *M, Ad[NB_], npcol );
361                                              tmp4 = DNROC( *M, Bd[MB_], nprow );
362          Best = (double)(*N) *
363                 ( ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) +
364                   CBRATIO * ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ) );
365          ChooseAB = ( ( ( 1.1 * ABestL ) <= Best ) ||
366                       ( ( 1.1 * ABestR ) <= Best ) );
367       }
368    }
369    else
370    {
371       if( notran )
372       {
373          tmp2 = DNROC( *N, Ad[NB_], npcol ); tmp3 = DNROC( *M, Bd[MB_], nprow );
374          ABestR = (double)(*N) *
375            ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 / TWO ) +
376              ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp3 ) );
377          tmp1 = DNROC( *N, Ad[MB_], nprow ); tmp2 = DNROC( *N, Ad[NB_], npcol );
378          tmp3 = DNROC( *N, Bd[NB_], npcol );
379          Best = (double)(*M) *
380                 ( CBRATIO * ( nprow == 1 ? ZERO : tmp2 ) +
381                   ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) );
382          ChooseAB = ( ( 1.1 * ABestR ) <= Best );
383       }
384       else
385       {
386          tmp1 = DNROC( *N, Ad[MB_], nprow ); tmp2 = DNROC( *N, Ad[NB_], npcol );
387          tmp3 = DNROC( *M, Bd[MB_], nprow );
388          ABestL = (double)(*N) *
389            ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 / TWO ) +
390              CBRATIO *
391              ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp3 ) );
392          ABestR = (double)(*N) *
393            ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 / TWO ) +
394              ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp3       ) +
395              MAX( tmp2, tmp1 ) / TWO );
396          tmp1 = DNROC( *N, Ad[MB_], nprow ); tmp2 = DNROC( *N, Ad[NB_], npcol );
397          tmp3 = DNROC( *N, Bd[NB_], npcol );
398          Best = (double)(*M) *
399                 ( ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ) +
400                   CBRATIO * ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) );
401          ChooseAB = ( ( ( 1.1 * ABestL ) <= Best ) ||
402                       ( ( 1.1 * ABestR ) <= Best ) );
403       }
404    }
405 /*
406 *  BLACS topologies are enforced iff M and N are strictly greater than the
407 *  logical block size returned by pilaenv_. Otherwise, it is assumed that the
408 *  routine calling this routine has already selected an adequate topology.
409 */
410    nb       = pilaenv_( &ctxt, C2F_CHAR( &type->type ) );
411    ForceTop = ( ( *M > nb ) && ( *N > nb ) );
412 
413    if( ChooseAB )
414    {
415       if( lside )
416       {
417          if( notran )
418          {
419             OpR = CBCAST; OpC = CBCAST; Var = CRIGHT;
420             if( upper ) { TopR = TopC = CTOP_IRING; }
421             else        { TopR = TopC = CTOP_DRING; }
422          }
423          else
424          {
425             if( ABestL <= ABestR )
426             {
427                OpR = CBCAST; OpC = CCOMBINE; Var = CLEFT;
428                if( upper ) { TopR = CTOP_DRING; TopC = CTOP_IRING; }
429                else        { TopR = CTOP_IRING; TopC = CTOP_DRING; }
430             }
431             else
432             {
433                OpR = CBCAST; OpC = CBCAST;  Var = CRIGHT;
434                if( upper ) { TopR = TopC = CTOP_DRING; }
435                else        { TopR = TopC = CTOP_IRING; }
436             }
437          }
438       }
439       else
440       {
441          if( notran )
442          {
443             OpR = CBCAST; OpC = CBCAST; Var = CRIGHT;
444             if( upper ) { TopR = TopC = CTOP_DRING; }
445             else        { TopR = TopC = CTOP_IRING; }
446          }
447          else
448          {
449             if( ABestL <= ABestR )
450             {
451                OpR = CCOMBINE; OpC = CBCAST; Var = CLEFT;
452                if( upper ) { TopR = CTOP_DRING; TopC = CTOP_IRING; }
453                else        { TopR = CTOP_IRING; TopC = CTOP_DRING; }
454             }
455             else
456             {
457                OpR = CBCAST; OpC = CBCAST;  Var = CRIGHT;
458                if( upper ) { TopR = TopC = CTOP_IRING; }
459                else        { TopR = TopC = CTOP_DRING; }
460             }
461          }
462       }
463 
464       rtop = *PB_Ctop( &ctxt, &OpR, ROW,    TOP_GET );
465       ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET );
466 
467       if( ForceTop )
468       {
469          if( ( rtopsave = rtop ) != TopR )
470             rtop = *PB_Ctop( &ctxt, &OpR, ROW,    &TopR );
471          if( ( ctopsave = ctop ) != TopC )
472             ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC );
473 /*
474 *  Remove the next 4 lines when the BLACS combine operations support ring
475 *  topologies
476 */
477          if( OpR == CCOMBINE )
478             rtop = *PB_Ctop( &ctxt, &OpR, ROW,    TOP_DEFAULT );
479          if( OpC == CCOMBINE )
480             ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_DEFAULT );
481       }
482 
483       PB_CptrmmAB( type, &Var, &SideOp, &UploA, ( notran ? NOTRAN : TRAN ),
484                    &DiagA, *M, *N, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad,
485                    ((char *)B), Bi, Bj, Bd );
486    }
487    else
488    {
489       if( ( lside && notran ) || ( !( lside ) && !( notran ) ) )
490       {
491          OpR = CCOMBINE; OpC = CBCAST;
492          rtop = *PB_Ctop( &ctxt, &OpR, ROW,    TOP_GET );
493          ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET );
494 
495          if( ForceTop )
496          {
497             rtopsave = rtop;
498             ctopsave = ctop;
499 /*
500 *  No clear winner for the ring topologies, so that if a ring topology is
501 *  already selected, keep it.
502 */
503             if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) &&
504                 ( rtop != CTOP_SRING ) )
505                rtop = *PB_Ctop( &ctxt, &OpR, ROW,    TOP_SRING );
506             ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_DEFAULT );
507 /*
508 *  Remove the next line when the BLACS combine operations support ring
509 *  topologies
510 */
511             rtop = *PB_Ctop( &ctxt, &OpR, ROW,    TOP_DEFAULT );
512          }
513       }
514       else
515       {
516          OpR = CBCAST; OpC = CCOMBINE;
517          rtop = *PB_Ctop( &ctxt, &OpR, ROW,    TOP_GET );
518          ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET );
519 
520          if( ForceTop )
521          {
522             rtopsave = rtop;
523             ctopsave = ctop;
524 /*
525 *  No clear winner for the ring topologies, so that if a ring topology is
526 *  already selected, keep it.
527 */
528             if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) &&
529                 ( ctop != CTOP_SRING ) )
530                ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_SRING );
531             rtop = *PB_Ctop( &ctxt, &OpR, ROW,    TOP_DEFAULT );
532 /*
533 *  Remove the next line when the BLACS combine operations support ring
534 *  topologies
535 */
536             ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_DEFAULT );
537          }
538       }
539 
540       if( lside )
541          DirB = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD );
542       else
543          DirB = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD );
544 
545       PB_CptrmmB( type, &DirB, &SideOp, &UploA, ( notran ? NOTRAN : TRAN ),
546                   &DiagA, *M, *N, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad,
547                   ((char *)B), Bi, Bj, Bd );
548    }
549 /*
550 *  Restore the BLACS topologies when necessary.
551 */
552    if( ForceTop )
553    {
554       rtopsave = *PB_Ctop( &ctxt, &OpR, ROW,    &rtopsave );
555       ctopsave = *PB_Ctop( &ctxt, &OpC, COLUMN, &ctopsave );
556    }
557 /*
558 *  End of PDTRMM
559 */
560 }
561