1 /* ---------------------------------------------------------------------
2 *
3 *  -- PBLAS routine (version 2.0) --
4 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 *     and University of California, Berkeley.
6 *     April 1, 1998
7 *
8 *  ---------------------------------------------------------------------
9 */
10 /*
11 *  Include files
12 */
13 #include "pblas.h"
14 #include "PBpblas.h"
15 #include "PBtools.h"
16 #include "PBblacs.h"
17 #include "PBblas.h"
18 
19 #ifdef __STDC__
pcher2k_(F_CHAR_T UPLO,F_CHAR_T TRANS,int * N,int * K,float * ALPHA,float * A,int * IA,int * JA,int * DESCA,float * B,int * IB,int * JB,int * DESCB,float * BETA,float * C,int * IC,int * JC,int * DESCC)20 void pcher2k_( F_CHAR_T UPLO, F_CHAR_T TRANS, int * N, int * K,
21               float * ALPHA,
22               float * A, int * IA, int * JA, int * DESCA,
23               float * B, int * IB, int * JB, int * DESCB,
24               float * BETA,
25               float * C, int * IC, int * JC, int * DESCC )
26 #else
27 void pcher2k_( UPLO, TRANS, N, K, ALPHA, A, IA, JA, DESCA,
28               B, IB, JB, DESCB, BETA, C, IC, JC, DESCC )
29 /*
30 *  .. Scalar Arguments ..
31 */
32    F_CHAR_T       TRANS, UPLO;
33    int            * IA, * IB, * IC, * JA, * JB, * JC, * K, * N;
34    float          * ALPHA, * BETA;
35 /*
36 *  .. Array Arguments ..
37 */
38    int            * DESCA, * DESCB, * DESCC;
39    float          * A, * B, * C;
40 #endif
41 {
42 /*
43 *  Purpose
44 *  =======
45 *
46 *  PCHER2K  performs one of the Hermitian rank 2k operations
47 *
48 *     sub( C ) := alpha*sub( A )*conjg( sub( B ) )' +
49 *                 conjg( alpha )*sub( B )*conjg( sub( A ) )' +
50 *                 beta*sub( C ),
51 *
52 *  or
53 *
54 *     sub( C ) := alpha*conjg( sub( A )' )*sub( B ) +
55 *                 conjg( alpha )*conjg( sub( B )' )*sub( A ) +
56 *                 beta*sub( C ),
57 *
58 *  where
59 *
60 *     sub( C ) denotes C(IC:IC+N-1,JC:JC+N-1),
61 *
62 *     sub( A ) denotes A(IA:IA+N-1,JA:JA+K-1)  if TRANS = 'N',
63 *                      A(IA:IA+K-1,JA:JA+N-1)  otherwise, and,
64 *
65 *     sub( B ) denotes B(IB:IB+N-1,JB:JB+K-1)  if TRANS = 'N',
66 *                      B(IB:IB+K-1,JB:JB+N-1)  otherwise.
67 *
68 *  Alpha  and  beta  are scalars with beta real,  sub( C )  is an n by n
69 *  Hermitian submatrix and  sub( A ) and sub( B ) are n by k submatrices
70 *  in the first case and k by n submatrices in the second case.
71 *
72 *  Notes
73 *  =====
74 *
75 *  A description  vector  is associated with each 2D block-cyclicly dis-
76 *  tributed matrix.  This  vector  stores  the  information  required to
77 *  establish the  mapping  between a  matrix entry and its corresponding
78 *  process and memory location.
79 *
80 *  In  the  following  comments,   the character _  should  be  read  as
81 *  "of  the  distributed  matrix".  Let  A  be a generic term for any 2D
82 *  block cyclicly distributed matrix.  Its description vector is DESC_A:
83 *
84 *  NOTATION         STORED IN       EXPLANATION
85 *  ---------------- --------------- ------------------------------------
86 *  DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
87 *  CTXT_A  (global) DESCA[ CTXT_  ] The BLACS context handle, indicating
88 *                                   the NPROW x NPCOL BLACS process grid
89 *                                   A  is  distributed over. The context
90 *                                   itself  is  global,  but  the handle
91 *                                   (the integer value) may vary.
92 *  M_A     (global) DESCA[ M_     ] The  number of rows in the distribu-
93 *                                   ted matrix A, M_A >= 0.
94 *  N_A     (global) DESCA[ N_     ] The number of columns in the distri-
95 *                                   buted matrix A, N_A >= 0.
96 *  IMB_A   (global) DESCA[ IMB_   ] The number of rows of the upper left
97 *                                   block of the matrix A, IMB_A > 0.
98 *  INB_A   (global) DESCA[ INB_   ] The  number  of columns of the upper
99 *                                   left   block   of   the  matrix   A,
100 *                                   INB_A > 0.
101 *  MB_A    (global) DESCA[ MB_    ] The blocking factor used to  distri-
102 *                                   bute the last  M_A-IMB_A  rows of A,
103 *                                   MB_A > 0.
104 *  NB_A    (global) DESCA[ NB_    ] The blocking factor used to  distri-
105 *                                   bute the last  N_A-INB_A  columns of
106 *                                   A, NB_A > 0.
107 *  RSRC_A  (global) DESCA[ RSRC_  ] The process row over which the first
108 *                                   row of the matrix  A is distributed,
109 *                                   NPROW > RSRC_A >= 0.
110 *  CSRC_A  (global) DESCA[ CSRC_  ] The  process column  over  which the
111 *                                   first column of  A  is  distributed.
112 *                                   NPCOL > CSRC_A >= 0.
113 *  LLD_A   (local)  DESCA[ LLD_   ] The  leading dimension  of the local
114 *                                   array  storing  the  local blocks of
115 *                                   the distributed matrix A,
116 *                                   IF( Lc( 1, N_A ) > 0 )
117 *                                      LLD_A >= MAX( 1, Lr( 1, M_A ) )
118 *                                   ELSE
119 *                                      LLD_A >= 1.
120 *
121 *  Let K be the number of  rows of a matrix A starting at the global in-
122 *  dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
123 *  that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
124 *  receive if these K rows were distributed over NPROW processes.  If  K
125 *  is the number of columns of a matrix  A  starting at the global index
126 *  JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number  of co-
127 *  lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would  receive if
128 *  these K columns were distributed over NPCOL processes.
129 *
130 *  The values of Lr() and Lc() may be determined via a call to the func-
131 *  tion PB_Cnumroc:
132 *  Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
133 *  Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
134 *
135 *  Arguments
136 *  =========
137 *
138 *  UPLO    (global input) CHARACTER*1
139 *          On  entry,   UPLO  specifies  whether  the  local  pieces  of
140 *          the array  C  containing the  upper or lower triangular  part
141 *          of the Hermitian submatrix  sub( C )  are to be referenced as
142 *          follows:
143 *
144 *             UPLO = 'U' or 'u'   Only the local pieces corresponding to
145 *                                 the   upper  triangular  part  of  the
146 *                                 Hermitian submatrix sub( C ) are to be
147 *                                 referenced,
148 *
149 *             UPLO = 'L' or 'l'   Only the local pieces corresponding to
150 *                                 the   lower  triangular  part  of  the
151 *                                 Hermitian submatrix sub( C ) are to be
152 *                                 referenced.
153 *
154 *  TRANS   (global input) CHARACTER*1
155 *          On entry,  TRANS  specifies the  operation to be performed as
156 *          follows:
157 *
158 *             TRANS = 'N' or 'n'
159 *               sub( C ) := alpha*sub( A )*conjg( sub( B )' ) +
160 *                           conjg( alpha )*sub( B )*conjg( sub( A )' ) +
161 *                           beta*sub( C ),
162 *
163 *             TRANS = 'C' or 'c'
164 *               sub( C ) := alpha*conjg( sub( A )' )*sub( B ) +
165 *                           conjg( alpha )*conjg( sub( B )' )*sub( A ) +
166 *                           beta*sub( C ).
167 *
168 *  N       (global input) INTEGER
169 *          On entry,  N specifies the order of the  submatrix  sub( C ).
170 *          N must be at least zero.
171 *
172 *  K       (global input) INTEGER
173 *          On entry with  TRANS = 'N' or 'n',  K specifies the number of
174 *          columns of  the  submatrices  sub( A )  and  sub( B ), and on
175 *          entry with TRANS = 'C' or 'c', K specifies the number of rows
176 *          of the submatrices sub( A ) and sub( B ). K  must be at least
177 *          zero.
178 *
179 *  ALPHA   (global input) COMPLEX
180 *          On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
181 *          supplied  as  zero  then  the  local entries of the arrays  A
182 *          and  B  corresponding  to  the  entries  of  the  submatrices
183 *          sub( A ) and sub( B ) respectively need not be set  on input.
184 *
185 *  A       (local input) COMPLEX array
186 *          On entry, A is an array of dimension (LLD_A, Ka), where Ka is
187 *          at least Lc( 1, JA+K-1 ) when  TRANS = 'N' or 'n', and  is at
188 *          least Lc( 1, JA+N-1 ) otherwise.  Before  entry,  this  array
189 *          contains the local entries of the matrix A.
190 *          Before entry with TRANS = 'N' or 'n', this array contains the
191 *          local entries corresponding to the entries of the n by k sub-
192 *          matrix sub( A ), otherwise the local entries corresponding to
193 *          the entries of the k by n submatrix sub( A ).
194 *
195 *  IA      (global input) INTEGER
196 *          On entry, IA  specifies A's global row index, which points to
197 *          the beginning of the submatrix sub( A ).
198 *
199 *  JA      (global input) INTEGER
200 *          On entry, JA  specifies A's global column index, which points
201 *          to the beginning of the submatrix sub( A ).
202 *
203 *  DESCA   (global and local input) INTEGER array
204 *          On entry, DESCA  is an integer array of dimension DLEN_. This
205 *          is the array descriptor for the matrix A.
206 *
207 *  B       (local input) COMPLEX array
208 *          On entry, B is an array of dimension (LLD_B, Kb), where Kb is
209 *          at least Lc( 1, JB+K-1 ) when  TRANS = 'N' or 'n', and  is at
210 *          least Lc( 1, JB+N-1 ) otherwise.  Before  entry,  this  array
211 *          contains the local entries of the matrix B.
212 *          Before entry with TRANS = 'N' or 'n', this array contains the
213 *          local entries corresponding to the entries of the n by k sub-
214 *          matrix sub( B ), otherwise the local entries corresponding to
215 *          the entries of the k by n submatrix sub( B ).
216 *
217 *  IB      (global input) INTEGER
218 *          On entry, IB  specifies B's global row index, which points to
219 *          the beginning of the submatrix sub( B ).
220 *
221 *  JB      (global input) INTEGER
222 *          On entry, JB  specifies B's global column index, which points
223 *          to the beginning of the submatrix sub( B ).
224 *
225 *  DESCB   (global and local input) INTEGER array
226 *          On entry, DESCB  is an integer array of dimension DLEN_. This
227 *          is the array descriptor for the matrix B.
228 *
229 *  BETA    (global input) REAL
230 *          On entry,  BETA  specifies the scalar  beta.   When  BETA  is
231 *          supplied  as  zero  then  the  local entries of  the array  C
232 *          corresponding to the entries of the submatrix  sub( C )  need
233 *          not be set on input.
234 *
235 *  C       (local input/local output) COMPLEX array
236 *          On entry, C is an array of dimension (LLD_C, Kc), where Kc is
237 *          at least Lc( 1, JC+N-1 ).  Before  entry, this array contains
238 *          the local entries of the matrix C.
239 *          Before  entry  with  UPLO = 'U' or 'u', this  array  contains
240 *          the local entries corresponding to the upper triangular  part
241 *          of the  Hermitian  submatrix  sub( C ), and the local entries
242 *          corresponding to the  strictly lower triangular  of  sub( C )
243 *          are not  referenced.  On exit,  the upper triangular part  of
244 *          sub( C ) is overwritten by the  upper triangular part  of the
245 *          updated submatrix.
246 *          Before  entry  with  UPLO = 'L' or 'l', this  array  contains
247 *          the local entries corresponding to the lower triangular  part
248 *          of the  Hermitian  submatrix  sub( C ), and the local entries
249 *          corresponding to the  strictly upper triangular  of  sub( C )
250 *          are not  referenced.  On exit,  the lower triangular part  of
251 *          sub( C ) is overwritten by the  lower triangular part  of the
252 *          updated submatrix.
253 *          Note that the  imaginary parts  of the local entries  corres-
254 *          ponding to the  diagonal elements  of  sub( C )  need not  be
255 *          set,  they are assumed to be zero,  and on exit they are  set
256 *          to zero.
257 *
258 *  IC      (global input) INTEGER
259 *          On entry, IC  specifies C's global row index, which points to
260 *          the beginning of the submatrix sub( C ).
261 *
262 *  JC      (global input) INTEGER
263 *          On entry, JC  specifies C's global column index, which points
264 *          to the beginning of the submatrix sub( C ).
265 *
266 *  DESCC   (global and local input) INTEGER array
267 *          On entry, DESCC  is an integer array of dimension DLEN_. This
268 *          is the array descriptor for the matrix C.
269 *
270 *  -- Written on April 1, 1998 by
271 *     Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
272 *
273 *  ---------------------------------------------------------------------
274 */
275 /*
276 *  .. Local Scalars ..
277 */
278    char           DirA, OpC, OpR, TopC, TopR, TranOp, UploC, ctop, ctopsave,
279                   rtop, rtopsave;
280    int            Ai, Aj, Bi, Bj, ChooseABC, Ci, Cj, ForceTop, ctxt, info,
281                   mycol, myrow, nb, notran, npcol, nprow, upper;
282    double         ABCest, ABest, tmp1, tmp2, tmp3, tmp4;
283    PBTYP_T        * type;
284 /*
285 *  .. Local Arrays ..
286 */
287    int            Ad[DLEN_], Bd[DLEN_], Cd[DLEN_];
288 /* ..
289 *  .. Executable Statements ..
290 *
291 */
292    upper  = ( ( UploC  = Mupcase( F2C_CHAR( UPLO  )[0] ) ) ==  CUPPER );
293    notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN );
294    PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad );
295    PB_CargFtoC( *IB, *JB, DESCB, &Bi, &Bj, Bd );
296    PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd );
297 #ifndef NO_ARGCHK
298 /*
299 *  Test the input parameters
300 */
301    Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
302    if( !( info = ( ( nprow == -1 ) ? -( 901 + CTXT_ ) : 0 ) ) )
303    {
304       if( ( !upper ) && ( UploC != CLOWER ) )
305       {
306          PB_Cwarn( ctxt, __LINE__, "PCHER2K", "Illegal UPLO = %c\n", UploC );
307          info = -1;
308       }
309       else if( ( !notran ) && ( TranOp != CCOTRAN ) )
310       {
311          PB_Cwarn( ctxt, __LINE__, "PCHER2K", "Illegal TRANS = %c\n", TranOp );
312          info = -2;
313       }
314       if( notran )
315       {
316          PB_Cchkmat( ctxt, "PCHER2K", "A", *N, 3, *K, 4, Ai, Aj, Ad,  9,
317                      &info );
318          PB_Cchkmat( ctxt, "PCHER2K", "B", *N, 3, *K, 4, Bi, Bj, Bd, 13,
319                      &info );
320       }
321       else
322       {
323          PB_Cchkmat( ctxt, "PCHER2K", "A", *K, 4, *N, 3, Ai, Aj, Ad,  9,
324                      &info );
325          PB_Cchkmat( ctxt, "PCHER2K", "B", *K, 4, *N, 3, Bi, Bj, Bd, 13,
326                      &info );
327       }
328       PB_Cchkmat(    ctxt, "PCHER2K", "C", *N, 3, *N, 3, Ci, Cj, Cd, 18,
329                      &info );
330    }
331    if( info ) { PB_Cabort( ctxt, "PCHER2K", info ); return; }
332 #endif
333 /*
334 *  Quick return if possible
335 */
336    if( ( *N == 0 ) ||
337        ( ( ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) ||
338            ( *K == 0                      ) ) &&
339          ( ( BETA[REAL_PART] == ONE )       ) ) )
340       return;
341 /*
342 *  Get type structure
343 */
344    type = PB_Cctypeset();
345 /*
346 *  And when alpha or K is zero
347 */
348    if( ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) ||
349        ( *K == 0 ) )
350    {
351       if( BETA[REAL_PART] == ZERO )
352       {
353          PB_Cplapad( type, &UploC, NOCONJG, *N, *N, type->zero, type->zero,
354                      ((char *) C), Ci, Cj, Cd );
355       }
356       else
357       {
358          PB_Cplascal( type, &UploC, CONJG,   *N, *N, ((char *) BETA),
359                       ((char *) C), Ci, Cj, Cd );
360       }
361       return;
362    }
363 /*
364 *  Start the operations
365 */
366 #ifdef NO_ARGCHK
367    Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
368 #endif
369 /*
370 *  Algorithm selection is based on approximation of the communication volume
371 *  for distributed and aligned operands.
372 *
373 *  ABCest: operands sub( A ), sub( B ) and sub( C ) are communicated (K >> N)
374 *  ABest : only sub( A ) and sub( B ) are communicated               (N >> K)
375 */
376    if( notran )
377    {
378       tmp1   = DNROC( *N, Cd[MB_], nprow );
379       tmp3   = DNROC( *K, Ad[NB_], npcol ); tmp4 = DNROC( *K, Bd[NB_], npcol );
380       ABCest = (double)(*N) *
381         ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp3 ) +
382           ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp4 ) +
383           ( ( ( ( Ad[CSRC_] == -1 ) && ( Bd[CSRC_] == -1 ) ) ||
384               ( npcol == 1 ) ) ? ZERO : CBRATIO * tmp1 / TWO ) );
385       tmp1   = DNROC( *N, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol );
386       tmp3   = DNROC( *N, Ad[MB_], nprow ); tmp4 = DNROC( *N, Bd[MB_], nprow );
387       ABest  = (double)(*K) *
388                ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) +
389                  ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) +
390                  TWO * ( nprow == 1 ? ZERO : tmp2 ) +
391                  MAX( tmp2, tmp3 ) + MAX( tmp2, tmp4 ) );
392    }
393    else
394    {
395       tmp2   = DNROC( *N, Cd[NB_], npcol );
396       tmp3   = DNROC( *K, Ad[MB_], nprow ); tmp4 = DNROC( *K, Bd[MB_], nprow );
397       ABCest = (double)(*N) *
398         ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp3 ) +
399           ( ( ( Bd[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp4 ) +
400           ( ( ( ( Ad[RSRC_] == -1 ) && ( Bd[RSRC_] == -1 ) ) ||
401               ( nprow == 1 ) ) ? ZERO : CBRATIO * tmp2 / TWO ) );
402       tmp1   = DNROC( *N, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol );
403       tmp3   = DNROC( *N, Ad[NB_], npcol ); tmp4 = DNROC( *N, Bd[NB_], npcol );
404       ABest  = (double)(*K) *
405                ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ) +
406                  ( ( ( Bd[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ) +
407                  TWO * ( npcol == 1 ? ZERO : tmp1 ) +
408                  MAX( tmp1, tmp3 ) + MAX( tmp1, tmp4 ) );
409    }
410 /*
411 *  Shift a little the cross-over point between both algorithms.
412 */
413    ChooseABC = ( ( 1.4 * ABCest ) <= ABest );
414 /*
415 *  BLACS topologies are enforced iff N and K are strictly greater than the
416 *  logical block size returned by pilaenv_. Otherwise, it is assumed that the
417 *  routine calling this routine has already selected an adequate topology.
418 */
419    nb       = pilaenv_( &ctxt, C2F_CHAR( &type->type ) );
420    ForceTop = ( ( *N > nb ) && ( *K > nb ) );
421 
422    if( ChooseABC )
423    {
424       if( notran )
425       {
426          OpC  = CBCAST;
427          ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET );
428 
429          if( ForceTop )
430          {
431             OpR  = CCOMBINE;
432             rtop = *PB_Ctop( &ctxt, &OpR, ROW,    TOP_GET );
433 
434             rtopsave = rtop;
435             ctopsave = ctop;
436 
437             if( upper ) { TopR = CTOP_IRING; TopC = CTOP_DRING; }
438             else        { TopR = CTOP_DRING; TopC = CTOP_IRING; }
439 
440             ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC );
441             rtop = *PB_Ctop( &ctxt, &OpR, ROW,    &TopR );
442 /*
443 *  Remove the next line when the BLACS combine operations support ring
444 *  topologies
445 */
446             rtop = *PB_Ctop( &ctxt, &OpR, ROW,    TOP_DEFAULT );
447          }
448 
449          DirA = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD );
450       }
451       else
452       {
453          OpR  = CBCAST;
454          rtop = *PB_Ctop( &ctxt, &OpR, ROW,    TOP_GET );
455 
456          if( ForceTop )
457          {
458             OpC  = CCOMBINE;
459             ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET );
460 
461             rtopsave = rtop;
462             ctopsave = ctop;
463 
464             if( upper ) { TopR = CTOP_IRING; TopC = CTOP_DRING; }
465             else        { TopR = CTOP_DRING; TopC = CTOP_IRING; }
466 
467             rtop = *PB_Ctop( &ctxt, &OpR, ROW,    &TopR );
468             ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC );
469 /*
470 *  Remove the next line when the BLACS combine operations support ring
471 *  topologies
472 */
473             ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_DEFAULT );
474          }
475 
476          DirA = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD );
477       }
478 
479       PB_Cpsyr2kAC( type, &DirA, CONJG,   &UploC, ( notran ? NOTRAN : COTRAN ),
480                     *N, *K, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad,
481                     ((char *)B), Bi, Bj, Bd, ((char *)BETA), ((char *)C), Ci,
482                     Cj, Cd );
483    }
484    else
485    {
486       if( notran )
487       {
488          OpR  = CBCAST;
489          rtop = *PB_Ctop( &ctxt, &OpR, ROW,    TOP_GET );
490 
491          if( ForceTop )
492          {
493             OpC  = CBCAST;
494             ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET );
495 
496             rtopsave = rtop;
497             ctopsave = ctop;
498 /*
499 *  No clear winner for the ring topologies, so that if a ring topology is
500 *  already selected, keep it.
501 */
502             if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) &&
503                 ( rtop != CTOP_SRING ) )
504                rtop = *PB_Ctop( &ctxt, &OpR, ROW,    TOP_SRING );
505             if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) &&
506                 ( ctop != CTOP_SRING ) )
507                ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_SRING );
508          }
509 
510          DirA = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD );
511       }
512       else
513       {
514          OpC  = CBCAST;
515          ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET );
516 
517          if( ForceTop )
518          {
519             OpR  = CBCAST;
520             rtop = *PB_Ctop( &ctxt, &OpR, ROW,    TOP_GET );
521 
522             rtopsave = rtop;
523             ctopsave = ctop;
524 /*
525 *  No clear winner for the ring topologies, so that if a ring topology is
526 *  already selected, keep it.
527 */
528             if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) &&
529                 ( rtop != CTOP_SRING ) )
530                rtop = *PB_Ctop( &ctxt, &OpR, ROW,    TOP_SRING );
531             if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) &&
532                 ( ctop != CTOP_SRING ) )
533                ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_SRING );
534          }
535 
536          DirA = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD );
537       }
538 
539       PB_Cpsyr2kA( type, &DirA, CONJG,   &UploC, ( notran ? NOTRAN : COTRAN ),
540                    *N, *K, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad,
541                    ((char *)B), Bi, Bj, Bd, ((char *)BETA), ((char *)C), Ci, Cj,
542                    Cd );
543    }
544 /*
545 *  Restore the BLACS topologies when necessary.
546 */
547    if( ForceTop )
548    {
549       rtopsave = *PB_Ctop( &ctxt, &OpR, ROW,    &rtopsave );
550       ctopsave = *PB_Ctop( &ctxt, &OpC, COLUMN, &ctopsave );
551    }
552 /*
553 *  End of PCHER2K
554 */
555 }
556