1 /* ---------------------------------------------------------------------
2 *
3 *  -- PBLAS routine (version 2.0) --
4 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 *     and University of California, Berkeley.
6 *     April 1, 1998
7 *
8 *  ---------------------------------------------------------------------
9 */
10 /*
11 *  Include files
12 */
13 #include "pblas.h"
14 #include "PBpblas.h"
15 #include "PBtools.h"
16 #include "PBblacs.h"
17 #include "PBblas.h"
18 
19 #ifdef __STDC__
pcgeadd_(F_CHAR_T TRANS,int * M,int * N,float * ALPHA,float * A,int * IA,int * JA,int * DESCA,float * BETA,float * C,int * IC,int * JC,int * DESCC)20 void pcgeadd_( F_CHAR_T TRANS, int * M, int * N,
21                float * ALPHA,
22                float * A, int * IA, int * JA, int * DESCA,
23                float * BETA,
24                float * C, int * IC, int * JC, int * DESCC )
25 #else
26 void pcgeadd_( TRANS, M, N, ALPHA, A, IA, JA, DESCA, BETA, C, IC, JC, DESCC )
27 /*
28 *  .. Scalar Arguments ..
29 */
30    F_CHAR_T       TRANS;
31    int            * IA, * IC, * JA, * JC, * M, * N;
32    float          * ALPHA, * BETA;
33 /*
34 *  .. Array Arguments ..
35 */
36    int            * DESCA, * DESCC;
37    float          * A, * C;
38 #endif
39 {
40 /*
41 *  Purpose
42 *  =======
43 *
44 *  PCGEADD  adds a matrix to another
45 *
46 *     sub( C ) := beta*sub( C ) + alpha*op( sub( A ) )
47 *
48 *  where
49 *
50 *     sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1),  and, op( X )  is one  of
51 *
52 *     op( X ) = X   or   op( X ) = X'   or   op( X ) = conjg( X' ).
53 *
54 *  Thus, op( sub( A ) ) denotes A(IA:IA+M-1,JA:JA+N-1)   if TRANS = 'N',
55 *                               A(IA:IA+N-1,JA:JA+M-1)'  if TRANS = 'T',
56 *                        conjg(A(IA:IA+N-1,JA:JA+M-1)')  if TRANS = 'C'.
57 *
58 *  Alpha  and  beta  are scalars, sub( C ) and op( sub( A ) ) are m by n
59 *  submatrices.
60 *
61 *  Notes
62 *  =====
63 *
64 *  A description  vector  is associated with each 2D block-cyclicly dis-
65 *  tributed matrix.  This  vector  stores  the  information  required to
66 *  establish the  mapping  between a  matrix entry and its corresponding
67 *  process and memory location.
68 *
69 *  In  the  following  comments,   the character _  should  be  read  as
70 *  "of  the  distributed  matrix".  Let  A  be a generic term for any 2D
71 *  block cyclicly distributed matrix.  Its description vector is DESC_A:
72 *
73 *  NOTATION         STORED IN       EXPLANATION
74 *  ---------------- --------------- ------------------------------------
75 *  DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
76 *  CTXT_A  (global) DESCA[ CTXT_  ] The BLACS context handle, indicating
77 *                                   the NPROW x NPCOL BLACS process grid
78 *                                   A  is  distributed over. The context
79 *                                   itself  is  global,  but  the handle
80 *                                   (the integer value) may vary.
81 *  M_A     (global) DESCA[ M_     ] The  number of rows in the distribu-
82 *                                   ted matrix A, M_A >= 0.
83 *  N_A     (global) DESCA[ N_     ] The number of columns in the distri-
84 *                                   buted matrix A, N_A >= 0.
85 *  IMB_A   (global) DESCA[ IMB_   ] The number of rows of the upper left
86 *                                   block of the matrix A, IMB_A > 0.
87 *  INB_A   (global) DESCA[ INB_   ] The  number  of columns of the upper
88 *                                   left   block   of   the  matrix   A,
89 *                                   INB_A > 0.
90 *  MB_A    (global) DESCA[ MB_    ] The blocking factor used to  distri-
91 *                                   bute the last  M_A-IMB_A  rows of A,
92 *                                   MB_A > 0.
93 *  NB_A    (global) DESCA[ NB_    ] The blocking factor used to  distri-
94 *                                   bute the last  N_A-INB_A  columns of
95 *                                   A, NB_A > 0.
96 *  RSRC_A  (global) DESCA[ RSRC_  ] The process row over which the first
97 *                                   row of the matrix  A is distributed,
98 *                                   NPROW > RSRC_A >= 0.
99 *  CSRC_A  (global) DESCA[ CSRC_  ] The  process column  over  which the
100 *                                   first column of  A  is  distributed.
101 *                                   NPCOL > CSRC_A >= 0.
102 *  LLD_A   (local)  DESCA[ LLD_   ] The  leading dimension  of the local
103 *                                   array  storing  the  local blocks of
104 *                                   the distributed matrix A,
105 *                                   IF( Lc( 1, N_A ) > 0 )
106 *                                      LLD_A >= MAX( 1, Lr( 1, M_A ) )
107 *                                   ELSE
108 *                                      LLD_A >= 1.
109 *
110 *  Let K be the number of  rows of a matrix A starting at the global in-
111 *  dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
112 *  that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
113 *  receive if these K rows were distributed over NPROW processes.  If  K
114 *  is the number of columns of a matrix  A  starting at the global index
115 *  JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number  of co-
116 *  lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would  receive if
117 *  these K columns were distributed over NPCOL processes.
118 *
119 *  The values of Lr() and Lc() may be determined via a call to the func-
120 *  tion PB_Cnumroc:
121 *  Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
122 *  Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
123 *
124 *  Arguments
125 *  =========
126 *
127 *  TRANS   (global input) CHARACTER*1
128 *          On entry,  TRANS   specifies the form of op( sub( A ) ) to be
129 *          used in the matrix addition as follows:
130 *
131 *             TRANS = 'N' or 'n'   op( sub( A ) ) = sub( A ),
132 *
133 *             TRANS = 'T' or 't'   op( sub( A ) ) = sub( A )',
134 *
135 *             TRANS = 'C' or 'c'   op( sub( A ) ) = conjg( sub( A )' ).
136 *
137 *  M       (global input) INTEGER
138 *          On entry,  M  specifies the number of rows of  the  submatrix
139 *          sub( C ) and the number of columns of the submatrix sub( A ).
140 *          M  must be at least zero.
141 *
142 *  N       (global input) INTEGER
143 *          On entry, N  specifies the number of columns of the submatrix
144 *          sub( C ) and the number of rows of the submatrix sub( A ).  N
145 *          must be at least zero.
146 *
147 *  ALPHA   (global input) COMPLEX
148 *          On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
149 *          supplied  as  zero  then  the  local entries of  the array  A
150 *          corresponding to the entries of the submatrix  sub( A )  need
151 *          not be set on input.
152 *
153 *  A       (local input) COMPLEX array
154 *          On entry, A is an array of dimension (LLD_A, Ka), where Ka is
155 *          at least Lc( 1, JA+M-1 ).  Before  entry, this array contains
156 *          the local entries of the matrix A.
157 *
158 *  IA      (global input) INTEGER
159 *          On entry, IA  specifies A's global row index, which points to
160 *          the beginning of the submatrix sub( A ).
161 *
162 *  JA      (global input) INTEGER
163 *          On entry, JA  specifies A's global column index, which points
164 *          to the beginning of the submatrix sub( A ).
165 *
166 *  DESCA   (global and local input) INTEGER array
167 *          On entry, DESCA  is an integer array of dimension DLEN_. This
168 *          is the array descriptor for the matrix A.
169 *
170 *  BETA    (global input) COMPLEX
171 *          On entry,  BETA  specifies the scalar  beta.   When  BETA  is
172 *          supplied  as  zero  then  the  local entries of  the array  C
173 *          corresponding to the entries of the submatrix  sub( C )  need
174 *          not be set on input.
175 *
176 *  C       (local input/local output) COMPLEX array
177 *          On entry, C is an array of dimension (LLD_C, Kc), where Kc is
178 *          at least Lc( 1, JC+N-1 ).  Before  entry, this array contains
179 *          the local entries of the matrix C.
180 *          On exit, the entries of this array corresponding to the local
181 *          entries of the submatrix  sub( C )  are  overwritten  by  the
182 *          local entries of the m by n updated submatrix.
183 *
184 *  IC      (global input) INTEGER
185 *          On entry, IC  specifies C's global row index, which points to
186 *          the beginning of the submatrix sub( C ).
187 *
188 *  JC      (global input) INTEGER
189 *          On entry, JC  specifies C's global column index, which points
190 *          to the beginning of the submatrix sub( C ).
191 *
192 *  DESCC   (global and local input) INTEGER array
193 *          On entry, DESCC  is an integer array of dimension DLEN_. This
194 *          is the array descriptor for the matrix C.
195 *
196 *  -- Written on April 1, 1998 by
197 *     Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
198 *
199 *  ---------------------------------------------------------------------
200 */
201 /*
202 *  .. Local Scalars ..
203 */
204    char           DirA, DirC, ctop, rtop;
205    int            Ai, Aj, Ci, Cj, TrA, ctxt, info, mycol, myrow, npcol, nprow,
206                   notran;
207 /*
208 *  .. Local Arrays ..
209 */
210    int            Ad[DLEN_], Cd[DLEN_];
211 /* ..
212 *  .. Executable Statements ..
213 *
214 */
215    notran = ( ( TrA = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN );
216    PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad );
217    PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd );
218 #ifndef NO_ARGCHK
219 /*
220 *  Test the input parameters
221 */
222    Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
223    if( !( info = ( ( nprow == -1 ) ? -( 801 + CTXT_ ) : 0 ) ) )
224    {
225       if( ( !notran ) && ( TrA != CTRAN ) && ( TrA != CCOTRAN ) )
226       {
227          PB_Cwarn( ctxt, __LINE__, "PCGEADD", "Illegal TRANS = %c\n", TrA );
228          info = -1;
229       }
230       if( notran )
231          PB_Cchkmat( ctxt, "PCGEADD", "A", *M, 2, *N, 3, Ai, Aj, Ad,  8,
232                      &info );
233       else
234          PB_Cchkmat( ctxt, "PCGEADD", "A", *N, 3, *M, 2, Ai, Aj, Ad,  8,
235                      &info );
236       PB_Cchkmat( ctxt, "PCGEADD", "C", *M, 2, *N, 3, Ci, Cj, Cd, 13, &info );
237    }
238    if( info ) { PB_Cabort( ctxt, "PCGEADD", info ); return; }
239 #endif
240 /*
241 *  Quick return if possible
242 */
243    if( ( *M == 0 ) || ( *N == 0 ) ||
244        ( ( ALPHA[REAL_PART] == ZERO && ALPHA[IMAG_PART] == ZERO ) &&
245          ( BETA [REAL_PART] ==  ONE && BETA [IMAG_PART] == ZERO ) ) )
246       return;
247 /*
248 *  And when alpha is zero
249 */
250    if( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) )
251    {
252       if( ( BETA[REAL_PART] == ZERO ) && ( BETA[IMAG_PART] == ZERO ) )
253       {
254          PB_Cplapad( PB_Cctypeset(), ALL, NOCONJG, *M, *N, ((char *)BETA),
255                      ((char *)BETA), ((char *) C), Ci, Cj, Cd );
256       }
257       else
258       {
259          PB_Cplascal( PB_Cctypeset(), ALL, NOCONJG, *M, *N, ((char *)BETA),
260                       ((char * )C), Ci, Cj, Cd );
261       }
262       return;
263    }
264 /*
265 *  Start the operations
266 */
267 /*
268 *  This operation mainly involves point-to-point send and receive communication.
269 *  There is therefore no particular BLACS topology to recommend. Still, one can
270 *  choose the main loop direction in which the operands will be added. This
271 *  selection is based on the current setting for the BLACS broadcast operations.
272 */
273    if( notran )
274    {
275       rtop = *PB_Ctop( &ctxt, BCAST, ROW,    TOP_GET );
276       ctop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET );
277 
278       if( *M <= *N )
279       {
280          DirA = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD );
281          DirC = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD );
282       }
283       else
284       {
285          DirA = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD );
286          DirC = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD );
287       }
288       PB_Cpgeadd( PB_Cctypeset(), &DirA, &DirC, NOCONJG, *M, *N,
289                   ((char *) ALPHA), ((char *) A), Ai, Aj, Ad,
290                   ((char *)  BETA), ((char *) C), Ci, Cj, Cd );
291    }
292    else if( TrA == CTRAN )
293    {
294       PB_Cptran( PB_Cctypeset(), NOCONJG, *M, *N, ((char *) ALPHA),
295                  ((char *) A), Ai, Aj, Ad, ((char *)  BETA), ((char *) C),
296                  Ci, Cj, Cd );
297    }
298    else
299    {
300       PB_Cptran( PB_Cctypeset(), CONJG,   *M, *N, ((char *) ALPHA),
301                  ((char *) A), Ai, Aj, Ad, ((char *)  BETA), ((char *) C),
302                  Ci, Cj, Cd );
303    }
304 /*
305 *  End of PCGEADD
306 */
307 }
308