1 #include "Bdef.h"
2
3 #ifdef T3EReductErr
4 #define DefCombTop '1'
5 #endif
6
7 #if (INTFACE == C_CALL)
Cigamn2d(int ConTxt,char * scope,char * top,int m,int n,int * A,int lda,int * rA,int * cA,int ldia,int rdest,int cdest)8 void Cigamn2d(int ConTxt, char *scope, char *top, int m, int n, int *A,
9 int lda, int *rA, int *cA, int ldia, int rdest, int cdest)
10 #else
11 F_VOID_FUNC igamn2d_(int *ConTxt, F_CHAR scope, F_CHAR top, int *m, int *n,
12 int *A, int *lda, int *rA, int *cA, int *ldia,
13 int *rdest, int *cdest)
14 #endif
15 /*
16 * -- V1.1 BLACS routine --
17 * University of Tennessee, May 1, 1996
18 * Written by Clint Whaley.
19 *
20 * Purpose
21 * =======
22 * Combine amn operation for integer rectangular matrices.
23 *
24 * Arguments
25 * =========
26 *
27 * ConTxt (input) Ptr to int
28 * Index into MyConTxts00 (my contexts array).
29 *
30 * SCOPE (input) Ptr to char
31 * Limit the scope of the operation.
32 * = 'R' : Operation is performed by a process row.
33 * = 'C' : Operation is performed by a process column.
34 * = 'A' : Operation is performed by all processes in grid.
35 *
36 * TOP (input) Ptr to char
37 * Controls fashion in which messages flow within the operation.
38 *
39 * M (input) Ptr to int
40 * The number of rows of the matrix A. M >= 0.
41 *
42 * N (input) Ptr to int
43 * The number of columns of the matrix A. N >= 0.
44 *
45 * A (output) Ptr to integer two dimensional array
46 * The m by n matrix A. Fortran77 (column-major) storage
47 * assumed.
48 *
49 * LDA (input) Ptr to int
50 * The leading dimension of the array A. LDA >= M.
51 *
52 * RA (output) Integer Array, dimension (LDIA, N)
53 * Contains process row that the amn of each element
54 * of A was found on: i.e., rA(1,2) contains the process
55 * row that the amn of A(1,2) was found on.
56 * Values are left on process {rdest, cdest} only, others
57 * may be modified, but not left with interesting data.
58 * If rdest == -1, then result is left on all processes in scope.
59 * If LDIA == -1, this array is not accessed, and need not exist.
60 *
61 * CA (output) Integer Array, dimension (LDIA, N)
62 * Contains process column that the amn of each element
63 * of A was found on: i.e., cA(1,2) contains the process
64 * column that the max/min of A(1,2) was found on.
65 * Values are left on process {rdest, cdest} only, others
66 * may be modified, but not left with interesting data.
67 * If rdest == -1, then result is left on all processes in scope.
68 * If LDIA == -1, this array is not accessed, and need not exist.
69 *
70 * LDIA (input) Ptr to int
71 * If (LDIA == -1), then the arrays RA and CA are not accessed.
72 * ELSE leading dimension of the arrays RA and CA. LDIA >= M.
73 *
74 * RDEST (input) Ptr to int
75 * The process row of the destination of the amn.
76 * If rdest == -1, then result is left on all processes in scope.
77 *
78 * CDEST (input) Ptr to int
79 * The process column of the destination of the amn.
80 * If rdest == -1, then CDEST ignored.
81 *
82 * ------------------------------------------------------------------------
83 */
84 {
85 void BI_ArgCheck(int, int, char *, char, char, char, int, int, int, int,
86 int *, int *);
87 void BI_UpdateBuffs(BLACBUFF *);
88 BLACBUFF *BI_GetBuff(int);
89 int BI_BuffIsFree(BLACBUFF *, int);
90 BI_MPI_Datatype BI_GetMpiGeType(BLACSCONTEXT *, int, int, int,
91 BI_MPI_Datatype, int *);
92 BLACBUFF *BI_Pack(BLACSCONTEXT *, BVOID *, BLACBUFF *, BI_MPI_Datatype);
93 BI_MPI_Datatype BI_GetMpiGeType(BLACSCONTEXT *, int, int, int,
94 BI_MPI_Datatype, int *);
95 void BI_Unpack(BLACSCONTEXT *, BVOID *, BLACBUFF *, BI_MPI_Datatype);
96 void BI_MringComb(BLACSCONTEXT *, BLACBUFF *, BLACBUFF *, int, VVFUNPTR,
97 int, int);
98 void BI_TreeComb(BLACSCONTEXT *, BLACBUFF *, BLACBUFF *, int, VVFUNPTR,
99 int, int);
100 void BI_BeComb(BLACSCONTEXT *, BLACBUFF *, BLACBUFF *, int, VVFUNPTR);
101 void BI_ivvamn(int, char *, char *);
102 void BI_ivvamn2(int, char *, char *);
103 void BI_iMPI_amn(void *, void *, int *, BI_MPI_Datatype *);
104 void BI_iMPI_amn2(void *, void *, int *, BI_MPI_Datatype *);
105 /*
106 * Variable Declarations
107 */
108 VVFUNPTR vvop;
109 BLACBUFF *bp, *bp2;
110 BLACSCONTEXT *ctxt;
111 char ttop, tscope;
112 int i, j, N, dest, idist, length, tlda, tldia, trdest, ierr;
113 int len[2];
114 BI_MPI_Aint disp[2];
115 BI_MPI_Datatype dtypes[2];
116 BI_MPI_Op BlacComb;
117 BI_MPI_Datatype MyType;
118 BI_DistType *dist, mydist;
119 extern BLACBUFF *BI_ActiveQ;
120 extern BLACBUFF BI_AuxBuff;
121
122 MGetConTxt(Mpval(ConTxt), ctxt);
123 ttop = F2C_CharTrans(top);
124 ttop = Mlowcase(ttop);
125 tscope = F2C_CharTrans(scope);
126 tscope = Mlowcase(tscope);
127 /*
128 * If the user has set the default combine topology, use it instead of
129 * BLACS default
130 */
131 #ifdef DefCombTop
132 if (ttop == ' ') ttop = DefCombTop;
133 #endif
134 if (Mpval(cdest) == -1) trdest = -1;
135 else trdest = Mpval(rdest);
136 #if (BlacsDebugLvl > 0)
137 BI_ArgCheck(Mpval(ConTxt), RT_COMB, __FILE__, tscope, 'u', 'u', Mpval(m),
138 Mpval(n), Mpval(lda), 1, &trdest, Mpaddress(cdest));
139 if (Mpval(ldia) < Mpval(m))
140 {
141 if (Mpval(ldia) != -1)
142 BI_BlacsWarn(Mpval(ConTxt), __LINE__, __FILE__,
143 "LDIA too small (LDIA=%d, but M=%d)", Mpval(ldia),
144 Mpval(m));
145 }
146 #endif
147 if (Mpval(lda) >= Mpval(m)) tlda = Mpval(lda);
148 else tlda = Mpval(m);
149 if (Mpval(ldia) < Mpval(m)) tldia = Mpval(m);
150 else tldia = Mpval(ldia);
151 switch(tscope)
152 {
153 case 'r':
154 ctxt->scp = &ctxt->rscp;
155 if (trdest == -1) dest = -1;
156 else dest = Mpval(cdest);
157 break;
158 case 'c':
159 ctxt->scp = &ctxt->cscp;
160 dest = trdest;
161 break;
162 case 'a':
163 ctxt->scp = &ctxt->ascp;
164 if (trdest == -1) dest = -1;
165 else dest = Mvkpnum(ctxt, trdest, Mpval(cdest));
166 break;
167 default:
168 BI_BlacsErr(Mpval(ConTxt), __LINE__, __FILE__, "Unknown scope '%c'",
169 tscope);
170 }
171
172
173 /*
174 * It's not defined how MPI reacts to 0 element reductions, so use BLACS 1-tree
175 * topology if we've got one
176 */
177 if (ttop == ' ') if ( (Mpval(m) < 1) || (Mpval(n) < 1) ) ttop = '1';
178 N = Mpval(m) * Mpval(n);
179 /*
180 * If process who has amn is to be communicated, must set up distance
181 * vector after value vector
182 */
183 if (Mpval(ldia) != -1)
184 {
185 vvop = BI_ivvamn;
186 length = N * sizeof(int);
187 i = length % sizeof(BI_DistType); /* ensure dist vec aligned correctly */
188 if (i) length += sizeof(BI_DistType) - i;
189 idist = length;
190 length += N * sizeof(BI_DistType);
191 /*
192 * For performance, insist second buffer is at least 8-byte aligned
193 */
194 j = 8;
195 if (sizeof(int) > j) j = sizeof(int);
196 i = length % j;
197 if (i) length += j - i;
198 i = 2 * length;
199
200 bp = BI_GetBuff(i);
201 bp2 = &BI_AuxBuff;
202 bp2->Buff = &bp->Buff[length];
203 BI_imvcopy(Mpval(m), Mpval(n), A, tlda, bp->Buff);
204 /*
205 * Fill in distance vector
206 */
207 if (dest == -1) mydist = ctxt->scp->Iam;
208 else mydist = (ctxt->scp->Np + ctxt->scp->Iam - dest) % ctxt->scp->Np;
209 dist = (BI_DistType *) &bp->Buff[idist];
210 for (i=0; i < N; i++) dist[i] = mydist;
211
212 /*
213 * Create the MPI datatype holding both user's buffer and distance vector
214 */
215 len[0] = len[1] = N;
216 disp[0] = 0;
217 disp[1] = idist;
218 dtypes[0] = BI_MPI_INT;
219 dtypes[1] = BI_MpiDistType;
220 #ifdef ZeroByteTypeBug
221 if (N > 0)
222 {
223 #endif
224 i = 2;
225 BI_MPI_Type_struct(i, len, disp, dtypes, &MyType, ierr);
226 BI_MPI_Type_commit(&MyType, ierr);
227 bp->N = bp2->N = 1;
228 bp->dtype = bp2->dtype = MyType;
229 #ifdef ZeroByteTypeBug
230 }
231 else
232 {
233 bp->N = bp2->N = 0;
234 bp->dtype = bp2->dtype = BI_MPI_INT;
235 }
236 #endif
237 }
238 else
239 {
240 vvop = BI_ivvamn2;
241 length = N * sizeof(int);
242 /*
243 * If A is contiguous, we can use it as one of our buffers
244 */
245 if ( (Mpval(m) == tlda) || (Mpval(n) == 1) )
246 {
247 bp = &BI_AuxBuff;
248 bp->Buff = (char *) A;
249 bp2 = BI_GetBuff(length);
250 }
251 else
252 {
253 bp = BI_GetBuff(length*2);
254 bp2 = &BI_AuxBuff;
255 bp2->Buff = &bp->Buff[length];
256 BI_imvcopy(Mpval(m), Mpval(n), A, tlda, bp->Buff);
257 }
258 bp->N = bp2->N = N;
259 bp->dtype = bp2->dtype = BI_MPI_INT;
260 }
261
262 switch(ttop)
263 {
264 case ' ': /* use MPI's reduction by default */
265 i = 1;
266 if (Mpval(ldia) == -1)
267 {
268 BI_MPI_Op_create(BI_iMPI_amn2, i, &BlacComb, ierr);
269 }
270 else
271 {
272 BI_MPI_Op_create(BI_iMPI_amn, i, &BlacComb, ierr);
273 BI_AuxBuff.Len = N; /* set this up for the MPI OP wrappers */
274 }
275
276 if (trdest != -1)
277 {
278 BI_MPI_Reduce(bp->Buff, bp2->Buff, bp->N, bp->dtype, BlacComb, dest,
279 ctxt->scp->comm, ierr);
280 if (ctxt->scp->Iam == dest)
281 {
282 BI_ivmcopy(Mpval(m), Mpval(n), A, tlda, bp2->Buff);
283 if (Mpval(ldia) != -1)
284 BI_TransDist(ctxt, tscope, Mpval(m), Mpval(n), rA, cA, tldia,
285 (BI_DistType *) &bp2->Buff[idist],
286 trdest, Mpval(cdest));
287 }
288 }
289 else
290 {
291 BI_MPI_Allreduce(bp->Buff, bp2->Buff, bp->N, bp->dtype, BlacComb,
292 ctxt->scp->comm, ierr);
293 BI_ivmcopy(Mpval(m), Mpval(n), A, tlda, bp2->Buff);
294 if (Mpval(ldia) != -1)
295 BI_TransDist(ctxt, tscope, Mpval(m), Mpval(n), rA, cA, tldia,
296 (BI_DistType *) &bp2->Buff[idist],
297 trdest, Mpval(cdest));
298 }
299 BI_MPI_Op_free(&BlacComb, ierr);
300 if (Mpval(ldia) != -1)
301 #ifdef ZeroByteTypeBug
302 if (N > 0)
303 #endif
304 BI_MPI_Type_free(&MyType, ierr);
305 if (BI_ActiveQ) BI_UpdateBuffs(NULL);
306 return;
307 break;
308 case 'i':
309 BI_MringComb(ctxt, bp, bp2, N, vvop, dest, 1);
310 break;
311 case 'd':
312 BI_MringComb(ctxt, bp, bp2, N, vvop, dest, -1);
313 break;
314 case 's':
315 BI_MringComb(ctxt, bp, bp2, N, vvop, dest, 2);
316 break;
317 case 'm':
318 BI_MringComb(ctxt, bp, bp2, N, vvop, dest, ctxt->Nr_co);
319 break;
320 case '1':
321 case '2':
322 case '3':
323 case '4':
324 case '5':
325 case '6':
326 case '7':
327 case '8':
328 case '9':
329 BI_TreeComb(ctxt, bp, bp2, N, vvop, dest, ttop-47);
330 break;
331 case 'f':
332 BI_TreeComb(ctxt, bp, bp2, N, vvop, dest, FULLCON);
333 break;
334 case 't':
335 BI_TreeComb(ctxt, bp, bp2, N, vvop, dest, ctxt->Nb_co);
336 break;
337 case 'h':
338 /*
339 * Use bidirectional exchange if everyone wants answer
340 */
341 if ( (trdest == -1) && !(ctxt->TopsCohrnt) )
342 BI_BeComb(ctxt, bp, bp2, N, vvop);
343 else
344 BI_TreeComb(ctxt, bp, bp2, N, vvop, dest, 2);
345 break;
346 default :
347 BI_BlacsErr(Mpval(ConTxt), __LINE__, __FILE__, "Unknown topology '%c'",
348 ttop);
349 }
350
351 if (Mpval(ldia) != -1)
352 #ifdef ZeroByteTypeBug
353 if (N > 0)
354 #endif
355 BI_MPI_Type_free(&MyType, ierr);
356 /*
357 * If I am selected to receive answer
358 */
359 if ( (ctxt->scp->Iam == dest) || (dest == -1) )
360 {
361 /*
362 * Translate the distances stored in the latter part of bp->Buff into
363 * process grid coordinates, and output these coordinates in the
364 * arrays rA and cA.
365 */
366 if (Mpval(ldia) != -1)
367 BI_TransDist(ctxt, tscope, Mpval(m), Mpval(n), rA, cA, tldia,
368 dist, trdest, Mpval(cdest));
369 /*
370 * Unpack the amn array
371 */
372 if (bp != &BI_AuxBuff) BI_ivmcopy(Mpval(m), Mpval(n), A, tlda, bp->Buff);
373 }
374 }
375