1 #include "Bdef.h"
2 
3 #ifdef T3EReductErr
4    #define DefCombTop '1'
5 #endif
6 
7 #if (INTFACE == C_CALL)
Cigamn2d(int ConTxt,char * scope,char * top,int m,int n,int * A,int lda,int * rA,int * cA,int ldia,int rdest,int cdest)8 void Cigamn2d(int ConTxt, char *scope, char *top, int m, int n, int *A,
9               int lda, int *rA, int *cA, int ldia, int rdest, int cdest)
10 #else
11 F_VOID_FUNC igamn2d_(int *ConTxt, F_CHAR scope, F_CHAR top, int *m, int *n,
12                      int *A, int *lda, int *rA, int *cA, int *ldia,
13                      int *rdest, int *cdest)
14 #endif
15 /*
16  *  -- V1.1 BLACS routine --
17  *  University of Tennessee, May 1, 1996
18  *  Written by Clint Whaley.
19  *
20  *  Purpose
21  *  =======
22  *  Combine amn operation for integer rectangular matrices.
23  *
24  *  Arguments
25  *  =========
26  *
27  *  ConTxt  (input) Ptr to int
28  *          Index into MyConTxts00 (my contexts array).
29  *
30  *  SCOPE   (input) Ptr to char
31  *          Limit the scope of the operation.
32  *          = 'R' :   Operation is performed by a process row.
33  *          = 'C' :   Operation is performed by a process column.
34  *          = 'A' :   Operation is performed by all processes in grid.
35  *
36  *  TOP     (input) Ptr to char
37  *          Controls fashion in which messages flow within the operation.
38  *
39  *  M       (input) Ptr to int
40  *          The number of rows of the matrix A.  M >= 0.
41  *
42  *  N       (input) Ptr to int
43  *          The number of columns of the matrix A.  N >= 0.
44  *
45  *  A       (output) Ptr to integer two dimensional array
46  *          The m by n matrix A.  Fortran77 (column-major) storage
47  *          assumed.
48  *
49  *  LDA     (input) Ptr to int
50  *          The leading dimension of the array A.  LDA >= M.
51  *
52  *  RA      (output) Integer Array, dimension (LDIA, N)
53  *          Contains process row that the amn of each element
54  *          of A was found on: i.e., rA(1,2) contains the process
55  *          row that the amn of A(1,2) was found on.
56  *          Values are left on process {rdest, cdest} only, others
57  *          may be modified, but not left with interesting data.
58  *          If rdest == -1, then result is left on all processes in scope.
59  *          If LDIA == -1, this array is not accessed, and need not exist.
60  *
61  *  CA      (output) Integer Array, dimension (LDIA, N)
62  *          Contains process column that the amn of each element
63  *          of A was found on: i.e., cA(1,2) contains the process
64  *          column that the max/min of A(1,2) was found on.
65  *          Values are left on process {rdest, cdest} only, others
66  *          may be modified, but not left with interesting data.
67  *          If rdest == -1, then result is left on all processes in scope.
68  *          If LDIA == -1, this array is not accessed, and need not exist.
69  *
70  *  LDIA    (input) Ptr to int
71  *          If (LDIA == -1), then the arrays RA and CA are not accessed.
72  *          ELSE leading dimension of the arrays RA and CA.  LDIA >= M.
73  *
74  *  RDEST   (input) Ptr to int
75  *          The process row of the destination of the amn.
76  *          If rdest == -1, then result is left on all processes in scope.
77  *
78  *  CDEST   (input) Ptr to int
79  *          The process column of the destination of the amn.
80  *          If rdest == -1, then CDEST ignored.
81  *
82  * ------------------------------------------------------------------------
83  */
84 {
85    void BI_ArgCheck(int, int, char *, char, char, char, int, int, int, int,
86                     int *, int *);
87    void BI_UpdateBuffs(BLACBUFF *);
88    BLACBUFF *BI_GetBuff(int);
89    int BI_BuffIsFree(BLACBUFF *, int);
90    BI_MPI_Datatype BI_GetMpiGeType(BLACSCONTEXT *, int, int, int,
91                                    BI_MPI_Datatype, int *);
92    BLACBUFF *BI_Pack(BLACSCONTEXT *, BVOID *, BLACBUFF *, BI_MPI_Datatype);
93    BI_MPI_Datatype BI_GetMpiGeType(BLACSCONTEXT *, int, int, int,
94                                    BI_MPI_Datatype, int *);
95    void BI_Unpack(BLACSCONTEXT *, BVOID *, BLACBUFF *, BI_MPI_Datatype);
96    void BI_MringComb(BLACSCONTEXT *, BLACBUFF *, BLACBUFF *, int, VVFUNPTR,
97                      int, int);
98    void BI_TreeComb(BLACSCONTEXT *, BLACBUFF *, BLACBUFF *, int, VVFUNPTR,
99                     int, int);
100    void BI_BeComb(BLACSCONTEXT *, BLACBUFF *, BLACBUFF *, int, VVFUNPTR);
101    void BI_ivvamn(int, char *, char *);
102    void BI_ivvamn2(int, char *, char *);
103    void BI_iMPI_amn(void *, void *, int *, BI_MPI_Datatype *);
104    void BI_iMPI_amn2(void *, void *, int *, BI_MPI_Datatype *);
105 /*
106  *  Variable Declarations
107  */
108    VVFUNPTR vvop;
109    BLACBUFF *bp, *bp2;
110    BLACSCONTEXT *ctxt;
111    char ttop, tscope;
112    int i, j, N, dest, idist, length, tlda, tldia, trdest, ierr;
113    int len[2];
114    BI_MPI_Aint disp[2];
115    BI_MPI_Datatype dtypes[2];
116    BI_MPI_Op BlacComb;
117    BI_MPI_Datatype MyType;
118    BI_DistType *dist, mydist;
119    extern BLACBUFF *BI_ActiveQ;
120    extern BLACBUFF BI_AuxBuff;
121 
122    MGetConTxt(Mpval(ConTxt), ctxt);
123    ttop = F2C_CharTrans(top);
124    ttop = Mlowcase(ttop);
125    tscope = F2C_CharTrans(scope);
126    tscope = Mlowcase(tscope);
127 /*
128  *  If the user has set the default combine topology, use it instead of
129  *  BLACS default
130  */
131 #ifdef DefCombTop
132    if (ttop == ' ') ttop = DefCombTop;
133 #endif
134    if (Mpval(cdest) == -1) trdest = -1;
135    else trdest = Mpval(rdest);
136 #if (BlacsDebugLvl > 0)
137    BI_ArgCheck(Mpval(ConTxt), RT_COMB, __FILE__, tscope, 'u', 'u', Mpval(m),
138                Mpval(n), Mpval(lda), 1, &trdest, Mpaddress(cdest));
139    if (Mpval(ldia) < Mpval(m))
140    {
141       if (Mpval(ldia) != -1)
142          BI_BlacsWarn(Mpval(ConTxt), __LINE__, __FILE__,
143                       "LDIA too small (LDIA=%d, but M=%d)", Mpval(ldia),
144                       Mpval(m));
145    }
146 #endif
147    if (Mpval(lda) >= Mpval(m)) tlda = Mpval(lda);
148    else tlda = Mpval(m);
149    if (Mpval(ldia) < Mpval(m)) tldia = Mpval(m);
150    else tldia = Mpval(ldia);
151    switch(tscope)
152    {
153    case 'r':
154       ctxt->scp = &ctxt->rscp;
155       if (trdest == -1) dest = -1;
156       else dest = Mpval(cdest);
157       break;
158    case 'c':
159       ctxt->scp = &ctxt->cscp;
160       dest = trdest;
161       break;
162    case 'a':
163       ctxt->scp = &ctxt->ascp;
164       if (trdest == -1) dest = -1;
165       else dest = Mvkpnum(ctxt, trdest, Mpval(cdest));
166       break;
167    default:
168       BI_BlacsErr(Mpval(ConTxt), __LINE__, __FILE__, "Unknown scope '%c'",
169                   tscope);
170    }
171 
172 
173 /*
174  * It's not defined how MPI reacts to 0 element reductions, so use BLACS 1-tree
175  * topology if we've got one
176  */
177    if (ttop == ' ') if ( (Mpval(m) < 1) || (Mpval(n) < 1) ) ttop = '1';
178    N = Mpval(m) * Mpval(n);
179 /*
180  * If process who has amn is to be communicated, must set up distance
181  * vector after value vector
182  */
183    if (Mpval(ldia) != -1)
184    {
185       vvop = BI_ivvamn;
186       length = N * sizeof(int);
187       i = length % sizeof(BI_DistType);  /* ensure dist vec aligned correctly */
188       if (i) length += sizeof(BI_DistType) - i;
189       idist = length;
190       length += N * sizeof(BI_DistType);
191 /*
192  *    For performance, insist second buffer is at least 8-byte aligned
193  */
194       j = 8;
195       if (sizeof(int) > j) j = sizeof(int);
196       i = length % j;
197       if (i) length += j - i;
198       i = 2 * length;
199 
200       bp = BI_GetBuff(i);
201       bp2 = &BI_AuxBuff;
202       bp2->Buff = &bp->Buff[length];
203       BI_imvcopy(Mpval(m), Mpval(n), A, tlda, bp->Buff);
204 /*
205  *    Fill in distance vector
206  */
207       if (dest == -1) mydist = ctxt->scp->Iam;
208       else mydist = (ctxt->scp->Np + ctxt->scp->Iam - dest) % ctxt->scp->Np;
209       dist = (BI_DistType *) &bp->Buff[idist];
210       for (i=0; i < N; i++) dist[i] = mydist;
211 
212 /*
213  *    Create the MPI datatype holding both user's buffer and distance vector
214  */
215       len[0] = len[1] = N;
216       disp[0] = 0;
217       disp[1] = idist;
218       dtypes[0] = BI_MPI_INT;
219       dtypes[1] = BI_MpiDistType;
220 #ifdef ZeroByteTypeBug
221       if (N > 0)
222       {
223 #endif
224       i = 2;
225       BI_MPI_Type_struct(i, len, disp, dtypes, &MyType, ierr);
226       BI_MPI_Type_commit(&MyType, ierr);
227       bp->N = bp2->N = 1;
228       bp->dtype = bp2->dtype = MyType;
229 #ifdef ZeroByteTypeBug
230       }
231       else
232       {
233          bp->N = bp2->N = 0;
234          bp->dtype = bp2->dtype = BI_MPI_INT;
235       }
236 #endif
237    }
238    else
239    {
240       vvop = BI_ivvamn2;
241       length = N * sizeof(int);
242 /*
243  *    If A is contiguous, we can use it as one of our buffers
244  */
245       if ( (Mpval(m) == tlda) || (Mpval(n) == 1) )
246       {
247          bp = &BI_AuxBuff;
248          bp->Buff = (char *) A;
249          bp2 = BI_GetBuff(length);
250       }
251       else
252       {
253          bp = BI_GetBuff(length*2);
254          bp2 = &BI_AuxBuff;
255          bp2->Buff = &bp->Buff[length];
256          BI_imvcopy(Mpval(m), Mpval(n), A, tlda, bp->Buff);
257       }
258       bp->N = bp2->N = N;
259       bp->dtype = bp2->dtype = BI_MPI_INT;
260    }
261 
262    switch(ttop)
263    {
264    case ' ':         /* use MPI's reduction by default */
265       i = 1;
266       if (Mpval(ldia) == -1)
267       {
268          BI_MPI_Op_create(BI_iMPI_amn2, i, &BlacComb, ierr);
269       }
270       else
271       {
272          BI_MPI_Op_create(BI_iMPI_amn, i, &BlacComb, ierr);
273          BI_AuxBuff.Len = N;  /* set this up for the MPI OP wrappers */
274       }
275 
276       if (trdest != -1)
277       {
278          BI_MPI_Reduce(bp->Buff, bp2->Buff, bp->N, bp->dtype, BlacComb, dest,
279 	 	       ctxt->scp->comm, ierr);
280          if (ctxt->scp->Iam == dest)
281 	 {
282 	    BI_ivmcopy(Mpval(m), Mpval(n), A, tlda, bp2->Buff);
283 	    if (Mpval(ldia) != -1)
284                BI_TransDist(ctxt, tscope, Mpval(m), Mpval(n), rA, cA, tldia,
285                             (BI_DistType *) &bp2->Buff[idist],
286 			    trdest, Mpval(cdest));
287 	 }
288       }
289       else
290       {
291          BI_MPI_Allreduce(bp->Buff, bp2->Buff, bp->N, bp->dtype, BlacComb,
292 		          ctxt->scp->comm, ierr);
293 	 BI_ivmcopy(Mpval(m), Mpval(n), A, tlda, bp2->Buff);
294          if (Mpval(ldia) != -1)
295             BI_TransDist(ctxt, tscope, Mpval(m), Mpval(n), rA, cA, tldia,
296                          (BI_DistType *) &bp2->Buff[idist],
297                          trdest, Mpval(cdest));
298       }
299       BI_MPI_Op_free(&BlacComb, ierr);
300       if (Mpval(ldia) != -1)
301 #ifdef ZeroByteTypeBug
302          if (N > 0)
303 #endif
304          BI_MPI_Type_free(&MyType, ierr);
305       if (BI_ActiveQ) BI_UpdateBuffs(NULL);
306       return;
307       break;
308    case 'i':
309       BI_MringComb(ctxt, bp, bp2, N, vvop, dest, 1);
310       break;
311    case 'd':
312       BI_MringComb(ctxt, bp, bp2, N, vvop, dest, -1);
313       break;
314    case 's':
315       BI_MringComb(ctxt, bp, bp2, N, vvop, dest, 2);
316       break;
317    case 'm':
318       BI_MringComb(ctxt, bp, bp2, N, vvop, dest, ctxt->Nr_co);
319       break;
320    case '1':
321    case '2':
322    case '3':
323    case '4':
324    case '5':
325    case '6':
326    case '7':
327    case '8':
328    case '9':
329       BI_TreeComb(ctxt, bp, bp2, N, vvop, dest, ttop-47);
330       break;
331    case 'f':
332       BI_TreeComb(ctxt, bp, bp2, N, vvop, dest, FULLCON);
333       break;
334    case 't':
335       BI_TreeComb(ctxt, bp, bp2, N, vvop, dest, ctxt->Nb_co);
336       break;
337    case 'h':
338 /*
339  *    Use bidirectional exchange if everyone wants answer
340  */
341       if ( (trdest == -1) && !(ctxt->TopsCohrnt) )
342          BI_BeComb(ctxt, bp, bp2, N, vvop);
343       else
344          BI_TreeComb(ctxt, bp, bp2, N, vvop, dest, 2);
345       break;
346    default :
347       BI_BlacsErr(Mpval(ConTxt), __LINE__, __FILE__, "Unknown topology '%c'",
348                   ttop);
349    }
350 
351    if (Mpval(ldia) != -1)
352 #ifdef ZeroByteTypeBug
353       if (N > 0)
354 #endif
355       BI_MPI_Type_free(&MyType, ierr);
356 /*
357  * If I am selected to receive answer
358  */
359    if ( (ctxt->scp->Iam == dest) || (dest == -1) )
360    {
361 /*
362  *    Translate the distances stored in the latter part of bp->Buff into
363  *    process grid coordinates, and output these coordinates in the
364  *    arrays rA and cA.
365  */
366       if (Mpval(ldia) != -1)
367          BI_TransDist(ctxt, tscope, Mpval(m), Mpval(n), rA, cA, tldia,
368                       dist, trdest, Mpval(cdest));
369 /*
370  *    Unpack the amn array
371  */
372       if (bp != &BI_AuxBuff) BI_ivmcopy(Mpval(m), Mpval(n), A, tlda, bp->Buff);
373    }
374 }
375