1 #include "Bdef.h"
2 
3 #ifdef T3EReductErr
4    #define DefCombTop '1'
5 #endif
6 
7 #if (INTFACE == C_CALL)
Csgamx2d(int ConTxt,char * scope,char * top,int m,int n,float * A,int lda,int * rA,int * cA,int ldia,int rdest,int cdest)8 void Csgamx2d(int ConTxt, char *scope, char *top, int m, int n, float *A,
9               int lda, int *rA, int *cA, int ldia, int rdest, int cdest)
10 #else
11 F_VOID_FUNC sgamx2d_(int *ConTxt, F_CHAR scope, F_CHAR top, int *m, int *n,
12                      float *A, int *lda, int *rA, int *cA, int *ldia,
13                      int *rdest, int *cdest)
14 #endif
15 /*
16  *  -- V1.1 BLACS routine --
17  *  University of Tennessee, May 1, 1996
18  *  Written by Clint Whaley.
19  *
20  *  Purpose
21  *  =======
22  *  Combine amx operation for real rectangular matrices.
23  *
24  *  Arguments
25  *  =========
26  *
27  *  ConTxt  (input) Ptr to int
28  *          Index into MyConTxts00 (my contexts array).
29  *
30  *  SCOPE   (input) Ptr to char
31  *          Limit the scope of the operation.
32  *          = 'R' :   Operation is performed by a process row.
33  *          = 'C' :   Operation is performed by a process column.
34  *          = 'A' :   Operation is performed by all processes in grid.
35  *
36  *  TOP     (input) Ptr to char
37  *          Controls fashion in which messages flow within the operation.
38  *
39  *  M       (input) Ptr to int
40  *          The number of rows of the matrix A.  M >= 0.
41  *
42  *  N       (input) Ptr to int
43  *          The number of columns of the matrix A.  N >= 0.
44  *
45  *  A       (output) Ptr to real two dimensional array
46  *          The m by n matrix A.  Fortran77 (column-major) storage
47  *          assumed.
48  *
49  *  LDA     (input) Ptr to int
50  *          The leading dimension of the array A.  LDA >= M.
51  *
52  *  RA      (output) Integer Array, dimension (LDIA, N)
53  *          Contains process row that the amx of each element
54  *          of A was found on: i.e., rA(1,2) contains the process
55  *          row that the amx of A(1,2) was found on.
56  *          Values are left on process {rdest, cdest} only, others
57  *          may be modified, but not left with interesting data.
58  *          If rdest == -1, then result is left on all processes in scope.
59  *          If LDIA == -1, this array is not accessed, and need not exist.
60  *
61  *  CA      (output) Integer Array, dimension (LDIA, N)
62  *          Contains process column that the amx of each element
63  *          of A was found on: i.e., cA(1,2) contains the process
64  *          column that the max/min of A(1,2) was found on.
65  *          Values are left on process {rdest, cdest} only, others
66  *          may be modified, but not left with interesting data.
67  *          If rdest == -1, then result is left on all processes in scope.
68  *          If LDIA == -1, this array is not accessed, and need not exist.
69  *
70  *  LDIA    (input) Ptr to int
71  *          If (LDIA == -1), then the arrays RA and CA are not accessed.
72  *          ELSE leading dimension of the arrays RA and CA.  LDIA >= M.
73  *
74  *  RDEST   (input) Ptr to int
75  *          The process row of the destination of the amx.
76  *          If rdest == -1, then result is left on all processes in scope.
77  *
78  *  CDEST   (input) Ptr to int
79  *          The process column of the destination of the amx.
80  *          If rdest == -1, then CDEST ignored.
81  *
82  * ------------------------------------------------------------------------
83  */
84 {
85    void BI_ArgCheck(int, int, char *, char, char, char, int, int, int, int,
86                     int *, int *);
87    void BI_UpdateBuffs(BLACBUFF *);
88    BLACBUFF *BI_GetBuff(int);
89    int BI_BuffIsFree(BLACBUFF *, int);
90    BI_MPI_Datatype BI_GetMpiGeType(BLACSCONTEXT *, int, int, int,
91                                    BI_MPI_Datatype, int *);
92    BLACBUFF *BI_Pack(BLACSCONTEXT *, BVOID *, BLACBUFF *, BI_MPI_Datatype);
93    BI_MPI_Datatype BI_GetMpiGeType(BLACSCONTEXT *, int, int, int,
94                                    BI_MPI_Datatype, int *);
95    void BI_Unpack(BLACSCONTEXT *, BVOID *, BLACBUFF *, BI_MPI_Datatype);
96    void BI_MringComb(BLACSCONTEXT *, BLACBUFF *, BLACBUFF *, int, VVFUNPTR,
97                      int, int);
98    void BI_TreeComb(BLACSCONTEXT *, BLACBUFF *, BLACBUFF *, int, VVFUNPTR,
99                     int, int);
100    void BI_BeComb(BLACSCONTEXT *, BLACBUFF *, BLACBUFF *, int, VVFUNPTR);
101    void BI_svvamx(int, char *, char *);
102    void BI_svvamx2(int, char *, char *);
103    void BI_sMPI_amx(void *, void *, int *, BI_MPI_Datatype *);
104    void BI_sMPI_amx2(void *, void *, int *, BI_MPI_Datatype *);
105 /*
106  *  Variable Declarations
107  */
108    VVFUNPTR vvop;
109    BLACBUFF *bp, *bp2;
110    BLACSCONTEXT *ctxt;
111    char ttop, tscope;
112    int i, j, N, dest, idist, length, tlda, tldia, trdest, ierr;
113    int len[2];
114    BI_MPI_Aint disp[2];
115    BI_MPI_Datatype dtypes[2];
116    BI_MPI_Op BlacComb;
117    BI_MPI_Datatype MyType;
118    BI_DistType *dist, mydist;
119    extern BLACBUFF *BI_ActiveQ;
120    extern BLACBUFF BI_AuxBuff;
121 
122    MGetConTxt(Mpval(ConTxt), ctxt);
123    ttop = F2C_CharTrans(top);
124    ttop = Mlowcase(ttop);
125    tscope = F2C_CharTrans(scope);
126    tscope = Mlowcase(tscope);
127 /*
128  *  If the user has set the default combine topology, use it instead of
129  *  BLACS default
130  */
131 #ifdef DefCombTop
132    if (ttop == ' ') ttop = DefCombTop;
133 #endif
134    if (Mpval(cdest) == -1) trdest = -1;
135    else trdest = Mpval(rdest);
136 #if (BlacsDebugLvl > 0)
137    BI_ArgCheck(Mpval(ConTxt), RT_COMB, __FILE__, tscope, 'u', 'u', Mpval(m),
138                Mpval(n), Mpval(lda), 1, &trdest, Mpaddress(cdest));
139    if (Mpval(ldia) < Mpval(m))
140    {
141       if (Mpval(ldia) != -1)
142          BI_BlacsWarn(Mpval(ConTxt), __LINE__, __FILE__,
143                       "LDIA too small (LDIA=%d, but M=%d)", Mpval(ldia),
144                       Mpval(m));
145    }
146 #endif
147    if (Mpval(lda) >= Mpval(m)) tlda = Mpval(lda);
148    else tlda = Mpval(m);
149    if (Mpval(ldia) < Mpval(m)) tldia = Mpval(m);
150    else tldia = Mpval(ldia);
151    switch(tscope)
152    {
153    case 'r':
154       ctxt->scp = &ctxt->rscp;
155       if (trdest == -1) dest = -1;
156       else dest = Mpval(cdest);
157       break;
158    case 'c':
159       ctxt->scp = &ctxt->cscp;
160       dest = trdest;
161       break;
162    case 'a':
163       ctxt->scp = &ctxt->ascp;
164       if (trdest == -1) dest = -1;
165       else dest = Mvkpnum(ctxt, trdest, Mpval(cdest));
166       break;
167    default:
168       BI_BlacsErr(Mpval(ConTxt), __LINE__, __FILE__, "Unknown scope '%c'",
169                   tscope);
170    }
171 
172 
173 /*
174  * It's not defined how MPI reacts to 0 element reductions, so use BLACS 1-tree
175  * topology if we've got one
176  */
177    if (ttop == ' ')
178       if ( (Mpval(m) < 1) || (Mpval(n) < 1) || (ctxt->TopsRepeat) ) ttop = '1';
179    N = Mpval(m) * Mpval(n);
180 /*
181  * If process who has amx is to be communicated, must set up distance
182  * vector after value vector
183  */
184    if (Mpval(ldia) != -1)
185    {
186       vvop = BI_svvamx;
187       length = N * sizeof(float);
188       i = length % sizeof(BI_DistType);  /* ensure dist vec aligned correctly */
189       if (i) length += sizeof(BI_DistType) - i;
190       idist = length;
191       length += N * sizeof(BI_DistType);
192 /*
193  *    For performance, insist second buffer is at least 8-byte aligned
194  */
195       j = 8;
196       if (sizeof(float) > j) j = sizeof(float);
197       i = length % j;
198       if (i) length += j - i;
199       i = 2 * length;
200 
201       bp = BI_GetBuff(i);
202       bp2 = &BI_AuxBuff;
203       bp2->Buff = &bp->Buff[length];
204       BI_smvcopy(Mpval(m), Mpval(n), A, tlda, bp->Buff);
205 /*
206  *    Fill in distance vector
207  */
208       if (dest == -1) mydist = ctxt->scp->Iam;
209       else mydist = (ctxt->scp->Np + ctxt->scp->Iam - dest) % ctxt->scp->Np;
210       dist = (BI_DistType *) &bp->Buff[idist];
211       for (i=0; i < N; i++) dist[i] = mydist;
212 
213 /*
214  *    Create the MPI datatype holding both user's buffer and distance vector
215  */
216       len[0] = len[1] = N;
217       disp[0] = 0;
218       disp[1] = idist;
219       dtypes[0] = BI_MPI_FLOAT;
220       dtypes[1] = BI_MpiDistType;
221 #ifdef ZeroByteTypeBug
222       if (N > 0)
223       {
224 #endif
225       i = 2;
226       BI_MPI_Type_struct(i, len, disp, dtypes, &MyType, ierr);
227       BI_MPI_Type_commit(&MyType, ierr);
228       bp->N = bp2->N = 1;
229       bp->dtype = bp2->dtype = MyType;
230 #ifdef ZeroByteTypeBug
231       }
232       else
233       {
234          bp->N = bp2->N = 0;
235          bp->dtype = bp2->dtype = BI_MPI_INT;
236       }
237 #endif
238    }
239    else
240    {
241       vvop = BI_svvamx2;
242       length = N * sizeof(float);
243 /*
244  *    If A is contiguous, we can use it as one of our buffers
245  */
246       if ( (Mpval(m) == tlda) || (Mpval(n) == 1) )
247       {
248          bp = &BI_AuxBuff;
249          bp->Buff = (char *) A;
250          bp2 = BI_GetBuff(length);
251       }
252       else
253       {
254          bp = BI_GetBuff(length*2);
255          bp2 = &BI_AuxBuff;
256          bp2->Buff = &bp->Buff[length];
257          BI_smvcopy(Mpval(m), Mpval(n), A, tlda, bp->Buff);
258       }
259       bp->N = bp2->N = N;
260       bp->dtype = bp2->dtype = BI_MPI_FLOAT;
261    }
262 
263    switch(ttop)
264    {
265    case ' ':         /* use MPI's reduction by default */
266       i = 1;
267       if (Mpval(ldia) == -1)
268       {
269          BI_MPI_Op_create(BI_sMPI_amx2, i, &BlacComb, ierr);
270       }
271       else
272       {
273          BI_MPI_Op_create(BI_sMPI_amx, i, &BlacComb, ierr);
274          BI_AuxBuff.Len = N;  /* set this up for the MPI OP wrappers */
275       }
276 
277       if (trdest != -1)
278       {
279          BI_MPI_Reduce(bp->Buff, bp2->Buff, bp->N, bp->dtype, BlacComb, dest,
280 	 	       ctxt->scp->comm, ierr);
281          if (ctxt->scp->Iam == dest)
282 	 {
283 	    BI_svmcopy(Mpval(m), Mpval(n), A, tlda, bp2->Buff);
284 	    if (Mpval(ldia) != -1)
285                BI_TransDist(ctxt, tscope, Mpval(m), Mpval(n), rA, cA, tldia,
286                             (BI_DistType *) &bp2->Buff[idist],
287 			    trdest, Mpval(cdest));
288 	 }
289       }
290       else
291       {
292          BI_MPI_Allreduce(bp->Buff, bp2->Buff, bp->N, bp->dtype, BlacComb,
293 		          ctxt->scp->comm, ierr);
294 	 BI_svmcopy(Mpval(m), Mpval(n), A, tlda, bp2->Buff);
295          if (Mpval(ldia) != -1)
296             BI_TransDist(ctxt, tscope, Mpval(m), Mpval(n), rA, cA, tldia,
297                          (BI_DistType *) &bp2->Buff[idist],
298                          trdest, Mpval(cdest));
299       }
300       BI_MPI_Op_free(&BlacComb, ierr);
301       if (Mpval(ldia) != -1)
302 #ifdef ZeroByteTypeBug
303          if (N > 0)
304 #endif
305          BI_MPI_Type_free(&MyType, ierr);
306       if (BI_ActiveQ) BI_UpdateBuffs(NULL);
307       return;
308       break;
309    case 'i':
310       BI_MringComb(ctxt, bp, bp2, N, vvop, dest, 1);
311       break;
312    case 'd':
313       BI_MringComb(ctxt, bp, bp2, N, vvop, dest, -1);
314       break;
315    case 's':
316       BI_MringComb(ctxt, bp, bp2, N, vvop, dest, 2);
317       break;
318    case 'm':
319       BI_MringComb(ctxt, bp, bp2, N, vvop, dest, ctxt->Nr_co);
320       break;
321    case '1':
322    case '2':
323    case '3':
324    case '4':
325    case '5':
326    case '6':
327    case '7':
328    case '8':
329    case '9':
330       BI_TreeComb(ctxt, bp, bp2, N, vvop, dest, ttop-47);
331       break;
332    case 'f':
333       BI_TreeComb(ctxt, bp, bp2, N, vvop, dest, FULLCON);
334       break;
335    case 't':
336       BI_TreeComb(ctxt, bp, bp2, N, vvop, dest, ctxt->Nb_co);
337       break;
338    case 'h':
339 /*
340  *    Use bidirectional exchange if everyone wants answer
341  */
342       if ( (trdest == -1) && !(ctxt->TopsCohrnt) )
343          BI_BeComb(ctxt, bp, bp2, N, vvop);
344       else
345          BI_TreeComb(ctxt, bp, bp2, N, vvop, dest, 2);
346       break;
347    default :
348       BI_BlacsErr(Mpval(ConTxt), __LINE__, __FILE__, "Unknown topology '%c'",
349                   ttop);
350    }
351 
352    if (Mpval(ldia) != -1)
353 #ifdef ZeroByteTypeBug
354       if (N > 0)
355 #endif
356       BI_MPI_Type_free(&MyType, ierr);
357 /*
358  * If I am selected to receive answer
359  */
360    if ( (ctxt->scp->Iam == dest) || (dest == -1) )
361    {
362 /*
363  *    Translate the distances stored in the latter part of bp->Buff into
364  *    process grid coordinates, and output these coordinates in the
365  *    arrays rA and cA.
366  */
367       if (Mpval(ldia) != -1)
368          BI_TransDist(ctxt, tscope, Mpval(m), Mpval(n), rA, cA, tldia,
369                       dist, trdest, Mpval(cdest));
370 /*
371  *    Unpack the amx array
372  */
373       if (bp != &BI_AuxBuff) BI_svmcopy(Mpval(m), Mpval(n), A, tlda, bp->Buff);
374    }
375 }
376