1 #include "Bdef.h"
2 
3 
4 #if (INTFACE == C_CALL)
Czgsum2d(int ConTxt,char * scope,char * top,int m,int n,double * A,int lda,int rdest,int cdest)5 void Czgsum2d(int ConTxt, char *scope, char *top, int m, int n, double *A,
6               int lda, int rdest, int cdest)
7 #else
8 F_VOID_FUNC zgsum2d_(int *ConTxt, F_CHAR scope, F_CHAR top, int *m, int *n,
9                      double *A, int *lda, int *rdest, int *cdest)
10 #endif
11 /*
12  *  -- V1.1 BLACS routine --
13  *  University of Tennessee, May 1, 1996
14  *  Written by Clint Whaley.
15  *
16  *  Purpose
17  *  =======
18  *  Combine sum operation for double complex rectangular matrices.
19  *
20  *  Arguments
21  *  =========
22  *
23  *  ConTxt  (input) Ptr to int
24  *          Index into MyConTxts00 (my contexts array).
25  *
26  *  SCOPE   (input) Ptr to char
27  *          Limit the scope of the operation.
28  *          = 'R' :   Operation is performed by a process row.
29  *          = 'C' :   Operation is performed by a process column.
30  *          = 'A' :   Operation is performed by all processes in grid.
31  *
32  *  TOP     (input) Ptr to char
33  *          Controls fashion in which messages flow within the operation.
34  *
35  *  M       (input) Ptr to int
36  *          The number of rows of the matrix A.  M >= 0.
37  *
38  *  N       (input) Ptr to int
39  *          The number of columns of the matrix A.  N >= 0.
40  *
41  *  A       (output) Ptr to double complex two dimensional array
42  *          The m by n matrix A.  Fortran77 (column-major) storage
43  *          assumed.
44  *
45  *  LDA     (input) Ptr to int
46  *          The leading dimension of the array A.  LDA >= M.
47  *
48  *  RDEST   (input) Ptr to int
49  *          The process row of the destination of the sum.
50  *          If rdest == -1, then result is left on all processes in scope.
51  *
52  *  CDEST   (input) Ptr to int
53  *          The process column of the destination of the sum.
54  *          If rdest == -1, then CDEST ignored.
55  *
56  * ------------------------------------------------------------------------
57  */
58 {
59    void BI_ArgCheck(int, int, char *, char, char, char, int, int, int, int,
60                     int *, int *);
61    void BI_UpdateBuffs(BLACBUFF *);
62    BLACBUFF *BI_GetBuff(int);
63    int BI_BuffIsFree(BLACBUFF *, int);
64    MPI_Datatype BI_GetMpiGeType(BLACSCONTEXT *, int, int, int,
65                                    MPI_Datatype, int *);
66    BLACBUFF *BI_Pack(BLACSCONTEXT *, BVOID *, BLACBUFF *, MPI_Datatype);
67    MPI_Datatype BI_GetMpiGeType(BLACSCONTEXT *, int, int, int,
68                                    MPI_Datatype, int *);
69    void BI_Unpack(BLACSCONTEXT *, BVOID *, BLACBUFF *, MPI_Datatype);
70    void BI_MringComb(BLACSCONTEXT *, BLACBUFF *, BLACBUFF *, int, VVFUNPTR,
71                      int, int);
72    void BI_TreeComb(BLACSCONTEXT *, BLACBUFF *, BLACBUFF *, int, VVFUNPTR,
73                     int, int);
74    void BI_BeComb(BLACSCONTEXT *, BLACBUFF *, BLACBUFF *, int, VVFUNPTR);
75    void BI_zvvsum(int, char *, char *);
76    void BI_zMPI_sum(void *, void *, int *, MPI_Datatype *);
77 /*
78  *  Variable Declarations
79  */
80    BLACBUFF *bp, *bp2;
81    BLACSCONTEXT *ctxt;
82    char ttop, tscope;
83    int N, length, dest, tlda, trdest, ierr;
84    MPI_Op BlacComb;
85    extern BLACBUFF *BI_ActiveQ;
86    extern BLACBUFF BI_AuxBuff;
87 
88    MGetConTxt(Mpval(ConTxt), ctxt);
89    ttop = F2C_CharTrans(top);
90    ttop = Mlowcase(ttop);
91    tscope = F2C_CharTrans(scope);
92    tscope = Mlowcase(tscope);
93 /*
94  *  If the user has set the default combine topology, use it instead of
95  *  BLACS default
96  */
97 #ifdef DefCombTop
98    if (ttop == ' ') ttop = DefCombTop;
99 #endif
100    if (Mpval(cdest) == -1) trdest = -1;
101    else trdest = Mpval(rdest);
102 #if (BlacsDebugLvl > 0)
103    BI_ArgCheck(Mpval(ConTxt), RT_COMB, __FILE__, tscope, 'u', 'u', Mpval(m),
104                Mpval(n), Mpval(lda), 1, &trdest, Mpaddress(cdest));
105 #endif
106    if (Mpval(lda) >= Mpval(m)) tlda = Mpval(lda);
107    else tlda = Mpval(m);
108    switch(tscope)
109    {
110    case 'r':
111       ctxt->scp = &ctxt->rscp;
112       if (trdest == -1) dest = -1;
113       else dest = Mpval(cdest);
114       break;
115    case 'c':
116       ctxt->scp = &ctxt->cscp;
117       dest = trdest;
118       break;
119    case 'a':
120       ctxt->scp = &ctxt->ascp;
121       if (trdest == -1) dest = -1;
122       else dest = Mvkpnum(ctxt, trdest, Mpval(cdest));
123       break;
124    default:
125       BI_BlacsErr(Mpval(ConTxt), __LINE__, __FILE__, "Unknown scope '%c'",
126                   tscope);
127    }
128 
129 
130 /*
131  * It's not defined how MPI reacts to 0 element reductions, so use BLACS 1-tree
132  * topology if we've got one.  Also, we can't use MPI functions if we need to
133  * guarantee repeatability.
134  */
135    if (ttop == ' ')
136       if ( (Mpval(m) < 1) || (Mpval(n) < 1) || (ctxt->TopsRepeat) ) ttop = '1';
137    N = Mpval(m) * Mpval(n);
138    length = N * sizeof(DCOMPLEX);
139 /*
140  * If A is contiguous, we can use it as one of the buffers
141  */
142    if ( (Mpval(m) == tlda) || (Mpval(n) == 1) )
143    {
144       bp = &BI_AuxBuff;
145       bp->Buff = (char *) A;
146       bp2 = BI_GetBuff(length);
147    }
148 /*
149  * Otherwise, we must allocate both buffers
150  */
151    else
152    {
153       bp = BI_GetBuff(length*2);
154       bp2 = &BI_AuxBuff;
155       bp2->Buff = &bp->Buff[length];
156       BI_zmvcopy(Mpval(m), Mpval(n), A, tlda, bp->Buff);
157    }
158    bp->dtype = bp2->dtype = MPI_DOUBLE_COMPLEX;
159    bp->N = bp2->N = N;
160 
161    switch(ttop)
162    {
163    case ' ':         /* use MPI's reduction by default */
164       length = 1;
165       ierr=MPI_Op_create(BI_zMPI_sum, length, &BlacComb);
166       if (dest != -1)
167       {
168          ierr=MPI_Reduce(bp->Buff, bp2->Buff, bp->N, bp->dtype, BlacComb,
169                        dest, ctxt->scp->comm);
170          if (ctxt->scp->Iam == dest)
171 	    BI_zvmcopy(Mpval(m), Mpval(n), A, tlda, bp2->Buff);
172       }
173       else
174       {
175          ierr=MPI_Allreduce(bp->Buff, bp2->Buff, bp->N, bp->dtype, BlacComb,
176 		          ctxt->scp->comm);
177 	 BI_zvmcopy(Mpval(m), Mpval(n), A, tlda, bp2->Buff);
178       }
179       ierr=MPI_Op_free(&BlacComb);
180       if (BI_ActiveQ) BI_UpdateBuffs(NULL);
181       return;
182       break;
183    case 'i':
184       BI_MringComb(ctxt, bp, bp2, N, BI_zvvsum, dest, 1);
185       break;
186    case 'd':
187       BI_MringComb(ctxt, bp, bp2, N, BI_zvvsum, dest, -1);
188       break;
189    case 's':
190       BI_MringComb(ctxt, bp, bp2, N, BI_zvvsum, dest, 2);
191       break;
192    case 'm':
193       BI_MringComb(ctxt, bp, bp2, N, BI_zvvsum, dest, ctxt->Nr_co);
194       break;
195    case '1':
196    case '2':
197    case '3':
198    case '4':
199    case '5':
200    case '6':
201    case '7':
202    case '8':
203    case '9':
204       BI_TreeComb(ctxt, bp, bp2, N, BI_zvvsum, dest, ttop-47);
205       break;
206    case 'f':
207       BI_TreeComb(ctxt, bp, bp2, N, BI_zvvsum, dest, FULLCON);
208       break;
209    case 't':
210       BI_TreeComb(ctxt, bp, bp2, N, BI_zvvsum, dest, ctxt->Nb_co);
211       break;
212    case 'h':
213 /*
214  *    Use bidirectional exchange if everyone wants answer
215  */
216       if ( (trdest == -1) && !(ctxt->TopsCohrnt) )
217          BI_BeComb(ctxt, bp, bp2, N, BI_zvvsum);
218       else
219          BI_TreeComb(ctxt, bp, bp2, N, BI_zvvsum, dest, 2);
220       break;
221    default :
222       BI_BlacsErr(Mpval(ConTxt), __LINE__, __FILE__, "Unknown topology '%c'",
223                   ttop);
224    }
225 
226 /*
227  * If I am selected to receive answer
228  */
229    if (bp != &BI_AuxBuff)
230    {
231       if ( (ctxt->scp->Iam == dest) || (dest == -1) )
232          BI_zvmcopy(Mpval(m), Mpval(n), A, tlda, bp->Buff);
233       BI_UpdateBuffs(bp);
234    }
235    else
236    {
237       if (BI_ActiveQ) BI_UpdateBuffs(NULL);
238       BI_BuffIsFree(bp, 1);
239    }
240 }
241