1 
2 #include <petscsys.h>        /*I  "petscsys.h"  I*/
3 
4 
5 /*@C
6   PetscGatherNumberOfMessages -  Computes the number of messages a node expects to receive
7 
8   Collective
9 
10   Input Parameters:
11 + comm     - Communicator
12 . iflags   - an array of integers of length sizeof(comm). A '1' in ilengths[i] represent a
13              message from current node to ith node. Optionally NULL
14 - ilengths - Non zero ilengths[i] represent a message to i of length ilengths[i].
15              Optionally NULL.
16 
17   Output Parameters:
18 . nrecvs    - number of messages received
19 
20   Level: developer
21 
22   Notes:
23   With this info, the correct message lengths can be determined using
24   PetscGatherMessageLengths()
25 
26   Either iflags or ilengths should be provided.  If iflags is not
27   provided (NULL) it can be computed from ilengths. If iflags is
28   provided, ilengths is not required.
29 
30 .seealso: PetscGatherMessageLengths()
31 @*/
PetscGatherNumberOfMessages(MPI_Comm comm,const PetscMPIInt iflags[],const PetscMPIInt ilengths[],PetscMPIInt * nrecvs)32 PetscErrorCode  PetscGatherNumberOfMessages(MPI_Comm comm,const PetscMPIInt iflags[],const PetscMPIInt ilengths[],PetscMPIInt *nrecvs)
33 {
34   PetscMPIInt    size,rank,*recv_buf,i,*iflags_local = NULL,*iflags_localm = NULL;
35   PetscErrorCode ierr;
36 
37   PetscFunctionBegin;
38   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
39   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
40 
41   ierr = PetscMalloc2(size,&recv_buf,size,&iflags_localm);CHKERRQ(ierr);
42 
43   /* If iflags not provided, compute iflags from ilengths */
44   if (!iflags) {
45     if (!ilengths) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Either iflags or ilengths should be provided");
46     iflags_local = iflags_localm;
47     for (i=0; i<size; i++) {
48       if (ilengths[i]) iflags_local[i] = 1;
49       else iflags_local[i] = 0;
50     }
51   } else iflags_local = (PetscMPIInt*) iflags;
52 
53   /* Post an allreduce to determine the numer of messages the current node will receive */
54   ierr    = MPIU_Allreduce(iflags_local,recv_buf,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
55   *nrecvs = recv_buf[rank];
56 
57   ierr = PetscFree2(recv_buf,iflags_localm);CHKERRQ(ierr);
58   PetscFunctionReturn(0);
59 }
60 
61 
62 /*@C
63   PetscGatherMessageLengths - Computes info about messages that a MPI-node will receive,
64   including (from-id,length) pairs for each message.
65 
66   Collective
67 
68   Input Parameters:
69 + comm      - Communicator
70 . nsends    - number of messages that are to be sent.
71 . nrecvs    - number of messages being received
72 - ilengths  - an array of integers of length sizeof(comm)
73               a non zero ilengths[i] represent a message to i of length ilengths[i]
74 
75 
76   Output Parameters:
77 + onodes    - list of node-ids from which messages are expected
78 - olengths  - corresponding message lengths
79 
80   Level: developer
81 
82   Notes:
83   With this info, the correct MPI_Irecv() can be posted with the correct
84   from-id, with a buffer with the right amount of memory required.
85 
86   The calling function deallocates the memory in onodes and olengths
87 
88   To determine nrecvs, one can use PetscGatherNumberOfMessages()
89 
90 .seealso: PetscGatherNumberOfMessages()
91 @*/
PetscGatherMessageLengths(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt ilengths[],PetscMPIInt ** onodes,PetscMPIInt ** olengths)92 PetscErrorCode  PetscGatherMessageLengths(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt ilengths[],PetscMPIInt **onodes,PetscMPIInt **olengths)
93 {
94   PetscErrorCode ierr;
95   PetscMPIInt    size,rank,tag,i,j;
96   MPI_Request    *s_waits  = NULL,*r_waits = NULL;
97   MPI_Status     *w_status = NULL;
98 
99   PetscFunctionBegin;
100   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
101   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
102   ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);
103 
104   /* cannot use PetscMalloc3() here because in the call to MPI_Waitall() they MUST be contiguous */
105   ierr    = PetscMalloc2(nrecvs+nsends,&r_waits,nrecvs+nsends,&w_status);CHKERRQ(ierr);
106   s_waits = r_waits+nrecvs;
107 
108   /* Post the Irecv to get the message length-info */
109   ierr = PetscMalloc1(nrecvs,olengths);CHKERRQ(ierr);
110   for (i=0; i<nrecvs; i++) {
111     ierr = MPI_Irecv((*olengths)+i,1,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i);CHKERRQ(ierr);
112   }
113 
114   /* Post the Isends with the message length-info */
115   for (i=0,j=0; i<size; ++i) {
116     if (ilengths[i]) {
117       ierr = MPI_Isend((void*)(ilengths+i),1,MPI_INT,i,tag,comm,s_waits+j);CHKERRQ(ierr);
118       j++;
119     }
120   }
121 
122   /* Post waits on sends and receivs */
123   if (nrecvs+nsends) {ierr = MPI_Waitall(nrecvs+nsends,r_waits,w_status);CHKERRQ(ierr);}
124 
125   /* Pack up the received data */
126   ierr = PetscMalloc1(nrecvs,onodes);CHKERRQ(ierr);
127   for (i=0; i<nrecvs; ++i) {
128     (*onodes)[i] = w_status[i].MPI_SOURCE;
129 #if defined(PETSC_HAVE_OMPI_MAJOR_VERSION)
130     /* This line is a workaround for a bug in OpenMPI-2.1.1 distributed by Ubuntu-18.04.2 LTS.
131        It happens in self-to-self MPI_Send/Recv using MPI_ANY_SOURCE for message matching. OpenMPI
132        does not put correct value in recv buffer. See also
133        https://lists.mcs.anl.gov/pipermail/petsc-dev/2019-July/024803.html
134        https://www.mail-archive.com/users@lists.open-mpi.org//msg33383.html
135      */
136     if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank];
137 #endif
138   }
139   ierr = PetscFree2(r_waits,w_status);CHKERRQ(ierr);
140   PetscFunctionReturn(0);
141 }
142 
143 /*@C
144   PetscGatherMessageLengths2 - Computes info about messages that a MPI-node will receive,
145   including (from-id,length) pairs for each message. Same functionality as PetscGatherMessageLengths()
146   except it takes TWO ilenths and output TWO olengths.
147 
148   Collective
149 
150   Input Parameters:
151 + comm      - Communicator
152 . nsends    - number of messages that are to be sent.
153 . nrecvs    - number of messages being received
154 - ilengths1, ilengths2 - array of integers of length sizeof(comm)
155               a non zero ilengths[i] represent a message to i of length ilengths[i]
156 
157   Output Parameters:
158 + onodes    - list of node-ids from which messages are expected
159 - olengths1, olengths2 - corresponding message lengths
160 
161   Level: developer
162 
163   Notes:
164   With this info, the correct MPI_Irecv() can be posted with the correct
165   from-id, with a buffer with the right amount of memory required.
166 
167   The calling function deallocates the memory in onodes and olengths
168 
169   To determine nrecvs, one can use PetscGatherNumberOfMessages()
170 
171 .seealso: PetscGatherMessageLengths() and PetscGatherNumberOfMessages()
172 @*/
PetscGatherMessageLengths2(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt ilengths1[],const PetscMPIInt ilengths2[],PetscMPIInt ** onodes,PetscMPIInt ** olengths1,PetscMPIInt ** olengths2)173 PetscErrorCode  PetscGatherMessageLengths2(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt ilengths1[],const PetscMPIInt ilengths2[],PetscMPIInt **onodes,PetscMPIInt **olengths1,PetscMPIInt **olengths2)
174 {
175   PetscErrorCode ierr;
176   PetscMPIInt    size,tag,i,j,*buf_s = NULL,*buf_r = NULL,*buf_j = NULL;
177   MPI_Request    *s_waits  = NULL,*r_waits = NULL;
178   MPI_Status     *w_status = NULL;
179 
180   PetscFunctionBegin;
181   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
182   ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);
183 
184   /* cannot use PetscMalloc5() because r_waits and s_waits must be contiguous for the call to MPI_Waitall() */
185   ierr = PetscMalloc4(nrecvs+nsends,&r_waits,2*nrecvs,&buf_r,2*nsends,&buf_s,nrecvs+nsends,&w_status);CHKERRQ(ierr);
186   s_waits = r_waits + nrecvs;
187 
188   /* Post the Irecv to get the message length-info */
189   ierr = PetscMalloc1(nrecvs+1,olengths1);CHKERRQ(ierr);
190   ierr = PetscMalloc1(nrecvs+1,olengths2);CHKERRQ(ierr);
191   for (i=0; i<nrecvs; i++) {
192     buf_j = buf_r + (2*i);
193     ierr  = MPI_Irecv(buf_j,2,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i);CHKERRQ(ierr);
194   }
195 
196   /* Post the Isends with the message length-info */
197   for (i=0,j=0; i<size; ++i) {
198     if (ilengths1[i]) {
199       buf_j    = buf_s + (2*j);
200       buf_j[0] = *(ilengths1+i);
201       buf_j[1] = *(ilengths2+i);
202       ierr = MPI_Isend(buf_j,2,MPI_INT,i,tag,comm,s_waits+j);CHKERRQ(ierr);
203       j++;
204     }
205   }
206   if (j != nsends) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"j %d not equal to expected number of sends %d\n",j,nsends);
207 
208   /* Post waits on sends and receivs */
209   if (nrecvs+nsends) {ierr = MPI_Waitall(nrecvs+nsends,r_waits,w_status);CHKERRQ(ierr);}
210 
211 
212   /* Pack up the received data */
213   ierr = PetscMalloc1(nrecvs+1,onodes);CHKERRQ(ierr);
214   for (i=0; i<nrecvs; ++i) {
215     (*onodes)[i]    = w_status[i].MPI_SOURCE;
216     buf_j           = buf_r + (2*i);
217     (*olengths1)[i] = buf_j[0];
218     (*olengths2)[i] = buf_j[1];
219   }
220 
221   ierr = PetscFree4(r_waits,buf_r,buf_s,w_status);CHKERRQ(ierr);
222   PetscFunctionReturn(0);
223 }
224 
225 /*
226 
227   Allocate a bufffer sufficient to hold messages of size specified in olengths.
228   And post Irecvs on these buffers using node info from onodes
229 
230  */
PetscPostIrecvInt(MPI_Comm comm,PetscMPIInt tag,PetscMPIInt nrecvs,const PetscMPIInt onodes[],const PetscMPIInt olengths[],PetscInt *** rbuf,MPI_Request ** r_waits)231 PetscErrorCode  PetscPostIrecvInt(MPI_Comm comm,PetscMPIInt tag,PetscMPIInt nrecvs,const PetscMPIInt onodes[],const PetscMPIInt olengths[],PetscInt ***rbuf,MPI_Request **r_waits)
232 {
233   PetscErrorCode ierr;
234   PetscInt       **rbuf_t,i,len = 0;
235   MPI_Request    *r_waits_t;
236 
237   PetscFunctionBegin;
238   /* compute memory required for recv buffers */
239   for (i=0; i<nrecvs; i++) len += olengths[i];  /* each message length */
240 
241   /* allocate memory for recv buffers */
242   ierr = PetscMalloc1(nrecvs+1,&rbuf_t);CHKERRQ(ierr);
243   ierr = PetscMalloc1(len,&rbuf_t[0]);CHKERRQ(ierr);
244   for (i=1; i<nrecvs; ++i) rbuf_t[i] = rbuf_t[i-1] + olengths[i-1];
245 
246   /* Post the receives */
247   ierr = PetscMalloc1(nrecvs,&r_waits_t);CHKERRQ(ierr);
248   for (i=0; i<nrecvs; ++i) {
249     ierr = MPI_Irecv(rbuf_t[i],olengths[i],MPIU_INT,onodes[i],tag,comm,r_waits_t+i);CHKERRQ(ierr);
250   }
251 
252   *rbuf    = rbuf_t;
253   *r_waits = r_waits_t;
254   PetscFunctionReturn(0);
255 }
256 
PetscPostIrecvScalar(MPI_Comm comm,PetscMPIInt tag,PetscMPIInt nrecvs,const PetscMPIInt onodes[],const PetscMPIInt olengths[],PetscScalar *** rbuf,MPI_Request ** r_waits)257 PetscErrorCode  PetscPostIrecvScalar(MPI_Comm comm,PetscMPIInt tag,PetscMPIInt nrecvs,const PetscMPIInt onodes[],const PetscMPIInt olengths[],PetscScalar ***rbuf,MPI_Request **r_waits)
258 {
259   PetscErrorCode ierr;
260   PetscMPIInt    i;
261   PetscScalar    **rbuf_t;
262   MPI_Request    *r_waits_t;
263   PetscInt       len = 0;
264 
265   PetscFunctionBegin;
266   /* compute memory required for recv buffers */
267   for (i=0; i<nrecvs; i++) len += olengths[i];  /* each message length */
268 
269   /* allocate memory for recv buffers */
270   ierr = PetscMalloc1(nrecvs+1,&rbuf_t);CHKERRQ(ierr);
271   ierr = PetscMalloc1(len,&rbuf_t[0]);CHKERRQ(ierr);
272   for (i=1; i<nrecvs; ++i) rbuf_t[i] = rbuf_t[i-1] + olengths[i-1];
273 
274   /* Post the receives */
275   ierr = PetscMalloc1(nrecvs,&r_waits_t);CHKERRQ(ierr);
276   for (i=0; i<nrecvs; ++i) {
277     ierr = MPI_Irecv(rbuf_t[i],olengths[i],MPIU_SCALAR,onodes[i],tag,comm,r_waits_t+i);CHKERRQ(ierr);
278   }
279 
280   *rbuf    = rbuf_t;
281   *r_waits = r_waits_t;
282   PetscFunctionReturn(0);
283 }
284