1 // Copyright (c) 2017-2021, Lawrence Livermore National Security, LLC and
2 // other Axom Project Developers. See the top-level LICENSE file for details.
3 //
4 // SPDX-License-Identifier: (BSD-3-Clause)
5
6 #include "lulesh.h"
7
8 // If no MPI, then this whole file is stubbed out
9 #ifdef AXOM_USE_MPI
10
11 #include <mpi.h>
12 #include <string.h>
13
14 /* Comm Routines */
15
16 #define ALLOW_UNPACKED_PLANE false
17 #define ALLOW_UNPACKED_ROW false
18 #define ALLOW_UNPACKED_COL false
19
20 /*
21 There are coherence issues for packing and unpacking message
22 buffers. Ideally, you would like a lot of threads to
23 cooperate in the assembly/dissassembly of each message.
24 To do that, each thread should really be operating in a
25 different coherence zone.
26
27 Let's assume we have three fields, f1 through f3, defined on
28 a 61x61x61 cube. If we want to send the block boundary
29 information for each field to each neighbor processor across
30 each cube face, then we have three cases for the
31 memory layout/coherence of data on each of the six cube
32 boundaries:
33
34 (a) Two of the faces will be in contiguous memory blocks
35 (b) Two of the faces will be comprised of pencils of
36 contiguous memory.
37 (c) Two of the faces will have large strides between
38 every value living on the face.
39
40 How do you pack and unpack this data in buffers to
41 simultaneous achieve the best memory efficiency and
42 the most thread independence?
43
44 Do do you pack field f1 through f3 tighly to reduce message
45 size? Do you align each field on a cache coherence boundary
46 within the message so that threads can pack and unpack each
47 field independently? For case (b), do you align each
48 boundary pencil of each field separately? This increases
49 the message size, but could improve cache coherence so
50 each pencil could be processed independently by a separate
51 thread with no conflicts.
52
53 Also, memory access for case (c) would best be done without
54 going through the cache (the stride is so large it just causes
55 a lot of useless cache evictions). Is it worth creating
56 a special case version of the packing algorithm that uses
57 non-coherent load/store opcodes?
58 */
59
60 /******************************************/
61
62
63 /* doRecv flag only works with regular block structure */
CommRecv(Domain & domain,int msgType,Index_t xferFields,Index_t dx,Index_t dy,Index_t dz,bool doRecv,bool planeOnly)64 void CommRecv(Domain& domain, int msgType, Index_t xferFields,
65 Index_t dx, Index_t dy, Index_t dz, bool doRecv, bool planeOnly) {
66
67 if (domain.numRanks() == 1)
68 return ;
69
70 /* post recieve buffers for all incoming messages */
71 int myRank ;
72 Index_t maxPlaneComm = xferFields * domain.maxPlaneSize() ;
73 Index_t maxEdgeComm = xferFields * domain.maxEdgeSize() ;
74 Index_t pmsg = 0 ; /* plane comm msg */
75 Index_t emsg = 0 ; /* edge comm msg */
76 Index_t cmsg = 0 ; /* corner comm msg */
77 MPI_Datatype baseType = ((sizeof(Real_t) == 4) ? MPI_FLOAT : MPI_DOUBLE) ;
78 bool rowMin, rowMax, colMin, colMax, planeMin, planeMax ;
79
80 /* assume communication to 6 neighbors by default */
81 rowMin = rowMax = colMin = colMax = planeMin = planeMax = true ;
82
83 if (domain.rowLoc() == 0) {
84 rowMin = false ;
85 }
86 if (domain.rowLoc() == (domain.tp()-1)) {
87 rowMax = false ;
88 }
89 if (domain.colLoc() == 0) {
90 colMin = false ;
91 }
92 if (domain.colLoc() == (domain.tp()-1)) {
93 colMax = false ;
94 }
95 if (domain.planeLoc() == 0) {
96 planeMin = false ;
97 }
98 if (domain.planeLoc() == (domain.tp()-1)) {
99 planeMax = false ;
100 }
101
102 for (Index_t i=0; i<26; ++i) {
103 domain.recvRequest[i] = MPI_REQUEST_NULL ;
104 }
105
106 MPI_Comm_rank(MPI_COMM_WORLD, &myRank) ;
107
108 /* post receives */
109
110 /* receive data from neighboring domain faces */
111 if (planeMin && doRecv) {
112 /* contiguous memory */
113 int fromRank = myRank - domain.tp()*domain.tp() ;
114 int recvCount = dx * dy * xferFields ;
115 MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm],
116 recvCount, baseType, fromRank, msgType,
117 MPI_COMM_WORLD, &domain.recvRequest[pmsg]) ;
118 ++pmsg ;
119 }
120 if (planeMax) {
121 /* contiguous memory */
122 int fromRank = myRank + domain.tp()*domain.tp() ;
123 int recvCount = dx * dy * xferFields ;
124 MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm],
125 recvCount, baseType, fromRank, msgType,
126 MPI_COMM_WORLD, &domain.recvRequest[pmsg]) ;
127 ++pmsg ;
128 }
129 if (rowMin && doRecv) {
130 /* semi-contiguous memory */
131 int fromRank = myRank - domain.tp() ;
132 int recvCount = dx * dz * xferFields ;
133 MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm],
134 recvCount, baseType, fromRank, msgType,
135 MPI_COMM_WORLD, &domain.recvRequest[pmsg]) ;
136 ++pmsg ;
137 }
138 if (rowMax) {
139 /* semi-contiguous memory */
140 int fromRank = myRank + domain.tp() ;
141 int recvCount = dx * dz * xferFields ;
142 MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm],
143 recvCount, baseType, fromRank, msgType,
144 MPI_COMM_WORLD, &domain.recvRequest[pmsg]) ;
145 ++pmsg ;
146 }
147 if (colMin && doRecv) {
148 /* scattered memory */
149 int fromRank = myRank - 1 ;
150 int recvCount = dy * dz * xferFields ;
151 MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm],
152 recvCount, baseType, fromRank, msgType,
153 MPI_COMM_WORLD, &domain.recvRequest[pmsg]) ;
154 ++pmsg ;
155 }
156 if (colMax) {
157 /* scattered memory */
158 int fromRank = myRank + 1 ;
159 int recvCount = dy * dz * xferFields ;
160 MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm],
161 recvCount, baseType, fromRank, msgType,
162 MPI_COMM_WORLD, &domain.recvRequest[pmsg]) ;
163 ++pmsg ;
164 }
165
166 if (!planeOnly) {
167 /* receive data from domains connected only by an edge */
168 if (rowMin && colMin && doRecv) {
169 int fromRank = myRank - domain.tp() - 1 ;
170 MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm +
171 emsg * maxEdgeComm],
172 dz * xferFields, baseType, fromRank, msgType,
173 MPI_COMM_WORLD, &domain.recvRequest[pmsg+emsg]) ;
174 ++emsg ;
175 }
176
177 if (rowMin && planeMin && doRecv) {
178 int fromRank = myRank - domain.tp()*domain.tp() - domain.tp() ;
179 MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm +
180 emsg * maxEdgeComm],
181 dx * xferFields, baseType, fromRank, msgType,
182 MPI_COMM_WORLD, &domain.recvRequest[pmsg+emsg]) ;
183 ++emsg ;
184 }
185
186 if (colMin && planeMin && doRecv) {
187 int fromRank = myRank - domain.tp()*domain.tp() - 1 ;
188 MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm +
189 emsg * maxEdgeComm],
190 dy * xferFields, baseType, fromRank, msgType,
191 MPI_COMM_WORLD, &domain.recvRequest[pmsg+emsg]) ;
192 ++emsg ;
193 }
194
195 if (rowMax && colMax) {
196 int fromRank = myRank + domain.tp() + 1 ;
197 MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm +
198 emsg * maxEdgeComm],
199 dz * xferFields, baseType, fromRank, msgType,
200 MPI_COMM_WORLD, &domain.recvRequest[pmsg+emsg]) ;
201 ++emsg ;
202 }
203
204 if (rowMax && planeMax) {
205 int fromRank = myRank + domain.tp()*domain.tp() + domain.tp() ;
206 MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm +
207 emsg * maxEdgeComm],
208 dx * xferFields, baseType, fromRank, msgType,
209 MPI_COMM_WORLD, &domain.recvRequest[pmsg+emsg]) ;
210 ++emsg ;
211 }
212
213 if (colMax && planeMax) {
214 int fromRank = myRank + domain.tp()*domain.tp() + 1 ;
215 MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm +
216 emsg * maxEdgeComm],
217 dy * xferFields, baseType, fromRank, msgType,
218 MPI_COMM_WORLD, &domain.recvRequest[pmsg+emsg]) ;
219 ++emsg ;
220 }
221
222 if (rowMax && colMin) {
223 int fromRank = myRank + domain.tp() - 1 ;
224 MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm +
225 emsg * maxEdgeComm],
226 dz * xferFields, baseType, fromRank, msgType,
227 MPI_COMM_WORLD, &domain.recvRequest[pmsg+emsg]) ;
228 ++emsg ;
229 }
230
231 if (rowMin && planeMax) {
232 int fromRank = myRank + domain.tp()*domain.tp() - domain.tp() ;
233 MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm +
234 emsg * maxEdgeComm],
235 dx * xferFields, baseType, fromRank, msgType,
236 MPI_COMM_WORLD, &domain.recvRequest[pmsg+emsg]) ;
237 ++emsg ;
238 }
239
240 if (colMin && planeMax) {
241 int fromRank = myRank + domain.tp()*domain.tp() - 1 ;
242 MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm +
243 emsg * maxEdgeComm],
244 dy * xferFields, baseType, fromRank, msgType,
245 MPI_COMM_WORLD, &domain.recvRequest[pmsg+emsg]) ;
246 ++emsg ;
247 }
248
249 if (rowMin && colMax && doRecv) {
250 int fromRank = myRank - domain.tp() + 1 ;
251 MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm +
252 emsg * maxEdgeComm],
253 dz * xferFields, baseType, fromRank, msgType,
254 MPI_COMM_WORLD, &domain.recvRequest[pmsg+emsg]) ;
255 ++emsg ;
256 }
257
258 if (rowMax && planeMin && doRecv) {
259 int fromRank = myRank - domain.tp()*domain.tp() + domain.tp() ;
260 MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm +
261 emsg * maxEdgeComm],
262 dx * xferFields, baseType, fromRank, msgType,
263 MPI_COMM_WORLD, &domain.recvRequest[pmsg+emsg]) ;
264 ++emsg ;
265 }
266
267 if (colMax && planeMin && doRecv) {
268 int fromRank = myRank - domain.tp()*domain.tp() + 1 ;
269 MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm +
270 emsg * maxEdgeComm],
271 dy * xferFields, baseType, fromRank, msgType,
272 MPI_COMM_WORLD, &domain.recvRequest[pmsg+emsg]) ;
273 ++emsg ;
274 }
275
276 /* receive data from domains connected only by a corner */
277 if (rowMin && colMin && planeMin && doRecv) {
278 /* corner at domain logical coord (0, 0, 0) */
279 int fromRank = myRank - domain.tp()*domain.tp() - domain.tp() - 1 ;
280 MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm +
281 emsg * maxEdgeComm +
282 cmsg * CACHE_COHERENCE_PAD_REAL],
283 xferFields, baseType, fromRank, msgType,
284 MPI_COMM_WORLD, &domain.recvRequest[pmsg+emsg+cmsg]) ;
285 ++cmsg ;
286 }
287 if (rowMin && colMin && planeMax) {
288 /* corner at domain logical coord (0, 0, 1) */
289 int fromRank = myRank + domain.tp()*domain.tp() - domain.tp() - 1 ;
290 MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm +
291 emsg * maxEdgeComm +
292 cmsg * CACHE_COHERENCE_PAD_REAL],
293 xferFields, baseType, fromRank, msgType,
294 MPI_COMM_WORLD, &domain.recvRequest[pmsg+emsg+cmsg]) ;
295 ++cmsg ;
296 }
297 if (rowMin && colMax && planeMin && doRecv) {
298 /* corner at domain logical coord (1, 0, 0) */
299 int fromRank = myRank - domain.tp()*domain.tp() - domain.tp() + 1 ;
300 MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm +
301 emsg * maxEdgeComm +
302 cmsg * CACHE_COHERENCE_PAD_REAL],
303 xferFields, baseType, fromRank, msgType,
304 MPI_COMM_WORLD, &domain.recvRequest[pmsg+emsg+cmsg]) ;
305 ++cmsg ;
306 }
307 if (rowMin && colMax && planeMax) {
308 /* corner at domain logical coord (1, 0, 1) */
309 int fromRank = myRank + domain.tp()*domain.tp() - domain.tp() + 1 ;
310 MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm +
311 emsg * maxEdgeComm +
312 cmsg * CACHE_COHERENCE_PAD_REAL],
313 xferFields, baseType, fromRank, msgType,
314 MPI_COMM_WORLD, &domain.recvRequest[pmsg+emsg+cmsg]) ;
315 ++cmsg ;
316 }
317 if (rowMax && colMin && planeMin && doRecv) {
318 /* corner at domain logical coord (0, 1, 0) */
319 int fromRank = myRank - domain.tp()*domain.tp() + domain.tp() - 1 ;
320 MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm +
321 emsg * maxEdgeComm +
322 cmsg * CACHE_COHERENCE_PAD_REAL],
323 xferFields, baseType, fromRank, msgType,
324 MPI_COMM_WORLD, &domain.recvRequest[pmsg+emsg+cmsg]) ;
325 ++cmsg ;
326 }
327 if (rowMax && colMin && planeMax) {
328 /* corner at domain logical coord (0, 1, 1) */
329 int fromRank = myRank + domain.tp()*domain.tp() + domain.tp() - 1 ;
330 MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm +
331 emsg * maxEdgeComm +
332 cmsg * CACHE_COHERENCE_PAD_REAL],
333 xferFields, baseType, fromRank, msgType,
334 MPI_COMM_WORLD, &domain.recvRequest[pmsg+emsg+cmsg]) ;
335 ++cmsg ;
336 }
337 if (rowMax && colMax && planeMin && doRecv) {
338 /* corner at domain logical coord (1, 1, 0) */
339 int fromRank = myRank - domain.tp()*domain.tp() + domain.tp() + 1 ;
340 MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm +
341 emsg * maxEdgeComm +
342 cmsg * CACHE_COHERENCE_PAD_REAL],
343 xferFields, baseType, fromRank, msgType,
344 MPI_COMM_WORLD, &domain.recvRequest[pmsg+emsg+cmsg]) ;
345 ++cmsg ;
346 }
347 if (rowMax && colMax && planeMax) {
348 /* corner at domain logical coord (1, 1, 1) */
349 int fromRank = myRank + domain.tp()*domain.tp() + domain.tp() + 1 ;
350 MPI_Irecv(&domain.commDataRecv[pmsg * maxPlaneComm +
351 emsg * maxEdgeComm +
352 cmsg * CACHE_COHERENCE_PAD_REAL],
353 xferFields, baseType, fromRank, msgType,
354 MPI_COMM_WORLD, &domain.recvRequest[pmsg+emsg+cmsg]) ;
355 ++cmsg ;
356 }
357 }
358 }
359
360 /******************************************/
361
CommSend(Domain & domain,int msgType,Index_t xferFields,Domain_member * fieldData,Index_t dx,Index_t dy,Index_t dz,bool doSend,bool planeOnly)362 void CommSend(Domain& domain, int msgType,
363 Index_t xferFields, Domain_member *fieldData,
364 Index_t dx, Index_t dy, Index_t dz, bool doSend, bool planeOnly)
365 {
366
367 if (domain.numRanks() == 1)
368 return ;
369
370 /* post recieve buffers for all incoming messages */
371 int myRank ;
372 Index_t maxPlaneComm = xferFields * domain.maxPlaneSize() ;
373 Index_t maxEdgeComm = xferFields * domain.maxEdgeSize() ;
374 Index_t pmsg = 0 ; /* plane comm msg */
375 Index_t emsg = 0 ; /* edge comm msg */
376 Index_t cmsg = 0 ; /* corner comm msg */
377 MPI_Datatype baseType = ((sizeof(Real_t) == 4) ? MPI_FLOAT : MPI_DOUBLE) ;
378 MPI_Status status[26] ;
379 Real_t *destAddr ;
380 bool rowMin, rowMax, colMin, colMax, planeMin, planeMax ;
381 /* assume communication to 6 neighbors by default */
382 rowMin = rowMax = colMin = colMax = planeMin = planeMax = true ;
383 if (domain.rowLoc() == 0) {
384 rowMin = false ;
385 }
386 if (domain.rowLoc() == (domain.tp()-1)) {
387 rowMax = false ;
388 }
389 if (domain.colLoc() == 0) {
390 colMin = false ;
391 }
392 if (domain.colLoc() == (domain.tp()-1)) {
393 colMax = false ;
394 }
395 if (domain.planeLoc() == 0) {
396 planeMin = false ;
397 }
398 if (domain.planeLoc() == (domain.tp()-1)) {
399 planeMax = false ;
400 }
401
402 for (Index_t i=0; i<26; ++i) {
403 domain.sendRequest[i] = MPI_REQUEST_NULL ;
404 }
405
406 MPI_Comm_rank(MPI_COMM_WORLD, &myRank) ;
407
408 /* post sends */
409
410 if (planeMin | planeMax) {
411 /* ASSUMING ONE DOMAIN PER RANK, CONSTANT BLOCK SIZE HERE */
412 int sendCount = dx * dy ;
413
414 if (planeMin) {
415 destAddr = &domain.commDataSend[pmsg * maxPlaneComm] ;
416 for (Index_t fi=0 ; fi<xferFields; ++fi) {
417 Domain_member src = fieldData[fi] ;
418 for (Index_t i=0; i<sendCount; ++i) {
419 destAddr[i] = (domain.*src)(i) ;
420 }
421 destAddr += sendCount ;
422 }
423 destAddr -= xferFields*sendCount ;
424
425 MPI_Isend(destAddr, xferFields*sendCount, baseType,
426 myRank - domain.tp()*domain.tp(), msgType,
427 MPI_COMM_WORLD, &domain.sendRequest[pmsg]) ;
428 ++pmsg ;
429 }
430 if (planeMax && doSend) {
431 destAddr = &domain.commDataSend[pmsg * maxPlaneComm] ;
432 for (Index_t fi=0 ; fi<xferFields; ++fi) {
433 Domain_member src = fieldData[fi] ;
434 for (Index_t i=0; i<sendCount; ++i) {
435 destAddr[i] = (domain.*src)(dx*dy*(dz - 1) + i) ;
436 }
437 destAddr += sendCount ;
438 }
439 destAddr -= xferFields*sendCount ;
440
441 MPI_Isend(destAddr, xferFields*sendCount, baseType,
442 myRank + domain.tp()*domain.tp(), msgType,
443 MPI_COMM_WORLD, &domain.sendRequest[pmsg]) ;
444 ++pmsg ;
445 }
446 }
447 if (rowMin | rowMax) {
448 /* ASSUMING ONE DOMAIN PER RANK, CONSTANT BLOCK SIZE HERE */
449 int sendCount = dx * dz ;
450
451 if (rowMin) {
452 destAddr = &domain.commDataSend[pmsg * maxPlaneComm] ;
453 for (Index_t fi=0; fi<xferFields; ++fi) {
454 Domain_member src = fieldData[fi] ;
455 for (Index_t i=0; i<dz; ++i) {
456 for (Index_t j=0; j<dx; ++j) {
457 destAddr[i*dx+j] = (domain.*src)(i*dx*dy + j) ;
458 }
459 }
460 destAddr += sendCount ;
461 }
462 destAddr -= xferFields*sendCount ;
463
464 MPI_Isend(destAddr, xferFields*sendCount, baseType,
465 myRank - domain.tp(), msgType,
466 MPI_COMM_WORLD, &domain.sendRequest[pmsg]) ;
467 ++pmsg ;
468 }
469 if (rowMax && doSend) {
470 destAddr = &domain.commDataSend[pmsg * maxPlaneComm] ;
471 for (Index_t fi=0; fi<xferFields; ++fi) {
472 Domain_member src = fieldData[fi] ;
473 for (Index_t i=0; i<dz; ++i) {
474 for (Index_t j=0; j<dx; ++j) {
475 destAddr[i*dx+j] = (domain.*src)(dx*(dy - 1) + i*dx*dy + j) ;
476 }
477 }
478 destAddr += sendCount ;
479 }
480 destAddr -= xferFields*sendCount ;
481
482 MPI_Isend(destAddr, xferFields*sendCount, baseType,
483 myRank + domain.tp(), msgType,
484 MPI_COMM_WORLD, &domain.sendRequest[pmsg]) ;
485 ++pmsg ;
486 }
487 }
488 if (colMin | colMax) {
489 /* ASSUMING ONE DOMAIN PER RANK, CONSTANT BLOCK SIZE HERE */
490 int sendCount = dy * dz ;
491
492 if (colMin) {
493 destAddr = &domain.commDataSend[pmsg * maxPlaneComm] ;
494 for (Index_t fi=0; fi<xferFields; ++fi) {
495 Domain_member src = fieldData[fi] ;
496 for (Index_t i=0; i<dz; ++i) {
497 for (Index_t j=0; j<dy; ++j) {
498 destAddr[i*dy + j] = (domain.*src)(i*dx*dy + j*dx) ;
499 }
500 }
501 destAddr += sendCount ;
502 }
503 destAddr -= xferFields*sendCount ;
504
505 MPI_Isend(destAddr, xferFields*sendCount, baseType,
506 myRank - 1, msgType,
507 MPI_COMM_WORLD, &domain.sendRequest[pmsg]) ;
508 ++pmsg ;
509 }
510 if (colMax && doSend) {
511 destAddr = &domain.commDataSend[pmsg * maxPlaneComm] ;
512 for (Index_t fi=0; fi<xferFields; ++fi) {
513 Domain_member src = fieldData[fi] ;
514 for (Index_t i=0; i<dz; ++i) {
515 for (Index_t j=0; j<dy; ++j) {
516 destAddr[i*dy + j] = (domain.*src)(dx - 1 + i*dx*dy + j*dx) ;
517 }
518 }
519 destAddr += sendCount ;
520 }
521 destAddr -= xferFields*sendCount ;
522
523 MPI_Isend(destAddr, xferFields*sendCount, baseType,
524 myRank + 1, msgType,
525 MPI_COMM_WORLD, &domain.sendRequest[pmsg]) ;
526 ++pmsg ;
527 }
528 }
529
530 if (!planeOnly) {
531 if (rowMin && colMin) {
532 int toRank = myRank - domain.tp() - 1 ;
533 destAddr = &domain.commDataSend[pmsg * maxPlaneComm +
534 emsg * maxEdgeComm] ;
535 for (Index_t fi=0; fi<xferFields; ++fi) {
536 Domain_member src = fieldData[fi] ;
537 for (Index_t i=0; i<dz; ++i) {
538 destAddr[i] = (domain.*src)(i*dx*dy) ;
539 }
540 destAddr += dz ;
541 }
542 destAddr -= xferFields*dz ;
543 MPI_Isend(destAddr, xferFields*dz, baseType, toRank, msgType,
544 MPI_COMM_WORLD, &domain.sendRequest[pmsg+emsg]) ;
545 ++emsg ;
546 }
547
548 if (rowMin && planeMin) {
549 int toRank = myRank - domain.tp()*domain.tp() - domain.tp() ;
550 destAddr = &domain.commDataSend[pmsg * maxPlaneComm +
551 emsg * maxEdgeComm] ;
552 for (Index_t fi=0; fi<xferFields; ++fi) {
553 Domain_member src = fieldData[fi] ;
554 for (Index_t i=0; i<dx; ++i) {
555 destAddr[i] = (domain.*src)(i) ;
556 }
557 destAddr += dx ;
558 }
559 destAddr -= xferFields*dx ;
560 MPI_Isend(destAddr, xferFields*dx, baseType, toRank, msgType,
561 MPI_COMM_WORLD, &domain.sendRequest[pmsg+emsg]) ;
562 ++emsg ;
563 }
564
565 if (colMin && planeMin) {
566 int toRank = myRank - domain.tp()*domain.tp() - 1 ;
567 destAddr = &domain.commDataSend[pmsg * maxPlaneComm +
568 emsg * maxEdgeComm] ;
569 for (Index_t fi=0; fi<xferFields; ++fi) {
570 Domain_member src = fieldData[fi] ;
571 for (Index_t i=0; i<dy; ++i) {
572 destAddr[i] = (domain.*src)(i*dx) ;
573 }
574 destAddr += dy ;
575 }
576 destAddr -= xferFields*dy ;
577 MPI_Isend(destAddr, xferFields*dy, baseType, toRank, msgType,
578 MPI_COMM_WORLD, &domain.sendRequest[pmsg+emsg]) ;
579 ++emsg ;
580 }
581
582 if (rowMax && colMax && doSend) {
583 int toRank = myRank + domain.tp() + 1 ;
584 destAddr = &domain.commDataSend[pmsg * maxPlaneComm +
585 emsg * maxEdgeComm] ;
586 for (Index_t fi=0; fi<xferFields; ++fi) {
587 Domain_member src = fieldData[fi] ;
588 for (Index_t i=0; i<dz; ++i) {
589 destAddr[i] = (domain.*src)(dx*dy - 1 + i*dx*dy) ;
590 }
591 destAddr += dz ;
592 }
593 destAddr -= xferFields*dz ;
594 MPI_Isend(destAddr, xferFields*dz, baseType, toRank, msgType,
595 MPI_COMM_WORLD, &domain.sendRequest[pmsg+emsg]) ;
596 ++emsg ;
597 }
598
599 if (rowMax && planeMax && doSend) {
600 int toRank = myRank + domain.tp()*domain.tp() + domain.tp() ;
601 destAddr = &domain.commDataSend[pmsg * maxPlaneComm +
602 emsg * maxEdgeComm] ;
603 for (Index_t fi=0; fi<xferFields; ++fi) {
604 Domain_member src = fieldData[fi] ;
605 for (Index_t i=0; i<dx; ++i) {
606 destAddr[i] = (domain.*src)(dx*(dy-1) + dx*dy*(dz-1) + i) ;
607 }
608 destAddr += dx ;
609 }
610 destAddr -= xferFields*dx ;
611 MPI_Isend(destAddr, xferFields*dx, baseType, toRank, msgType,
612 MPI_COMM_WORLD, &domain.sendRequest[pmsg+emsg]) ;
613 ++emsg ;
614 }
615
616 if (colMax && planeMax && doSend) {
617 int toRank = myRank + domain.tp()*domain.tp() + 1 ;
618 destAddr = &domain.commDataSend[pmsg * maxPlaneComm +
619 emsg * maxEdgeComm] ;
620 for (Index_t fi=0; fi<xferFields; ++fi) {
621 Domain_member src = fieldData[fi] ;
622 for (Index_t i=0; i<dy; ++i) {
623 destAddr[i] = (domain.*src)(dx*dy*(dz-1) + dx - 1 + i*dx) ;
624 }
625 destAddr += dy ;
626 }
627 destAddr -= xferFields*dy ;
628 MPI_Isend(destAddr, xferFields*dy, baseType, toRank, msgType,
629 MPI_COMM_WORLD, &domain.sendRequest[pmsg+emsg]) ;
630 ++emsg ;
631 }
632
633 if (rowMax && colMin && doSend) {
634 int toRank = myRank + domain.tp() - 1 ;
635 destAddr = &domain.commDataSend[pmsg * maxPlaneComm +
636 emsg * maxEdgeComm] ;
637 for (Index_t fi=0; fi<xferFields; ++fi) {
638 Domain_member src = fieldData[fi] ;
639 for (Index_t i=0; i<dz; ++i) {
640 destAddr[i] = (domain.*src)(dx*(dy-1) + i*dx*dy) ;
641 }
642 destAddr += dz ;
643 }
644 destAddr -= xferFields*dz ;
645 MPI_Isend(destAddr, xferFields*dz, baseType, toRank, msgType,
646 MPI_COMM_WORLD, &domain.sendRequest[pmsg+emsg]) ;
647 ++emsg ;
648 }
649
650 if (rowMin && planeMax && doSend) {
651 int toRank = myRank + domain.tp()*domain.tp() - domain.tp() ;
652 destAddr = &domain.commDataSend[pmsg * maxPlaneComm +
653 emsg * maxEdgeComm] ;
654 for (Index_t fi=0; fi<xferFields; ++fi) {
655 Domain_member src = fieldData[fi] ;
656 for (Index_t i=0; i<dx; ++i) {
657 destAddr[i] = (domain.*src)(dx*dy*(dz-1) + i) ;
658 }
659 destAddr += dx ;
660 }
661 destAddr -= xferFields*dx ;
662 MPI_Isend(destAddr, xferFields*dx, baseType, toRank, msgType,
663 MPI_COMM_WORLD, &domain.sendRequest[pmsg+emsg]) ;
664 ++emsg ;
665 }
666
667 if (colMin && planeMax && doSend) {
668 int toRank = myRank + domain.tp()*domain.tp() - 1 ;
669 destAddr = &domain.commDataSend[pmsg * maxPlaneComm +
670 emsg * maxEdgeComm] ;
671 for (Index_t fi=0; fi<xferFields; ++fi) {
672 Domain_member src = fieldData[fi] ;
673 for (Index_t i=0; i<dy; ++i) {
674 destAddr[i] = (domain.*src)(dx*dy*(dz-1) + i*dx) ;
675 }
676 destAddr += dy ;
677 }
678 destAddr -= xferFields*dy ;
679 MPI_Isend(destAddr, xferFields*dy, baseType, toRank, msgType,
680 MPI_COMM_WORLD, &domain.sendRequest[pmsg+emsg]) ;
681 ++emsg ;
682 }
683
684 if (rowMin && colMax) {
685 int toRank = myRank - domain.tp() + 1 ;
686 destAddr = &domain.commDataSend[pmsg * maxPlaneComm +
687 emsg * maxEdgeComm] ;
688 for (Index_t fi=0; fi<xferFields; ++fi) {
689 Domain_member src = fieldData[fi] ;
690 for (Index_t i=0; i<dz; ++i) {
691 destAddr[i] = (domain.*src)(dx - 1 + i*dx*dy) ;
692 }
693 destAddr += dz ;
694 }
695 destAddr -= xferFields*dz ;
696 MPI_Isend(destAddr, xferFields*dz, baseType, toRank, msgType,
697 MPI_COMM_WORLD, &domain.sendRequest[pmsg+emsg]) ;
698 ++emsg ;
699 }
700
701 if (rowMax && planeMin) {
702 int toRank = myRank - domain.tp()*domain.tp() + domain.tp() ;
703 destAddr = &domain.commDataSend[pmsg * maxPlaneComm +
704 emsg * maxEdgeComm] ;
705 for (Index_t fi=0; fi<xferFields; ++fi) {
706 Domain_member src = fieldData[fi] ;
707 for (Index_t i=0; i<dx; ++i) {
708 destAddr[i] = (domain.*src)(dx*(dy - 1) + i) ;
709 }
710 destAddr += dx ;
711 }
712 destAddr -= xferFields*dx ;
713 MPI_Isend(destAddr, xferFields*dx, baseType, toRank, msgType,
714 MPI_COMM_WORLD, &domain.sendRequest[pmsg+emsg]) ;
715 ++emsg ;
716 }
717
718 if (colMax && planeMin) {
719 int toRank = myRank - domain.tp()*domain.tp() + 1 ;
720 destAddr = &domain.commDataSend[pmsg * maxPlaneComm +
721 emsg * maxEdgeComm] ;
722 for (Index_t fi=0; fi<xferFields; ++fi) {
723 Domain_member src = fieldData[fi] ;
724 for (Index_t i=0; i<dy; ++i) {
725 destAddr[i] = (domain.*src)(dx - 1 + i*dx) ;
726 }
727 destAddr += dy ;
728 }
729 destAddr -= xferFields*dy ;
730 MPI_Isend(destAddr, xferFields*dy, baseType, toRank, msgType,
731 MPI_COMM_WORLD, &domain.sendRequest[pmsg+emsg]) ;
732 ++emsg ;
733 }
734
735 if (rowMin && colMin && planeMin) {
736 /* corner at domain logical coord (0, 0, 0) */
737 int toRank = myRank - domain.tp()*domain.tp() - domain.tp() - 1 ;
738 Real_t *comBuf = &domain.commDataSend[pmsg * maxPlaneComm +
739 emsg * maxEdgeComm +
740 cmsg * CACHE_COHERENCE_PAD_REAL] ;
741 for (Index_t fi=0; fi<xferFields; ++fi) {
742 comBuf[fi] = (domain.*fieldData[fi])(0) ;
743 }
744 MPI_Isend(comBuf, xferFields, baseType, toRank, msgType,
745 MPI_COMM_WORLD, &domain.sendRequest[pmsg+emsg+cmsg]) ;
746 ++cmsg ;
747 }
748 if (rowMin && colMin && planeMax && doSend) {
749 /* corner at domain logical coord (0, 0, 1) */
750 int toRank = myRank + domain.tp()*domain.tp() - domain.tp() - 1 ;
751 Real_t *comBuf = &domain.commDataSend[pmsg * maxPlaneComm +
752 emsg * maxEdgeComm +
753 cmsg * CACHE_COHERENCE_PAD_REAL] ;
754 Index_t idx = dx*dy*(dz - 1) ;
755 for (Index_t fi=0; fi<xferFields; ++fi) {
756 comBuf[fi] = (domain.*fieldData[fi])(idx) ;
757 }
758 MPI_Isend(comBuf, xferFields, baseType, toRank, msgType,
759 MPI_COMM_WORLD, &domain.sendRequest[pmsg+emsg+cmsg]) ;
760 ++cmsg ;
761 }
762 if (rowMin && colMax && planeMin) {
763 /* corner at domain logical coord (1, 0, 0) */
764 int toRank = myRank - domain.tp()*domain.tp() - domain.tp() + 1 ;
765 Real_t *comBuf = &domain.commDataSend[pmsg * maxPlaneComm +
766 emsg * maxEdgeComm +
767 cmsg * CACHE_COHERENCE_PAD_REAL] ;
768 Index_t idx = dx - 1 ;
769 for (Index_t fi=0; fi<xferFields; ++fi) {
770 comBuf[fi] = (domain.*fieldData[fi])(idx) ;
771 }
772 MPI_Isend(comBuf, xferFields, baseType, toRank, msgType,
773 MPI_COMM_WORLD, &domain.sendRequest[pmsg+emsg+cmsg]) ;
774 ++cmsg ;
775 }
776 if (rowMin && colMax && planeMax && doSend) {
777 /* corner at domain logical coord (1, 0, 1) */
778 int toRank = myRank + domain.tp()*domain.tp() - domain.tp() + 1 ;
779 Real_t *comBuf = &domain.commDataSend[pmsg * maxPlaneComm +
780 emsg * maxEdgeComm +
781 cmsg * CACHE_COHERENCE_PAD_REAL] ;
782 Index_t idx = dx*dy*(dz - 1) + (dx - 1) ;
783 for (Index_t fi=0; fi<xferFields; ++fi) {
784 comBuf[fi] = (domain.*fieldData[fi])(idx) ;
785 }
786 MPI_Isend(comBuf, xferFields, baseType, toRank, msgType,
787 MPI_COMM_WORLD, &domain.sendRequest[pmsg+emsg+cmsg]) ;
788 ++cmsg ;
789 }
790 if (rowMax && colMin && planeMin) {
791 /* corner at domain logical coord (0, 1, 0) */
792 int toRank = myRank - domain.tp()*domain.tp() + domain.tp() - 1 ;
793 Real_t *comBuf = &domain.commDataSend[pmsg * maxPlaneComm +
794 emsg * maxEdgeComm +
795 cmsg * CACHE_COHERENCE_PAD_REAL] ;
796 Index_t idx = dx*(dy - 1) ;
797 for (Index_t fi=0; fi<xferFields; ++fi) {
798 comBuf[fi] = (domain.*fieldData[fi])(idx) ;
799 }
800 MPI_Isend(comBuf, xferFields, baseType, toRank, msgType,
801 MPI_COMM_WORLD, &domain.sendRequest[pmsg+emsg+cmsg]) ;
802 ++cmsg ;
803 }
804 if (rowMax && colMin && planeMax && doSend) {
805 /* corner at domain logical coord (0, 1, 1) */
806 int toRank = myRank + domain.tp()*domain.tp() + domain.tp() - 1 ;
807 Real_t *comBuf = &domain.commDataSend[pmsg * maxPlaneComm +
808 emsg * maxEdgeComm +
809 cmsg * CACHE_COHERENCE_PAD_REAL] ;
810 Index_t idx = dx*dy*(dz - 1) + dx*(dy - 1) ;
811 for (Index_t fi=0; fi<xferFields; ++fi) {
812 comBuf[fi] = (domain.*fieldData[fi])(idx) ;
813 }
814 MPI_Isend(comBuf, xferFields, baseType, toRank, msgType,
815 MPI_COMM_WORLD, &domain.sendRequest[pmsg+emsg+cmsg]) ;
816 ++cmsg ;
817 }
818 if (rowMax && colMax && planeMin) {
819 /* corner at domain logical coord (1, 1, 0) */
820 int toRank = myRank - domain.tp()*domain.tp() + domain.tp() + 1 ;
821 Real_t *comBuf = &domain.commDataSend[pmsg * maxPlaneComm +
822 emsg * maxEdgeComm +
823 cmsg * CACHE_COHERENCE_PAD_REAL] ;
824 Index_t idx = dx*dy - 1 ;
825 for (Index_t fi=0; fi<xferFields; ++fi) {
826 comBuf[fi] = (domain.*fieldData[fi])(idx) ;
827 }
828 MPI_Isend(comBuf, xferFields, baseType, toRank, msgType,
829 MPI_COMM_WORLD, &domain.sendRequest[pmsg+emsg+cmsg]) ;
830 ++cmsg ;
831 }
832 if (rowMax && colMax && planeMax && doSend) {
833 /* corner at domain logical coord (1, 1, 1) */
834 int toRank = myRank + domain.tp()*domain.tp() + domain.tp() + 1 ;
835 Real_t *comBuf = &domain.commDataSend[pmsg * maxPlaneComm +
836 emsg * maxEdgeComm +
837 cmsg * CACHE_COHERENCE_PAD_REAL] ;
838 Index_t idx = dx*dy*dz - 1 ;
839 for (Index_t fi=0; fi<xferFields; ++fi) {
840 comBuf[fi] = (domain.*fieldData[fi])(idx) ;
841 }
842 MPI_Isend(comBuf, xferFields, baseType, toRank, msgType,
843 MPI_COMM_WORLD, &domain.sendRequest[pmsg+emsg+cmsg]) ;
844 ++cmsg ;
845 }
846 }
847
848 MPI_Waitall(26, domain.sendRequest, status) ;
849 }
850
851 /******************************************/
852
CommSBN(Domain & domain,int xferFields,Domain_member * fieldData)853 void CommSBN(Domain& domain, int xferFields, Domain_member *fieldData) {
854
855 if (domain.numRanks() == 1)
856 return ;
857
858 /* summation order should be from smallest value to largest */
859 /* or we could try out kahan summation! */
860
861 int myRank ;
862 Index_t maxPlaneComm = xferFields * domain.maxPlaneSize() ;
863 Index_t maxEdgeComm = xferFields * domain.maxEdgeSize() ;
864 Index_t pmsg = 0 ; /* plane comm msg */
865 Index_t emsg = 0 ; /* edge comm msg */
866 Index_t cmsg = 0 ; /* corner comm msg */
867 Index_t dx = domain.sizeX() + 1 ;
868 Index_t dy = domain.sizeY() + 1 ;
869 Index_t dz = domain.sizeZ() + 1 ;
870 MPI_Status status ;
871 Real_t *srcAddr ;
872 Index_t rowMin, rowMax, colMin, colMax, planeMin, planeMax ;
873 /* assume communication to 6 neighbors by default */
874 rowMin = rowMax = colMin = colMax = planeMin = planeMax = 1 ;
875 if (domain.rowLoc() == 0) {
876 rowMin = 0 ;
877 }
878 if (domain.rowLoc() == (domain.tp()-1)) {
879 rowMax = 0 ;
880 }
881 if (domain.colLoc() == 0) {
882 colMin = 0 ;
883 }
884 if (domain.colLoc() == (domain.tp()-1)) {
885 colMax = 0 ;
886 }
887 if (domain.planeLoc() == 0) {
888 planeMin = 0 ;
889 }
890 if (domain.planeLoc() == (domain.tp()-1)) {
891 planeMax = 0 ;
892 }
893
894 MPI_Comm_rank(MPI_COMM_WORLD, &myRank) ;
895
896 if (planeMin | planeMax) {
897 /* ASSUMING ONE DOMAIN PER RANK, CONSTANT BLOCK SIZE HERE */
898 Index_t opCount = dx * dy ;
899
900 if (planeMin) {
901 /* contiguous memory */
902 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm] ;
903 MPI_Wait(&domain.recvRequest[pmsg], &status) ;
904 for (Index_t fi=0 ; fi<xferFields; ++fi) {
905 Domain_member dest = fieldData[fi] ;
906 for (Index_t i=0; i<opCount; ++i) {
907 (domain.*dest)(i) += srcAddr[i] ;
908 }
909 srcAddr += opCount ;
910 }
911 ++pmsg ;
912 }
913 if (planeMax) {
914 /* contiguous memory */
915 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm] ;
916 MPI_Wait(&domain.recvRequest[pmsg], &status) ;
917 for (Index_t fi=0 ; fi<xferFields; ++fi) {
918 Domain_member dest = fieldData[fi] ;
919 for (Index_t i=0; i<opCount; ++i) {
920 (domain.*dest)(dx*dy*(dz - 1) + i) += srcAddr[i] ;
921 }
922 srcAddr += opCount ;
923 }
924 ++pmsg ;
925 }
926 }
927
928 if (rowMin | rowMax) {
929 /* ASSUMING ONE DOMAIN PER RANK, CONSTANT BLOCK SIZE HERE */
930 Index_t opCount = dx * dz ;
931
932 if (rowMin) {
933 /* contiguous memory */
934 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm] ;
935 MPI_Wait(&domain.recvRequest[pmsg], &status) ;
936 for (Index_t fi=0 ; fi<xferFields; ++fi) {
937 Domain_member dest = fieldData[fi] ;
938 for (Index_t i=0; i<dz; ++i) {
939 for (Index_t j=0; j<dx; ++j) {
940 (domain.*dest)(i*dx*dy + j) += srcAddr[i*dx + j] ;
941 }
942 }
943 srcAddr += opCount ;
944 }
945 ++pmsg ;
946 }
947 if (rowMax) {
948 /* contiguous memory */
949 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm] ;
950 MPI_Wait(&domain.recvRequest[pmsg], &status) ;
951 for (Index_t fi=0 ; fi<xferFields; ++fi) {
952 Domain_member dest = fieldData[fi] ;
953 for (Index_t i=0; i<dz; ++i) {
954 for (Index_t j=0; j<dx; ++j) {
955 (domain.*dest)(dx*(dy - 1) + i*dx*dy + j) += srcAddr[i*dx + j] ;
956 }
957 }
958 srcAddr += opCount ;
959 }
960 ++pmsg ;
961 }
962 }
963 if (colMin | colMax) {
964 /* ASSUMING ONE DOMAIN PER RANK, CONSTANT BLOCK SIZE HERE */
965 Index_t opCount = dy * dz ;
966
967 if (colMin) {
968 /* contiguous memory */
969 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm] ;
970 MPI_Wait(&domain.recvRequest[pmsg], &status) ;
971 for (Index_t fi=0 ; fi<xferFields; ++fi) {
972 Domain_member dest = fieldData[fi] ;
973 for (Index_t i=0; i<dz; ++i) {
974 for (Index_t j=0; j<dy; ++j) {
975 (domain.*dest)(i*dx*dy + j*dx) += srcAddr[i*dy + j] ;
976 }
977 }
978 srcAddr += opCount ;
979 }
980 ++pmsg ;
981 }
982 if (colMax) {
983 /* contiguous memory */
984 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm] ;
985 MPI_Wait(&domain.recvRequest[pmsg], &status) ;
986 for (Index_t fi=0 ; fi<xferFields; ++fi) {
987 Domain_member dest = fieldData[fi] ;
988 for (Index_t i=0; i<dz; ++i) {
989 for (Index_t j=0; j<dy; ++j) {
990 (domain.*dest)(dx - 1 + i*dx*dy + j*dx) += srcAddr[i*dy + j] ;
991 }
992 }
993 srcAddr += opCount ;
994 }
995 ++pmsg ;
996 }
997 }
998
999 if (rowMin & colMin) {
1000 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm +
1001 emsg * maxEdgeComm] ;
1002 MPI_Wait(&domain.recvRequest[pmsg+emsg], &status) ;
1003 for (Index_t fi=0 ; fi<xferFields; ++fi) {
1004 Domain_member dest = fieldData[fi] ;
1005 for (Index_t i=0; i<dz; ++i) {
1006 (domain.*dest)(i*dx*dy) += srcAddr[i] ;
1007 }
1008 srcAddr += dz ;
1009 }
1010 ++emsg ;
1011 }
1012
1013 if (rowMin & planeMin) {
1014 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm +
1015 emsg * maxEdgeComm] ;
1016 MPI_Wait(&domain.recvRequest[pmsg+emsg], &status) ;
1017 for (Index_t fi=0 ; fi<xferFields; ++fi) {
1018 Domain_member dest = fieldData[fi] ;
1019 for (Index_t i=0; i<dx; ++i) {
1020 (domain.*dest)(i) += srcAddr[i] ;
1021 }
1022 srcAddr += dx ;
1023 }
1024 ++emsg ;
1025 }
1026
1027 if (colMin & planeMin) {
1028 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm +
1029 emsg * maxEdgeComm] ;
1030 MPI_Wait(&domain.recvRequest[pmsg+emsg], &status) ;
1031 for (Index_t fi=0 ; fi<xferFields; ++fi) {
1032 Domain_member dest = fieldData[fi] ;
1033 for (Index_t i=0; i<dy; ++i) {
1034 (domain.*dest)(i*dx) += srcAddr[i] ;
1035 }
1036 srcAddr += dy ;
1037 }
1038 ++emsg ;
1039 }
1040
1041 if (rowMax & colMax) {
1042 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm +
1043 emsg * maxEdgeComm] ;
1044 MPI_Wait(&domain.recvRequest[pmsg+emsg], &status) ;
1045 for (Index_t fi=0 ; fi<xferFields; ++fi) {
1046 Domain_member dest = fieldData[fi] ;
1047 for (Index_t i=0; i<dz; ++i) {
1048 (domain.*dest)(dx*dy - 1 + i*dx*dy) += srcAddr[i] ;
1049 }
1050 srcAddr += dz ;
1051 }
1052 ++emsg ;
1053 }
1054
1055 if (rowMax & planeMax) {
1056 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm +
1057 emsg * maxEdgeComm] ;
1058 MPI_Wait(&domain.recvRequest[pmsg+emsg], &status) ;
1059 for (Index_t fi=0 ; fi<xferFields; ++fi) {
1060 Domain_member dest = fieldData[fi] ;
1061 for (Index_t i=0; i<dx; ++i) {
1062 (domain.*dest)(dx*(dy-1) + dx*dy*(dz-1) + i) += srcAddr[i] ;
1063 }
1064 srcAddr += dx ;
1065 }
1066 ++emsg ;
1067 }
1068
1069 if (colMax & planeMax) {
1070 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm +
1071 emsg * maxEdgeComm] ;
1072 MPI_Wait(&domain.recvRequest[pmsg+emsg], &status) ;
1073 for (Index_t fi=0 ; fi<xferFields; ++fi) {
1074 Domain_member dest = fieldData[fi] ;
1075 for (Index_t i=0; i<dy; ++i) {
1076 (domain.*dest)(dx*dy*(dz-1) + dx - 1 + i*dx) += srcAddr[i] ;
1077 }
1078 srcAddr += dy ;
1079 }
1080 ++emsg ;
1081 }
1082
1083 if (rowMax & colMin) {
1084 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm +
1085 emsg * maxEdgeComm] ;
1086 MPI_Wait(&domain.recvRequest[pmsg+emsg], &status) ;
1087 for (Index_t fi=0 ; fi<xferFields; ++fi) {
1088 Domain_member dest = fieldData[fi] ;
1089 for (Index_t i=0; i<dz; ++i) {
1090 (domain.*dest)(dx*(dy-1) + i*dx*dy) += srcAddr[i] ;
1091 }
1092 srcAddr += dz ;
1093 }
1094 ++emsg ;
1095 }
1096
1097 if (rowMin & planeMax) {
1098 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm +
1099 emsg * maxEdgeComm] ;
1100 MPI_Wait(&domain.recvRequest[pmsg+emsg], &status) ;
1101 for (Index_t fi=0 ; fi<xferFields; ++fi) {
1102 Domain_member dest = fieldData[fi] ;
1103 for (Index_t i=0; i<dx; ++i) {
1104 (domain.*dest)(dx*dy*(dz-1) + i) += srcAddr[i] ;
1105 }
1106 srcAddr += dx ;
1107 }
1108 ++emsg ;
1109 }
1110
1111 if (colMin & planeMax) {
1112 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm +
1113 emsg * maxEdgeComm] ;
1114 MPI_Wait(&domain.recvRequest[pmsg+emsg], &status) ;
1115 for (Index_t fi=0 ; fi<xferFields; ++fi) {
1116 Domain_member dest = fieldData[fi] ;
1117 for (Index_t i=0; i<dy; ++i) {
1118 (domain.*dest)(dx*dy*(dz-1) + i*dx) += srcAddr[i] ;
1119 }
1120 srcAddr += dy ;
1121 }
1122 ++emsg ;
1123 }
1124
1125 if (rowMin & colMax) {
1126 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm +
1127 emsg * maxEdgeComm] ;
1128 MPI_Wait(&domain.recvRequest[pmsg+emsg], &status) ;
1129 for (Index_t fi=0 ; fi<xferFields; ++fi) {
1130 Domain_member dest = fieldData[fi] ;
1131 for (Index_t i=0; i<dz; ++i) {
1132 (domain.*dest)(dx - 1 + i*dx*dy) += srcAddr[i] ;
1133 }
1134 srcAddr += dz ;
1135 }
1136 ++emsg ;
1137 }
1138
1139 if (rowMax & planeMin) {
1140 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm +
1141 emsg * maxEdgeComm] ;
1142 MPI_Wait(&domain.recvRequest[pmsg+emsg], &status) ;
1143 for (Index_t fi=0 ; fi<xferFields; ++fi) {
1144 Domain_member dest = fieldData[fi] ;
1145 for (Index_t i=0; i<dx; ++i) {
1146 (domain.*dest)(dx*(dy - 1) + i) += srcAddr[i] ;
1147 }
1148 srcAddr += dx ;
1149 }
1150 ++emsg ;
1151 }
1152
1153 if (colMax & planeMin) {
1154 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm +
1155 emsg * maxEdgeComm] ;
1156 MPI_Wait(&domain.recvRequest[pmsg+emsg], &status) ;
1157 for (Index_t fi=0 ; fi<xferFields; ++fi) {
1158 Domain_member dest = fieldData[fi] ;
1159 for (Index_t i=0; i<dy; ++i) {
1160 (domain.*dest)(dx - 1 + i*dx) += srcAddr[i] ;
1161 }
1162 srcAddr += dy ;
1163 }
1164 ++emsg ;
1165 }
1166
1167 if (rowMin & colMin & planeMin) {
1168 /* corner at domain logical coord (0, 0, 0) */
1169 Real_t *comBuf = &domain.commDataRecv[pmsg * maxPlaneComm +
1170 emsg * maxEdgeComm +
1171 cmsg * CACHE_COHERENCE_PAD_REAL] ;
1172 MPI_Wait(&domain.recvRequest[pmsg+emsg+cmsg], &status) ;
1173 for (Index_t fi=0; fi<xferFields; ++fi) {
1174 (domain.*fieldData[fi])(0) += comBuf[fi] ;
1175 }
1176 ++cmsg ;
1177 }
1178 if (rowMin & colMin & planeMax) {
1179 /* corner at domain logical coord (0, 0, 1) */
1180 Real_t *comBuf = &domain.commDataRecv[pmsg * maxPlaneComm +
1181 emsg * maxEdgeComm +
1182 cmsg * CACHE_COHERENCE_PAD_REAL] ;
1183 Index_t idx = dx*dy*(dz - 1) ;
1184 MPI_Wait(&domain.recvRequest[pmsg+emsg+cmsg], &status) ;
1185 for (Index_t fi=0; fi<xferFields; ++fi) {
1186 (domain.*fieldData[fi])(idx) += comBuf[fi] ;
1187 }
1188 ++cmsg ;
1189 }
1190 if (rowMin & colMax & planeMin) {
1191 /* corner at domain logical coord (1, 0, 0) */
1192 Real_t *comBuf = &domain.commDataRecv[pmsg * maxPlaneComm +
1193 emsg * maxEdgeComm +
1194 cmsg * CACHE_COHERENCE_PAD_REAL] ;
1195 Index_t idx = dx - 1 ;
1196 MPI_Wait(&domain.recvRequest[pmsg+emsg+cmsg], &status) ;
1197 for (Index_t fi=0; fi<xferFields; ++fi) {
1198 (domain.*fieldData[fi])(idx) += comBuf[fi] ;
1199 }
1200 ++cmsg ;
1201 }
1202 if (rowMin & colMax & planeMax) {
1203 /* corner at domain logical coord (1, 0, 1) */
1204 Real_t *comBuf = &domain.commDataRecv[pmsg * maxPlaneComm +
1205 emsg * maxEdgeComm +
1206 cmsg * CACHE_COHERENCE_PAD_REAL] ;
1207 Index_t idx = dx*dy*(dz - 1) + (dx - 1) ;
1208 MPI_Wait(&domain.recvRequest[pmsg+emsg+cmsg], &status) ;
1209 for (Index_t fi=0; fi<xferFields; ++fi) {
1210 (domain.*fieldData[fi])(idx) += comBuf[fi] ;
1211 }
1212 ++cmsg ;
1213 }
1214 if (rowMax & colMin & planeMin) {
1215 /* corner at domain logical coord (0, 1, 0) */
1216 Real_t *comBuf = &domain.commDataRecv[pmsg * maxPlaneComm +
1217 emsg * maxEdgeComm +
1218 cmsg * CACHE_COHERENCE_PAD_REAL] ;
1219 Index_t idx = dx*(dy - 1) ;
1220 MPI_Wait(&domain.recvRequest[pmsg+emsg+cmsg], &status) ;
1221 for (Index_t fi=0; fi<xferFields; ++fi) {
1222 (domain.*fieldData[fi])(idx) += comBuf[fi] ;
1223 }
1224 ++cmsg ;
1225 }
1226 if (rowMax & colMin & planeMax) {
1227 /* corner at domain logical coord (0, 1, 1) */
1228 Real_t *comBuf = &domain.commDataRecv[pmsg * maxPlaneComm +
1229 emsg * maxEdgeComm +
1230 cmsg * CACHE_COHERENCE_PAD_REAL] ;
1231 Index_t idx = dx*dy*(dz - 1) + dx*(dy - 1) ;
1232 MPI_Wait(&domain.recvRequest[pmsg+emsg+cmsg], &status) ;
1233 for (Index_t fi=0; fi<xferFields; ++fi) {
1234 (domain.*fieldData[fi])(idx) += comBuf[fi] ;
1235 }
1236 ++cmsg ;
1237 }
1238 if (rowMax & colMax & planeMin) {
1239 /* corner at domain logical coord (1, 1, 0) */
1240 Real_t *comBuf = &domain.commDataRecv[pmsg * maxPlaneComm +
1241 emsg * maxEdgeComm +
1242 cmsg * CACHE_COHERENCE_PAD_REAL] ;
1243 Index_t idx = dx*dy - 1 ;
1244 MPI_Wait(&domain.recvRequest[pmsg+emsg+cmsg], &status) ;
1245 for (Index_t fi=0; fi<xferFields; ++fi) {
1246 (domain.*fieldData[fi])(idx) += comBuf[fi] ;
1247 }
1248 ++cmsg ;
1249 }
1250 if (rowMax & colMax & planeMax) {
1251 /* corner at domain logical coord (1, 1, 1) */
1252 Real_t *comBuf = &domain.commDataRecv[pmsg * maxPlaneComm +
1253 emsg * maxEdgeComm +
1254 cmsg * CACHE_COHERENCE_PAD_REAL] ;
1255 Index_t idx = dx*dy*dz - 1 ;
1256 MPI_Wait(&domain.recvRequest[pmsg+emsg+cmsg], &status) ;
1257 for (Index_t fi=0; fi<xferFields; ++fi) {
1258 (domain.*fieldData[fi])(idx) += comBuf[fi] ;
1259 }
1260 ++cmsg ;
1261 }
1262 }
1263
1264 /******************************************/
1265
CommSyncPosVel(Domain & domain)1266 void CommSyncPosVel(Domain& domain) {
1267
1268 if (domain.numRanks() == 1)
1269 return ;
1270
1271 int myRank ;
1272 bool doRecv = false ;
1273 Index_t xferFields = 6 ; /* x, y, z, xd, yd, zd */
1274 Domain_member fieldData[6] ;
1275 Index_t maxPlaneComm = xferFields * domain.maxPlaneSize() ;
1276 Index_t maxEdgeComm = xferFields * domain.maxEdgeSize() ;
1277 Index_t pmsg = 0 ; /* plane comm msg */
1278 Index_t emsg = 0 ; /* edge comm msg */
1279 Index_t cmsg = 0 ; /* corner comm msg */
1280 Index_t dx = domain.sizeX() + 1 ;
1281 Index_t dy = domain.sizeY() + 1 ;
1282 Index_t dz = domain.sizeZ() + 1 ;
1283 MPI_Status status ;
1284 Real_t *srcAddr ;
1285 bool rowMin, rowMax, colMin, colMax, planeMin, planeMax ;
1286
1287 /* assume communication to 6 neighbors by default */
1288 rowMin = rowMax = colMin = colMax = planeMin = planeMax = true ;
1289 if (domain.rowLoc() == 0) {
1290 rowMin = false ;
1291 }
1292 if (domain.rowLoc() == (domain.tp()-1)) {
1293 rowMax = false ;
1294 }
1295 if (domain.colLoc() == 0) {
1296 colMin = false ;
1297 }
1298 if (domain.colLoc() == (domain.tp()-1)) {
1299 colMax = false ;
1300 }
1301 if (domain.planeLoc() == 0) {
1302 planeMin = false ;
1303 }
1304 if (domain.planeLoc() == (domain.tp()-1)) {
1305 planeMax = false ;
1306 }
1307
1308 fieldData[0] = &Domain::x ;
1309 fieldData[1] = &Domain::y ;
1310 fieldData[2] = &Domain::z ;
1311 fieldData[3] = &Domain::xd ;
1312 fieldData[4] = &Domain::yd ;
1313 fieldData[5] = &Domain::zd ;
1314
1315 MPI_Comm_rank(MPI_COMM_WORLD, &myRank) ;
1316
1317 if (planeMin | planeMax) {
1318 /* ASSUMING ONE DOMAIN PER RANK, CONSTANT BLOCK SIZE HERE */
1319 Index_t opCount = dx * dy ;
1320
1321 if (planeMin && doRecv) {
1322 /* contiguous memory */
1323 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm] ;
1324 MPI_Wait(&domain.recvRequest[pmsg], &status) ;
1325 for (Index_t fi=0 ; fi<xferFields; ++fi) {
1326 Domain_member dest = fieldData[fi] ;
1327 for (Index_t i=0; i<opCount; ++i) {
1328 (domain.*dest)(i) = srcAddr[i] ;
1329 }
1330 srcAddr += opCount ;
1331 }
1332 ++pmsg ;
1333 }
1334 if (planeMax) {
1335 /* contiguous memory */
1336 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm] ;
1337 MPI_Wait(&domain.recvRequest[pmsg], &status) ;
1338 for (Index_t fi=0 ; fi<xferFields; ++fi) {
1339 Domain_member dest = fieldData[fi] ;
1340 for (Index_t i=0; i<opCount; ++i) {
1341 (domain.*dest)(dx*dy*(dz - 1) + i) = srcAddr[i] ;
1342 }
1343 srcAddr += opCount ;
1344 }
1345 ++pmsg ;
1346 }
1347 }
1348
1349 if (rowMin | rowMax) {
1350 /* ASSUMING ONE DOMAIN PER RANK, CONSTANT BLOCK SIZE HERE */
1351 Index_t opCount = dx * dz ;
1352
1353 if (rowMin && doRecv) {
1354 /* contiguous memory */
1355 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm] ;
1356 MPI_Wait(&domain.recvRequest[pmsg], &status) ;
1357 for (Index_t fi=0 ; fi<xferFields; ++fi) {
1358 Domain_member dest = fieldData[fi] ;
1359 for (Index_t i=0; i<dz; ++i) {
1360 for (Index_t j=0; j<dx; ++j) {
1361 (domain.*dest)(i*dx*dy + j) = srcAddr[i*dx + j] ;
1362 }
1363 }
1364 srcAddr += opCount ;
1365 }
1366 ++pmsg ;
1367 }
1368 if (rowMax) {
1369 /* contiguous memory */
1370 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm] ;
1371 MPI_Wait(&domain.recvRequest[pmsg], &status) ;
1372 for (Index_t fi=0 ; fi<xferFields; ++fi) {
1373 Domain_member dest = fieldData[fi] ;
1374 for (Index_t i=0; i<dz; ++i) {
1375 for (Index_t j=0; j<dx; ++j) {
1376 (domain.*dest)(dx*(dy - 1) + i*dx*dy + j) = srcAddr[i*dx + j] ;
1377 }
1378 }
1379 srcAddr += opCount ;
1380 }
1381 ++pmsg ;
1382 }
1383 }
1384
1385 if (colMin | colMax) {
1386 /* ASSUMING ONE DOMAIN PER RANK, CONSTANT BLOCK SIZE HERE */
1387 Index_t opCount = dy * dz ;
1388
1389 if (colMin && doRecv) {
1390 /* contiguous memory */
1391 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm] ;
1392 MPI_Wait(&domain.recvRequest[pmsg], &status) ;
1393 for (Index_t fi=0 ; fi<xferFields; ++fi) {
1394 Domain_member dest = fieldData[fi] ;
1395 for (Index_t i=0; i<dz; ++i) {
1396 for (Index_t j=0; j<dy; ++j) {
1397 (domain.*dest)(i*dx*dy + j*dx) = srcAddr[i*dy + j] ;
1398 }
1399 }
1400 srcAddr += opCount ;
1401 }
1402 ++pmsg ;
1403 }
1404 if (colMax) {
1405 /* contiguous memory */
1406 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm] ;
1407 MPI_Wait(&domain.recvRequest[pmsg], &status) ;
1408 for (Index_t fi=0 ; fi<xferFields; ++fi) {
1409 Domain_member dest = fieldData[fi] ;
1410 for (Index_t i=0; i<dz; ++i) {
1411 for (Index_t j=0; j<dy; ++j) {
1412 (domain.*dest)(dx - 1 + i*dx*dy + j*dx) = srcAddr[i*dy + j] ;
1413 }
1414 }
1415 srcAddr += opCount ;
1416 }
1417 ++pmsg ;
1418 }
1419 }
1420
1421 if (rowMin && colMin && doRecv) {
1422 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm +
1423 emsg * maxEdgeComm] ;
1424 MPI_Wait(&domain.recvRequest[pmsg+emsg], &status) ;
1425 for (Index_t fi=0 ; fi<xferFields; ++fi) {
1426 Domain_member dest = fieldData[fi] ;
1427 for (Index_t i=0; i<dz; ++i) {
1428 (domain.*dest)(i*dx*dy) = srcAddr[i] ;
1429 }
1430 srcAddr += dz ;
1431 }
1432 ++emsg ;
1433 }
1434
1435 if (rowMin && planeMin && doRecv) {
1436 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm +
1437 emsg * maxEdgeComm] ;
1438 MPI_Wait(&domain.recvRequest[pmsg+emsg], &status) ;
1439 for (Index_t fi=0 ; fi<xferFields; ++fi) {
1440 Domain_member dest = fieldData[fi] ;
1441 for (Index_t i=0; i<dx; ++i) {
1442 (domain.*dest)(i) = srcAddr[i] ;
1443 }
1444 srcAddr += dx ;
1445 }
1446 ++emsg ;
1447 }
1448
1449 if (colMin && planeMin && doRecv) {
1450 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm +
1451 emsg * maxEdgeComm] ;
1452 MPI_Wait(&domain.recvRequest[pmsg+emsg], &status) ;
1453 for (Index_t fi=0 ; fi<xferFields; ++fi) {
1454 Domain_member dest = fieldData[fi] ;
1455 for (Index_t i=0; i<dy; ++i) {
1456 (domain.*dest)(i*dx) = srcAddr[i] ;
1457 }
1458 srcAddr += dy ;
1459 }
1460 ++emsg ;
1461 }
1462
1463 if (rowMax && colMax) {
1464 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm +
1465 emsg * maxEdgeComm] ;
1466 MPI_Wait(&domain.recvRequest[pmsg+emsg], &status) ;
1467 for (Index_t fi=0 ; fi<xferFields; ++fi) {
1468 Domain_member dest = fieldData[fi] ;
1469 for (Index_t i=0; i<dz; ++i) {
1470 (domain.*dest)(dx*dy - 1 + i*dx*dy) = srcAddr[i] ;
1471 }
1472 srcAddr += dz ;
1473 }
1474 ++emsg ;
1475 }
1476
1477 if (rowMax && planeMax) {
1478 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm +
1479 emsg * maxEdgeComm] ;
1480 MPI_Wait(&domain.recvRequest[pmsg+emsg], &status) ;
1481 for (Index_t fi=0 ; fi<xferFields; ++fi) {
1482 Domain_member dest = fieldData[fi] ;
1483 for (Index_t i=0; i<dx; ++i) {
1484 (domain.*dest)(dx*(dy-1) + dx*dy*(dz-1) + i) = srcAddr[i] ;
1485 }
1486 srcAddr += dx ;
1487 }
1488 ++emsg ;
1489 }
1490
1491 if (colMax && planeMax) {
1492 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm +
1493 emsg * maxEdgeComm] ;
1494 MPI_Wait(&domain.recvRequest[pmsg+emsg], &status) ;
1495 for (Index_t fi=0 ; fi<xferFields; ++fi) {
1496 Domain_member dest = fieldData[fi] ;
1497 for (Index_t i=0; i<dy; ++i) {
1498 (domain.*dest)(dx*dy*(dz-1) + dx - 1 + i*dx) = srcAddr[i] ;
1499 }
1500 srcAddr += dy ;
1501 }
1502 ++emsg ;
1503 }
1504
1505 if (rowMax && colMin) {
1506 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm +
1507 emsg * maxEdgeComm] ;
1508 MPI_Wait(&domain.recvRequest[pmsg+emsg], &status) ;
1509 for (Index_t fi=0 ; fi<xferFields; ++fi) {
1510 Domain_member dest = fieldData[fi] ;
1511 for (Index_t i=0; i<dz; ++i) {
1512 (domain.*dest)(dx*(dy-1) + i*dx*dy) = srcAddr[i] ;
1513 }
1514 srcAddr += dz ;
1515 }
1516 ++emsg ;
1517 }
1518
1519 if (rowMin && planeMax) {
1520 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm +
1521 emsg * maxEdgeComm] ;
1522 MPI_Wait(&domain.recvRequest[pmsg+emsg], &status) ;
1523 for (Index_t fi=0 ; fi<xferFields; ++fi) {
1524 Domain_member dest = fieldData[fi] ;
1525 for (Index_t i=0; i<dx; ++i) {
1526 (domain.*dest)(dx*dy*(dz-1) + i) = srcAddr[i] ;
1527 }
1528 srcAddr += dx ;
1529 }
1530 ++emsg ;
1531 }
1532
1533 if (colMin && planeMax) {
1534 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm +
1535 emsg * maxEdgeComm] ;
1536 MPI_Wait(&domain.recvRequest[pmsg+emsg], &status) ;
1537 for (Index_t fi=0 ; fi<xferFields; ++fi) {
1538 Domain_member dest = fieldData[fi] ;
1539 for (Index_t i=0; i<dy; ++i) {
1540 (domain.*dest)(dx*dy*(dz-1) + i*dx) = srcAddr[i] ;
1541 }
1542 srcAddr += dy ;
1543 }
1544 ++emsg ;
1545 }
1546
1547 if (rowMin && colMax && doRecv) {
1548 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm +
1549 emsg * maxEdgeComm] ;
1550 MPI_Wait(&domain.recvRequest[pmsg+emsg], &status) ;
1551 for (Index_t fi=0 ; fi<xferFields; ++fi) {
1552 Domain_member dest = fieldData[fi] ;
1553 for (Index_t i=0; i<dz; ++i) {
1554 (domain.*dest)(dx - 1 + i*dx*dy) = srcAddr[i] ;
1555 }
1556 srcAddr += dz ;
1557 }
1558 ++emsg ;
1559 }
1560
1561 if (rowMax && planeMin && doRecv) {
1562 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm +
1563 emsg * maxEdgeComm] ;
1564 MPI_Wait(&domain.recvRequest[pmsg+emsg], &status) ;
1565 for (Index_t fi=0 ; fi<xferFields; ++fi) {
1566 Domain_member dest = fieldData[fi] ;
1567 for (Index_t i=0; i<dx; ++i) {
1568 (domain.*dest)(dx*(dy - 1) + i) = srcAddr[i] ;
1569 }
1570 srcAddr += dx ;
1571 }
1572 ++emsg ;
1573 }
1574
1575 if (colMax && planeMin && doRecv) {
1576 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm +
1577 emsg * maxEdgeComm] ;
1578 MPI_Wait(&domain.recvRequest[pmsg+emsg], &status) ;
1579 for (Index_t fi=0 ; fi<xferFields; ++fi) {
1580 Domain_member dest = fieldData[fi] ;
1581 for (Index_t i=0; i<dy; ++i) {
1582 (domain.*dest)(dx - 1 + i*dx) = srcAddr[i] ;
1583 }
1584 srcAddr += dy ;
1585 }
1586 ++emsg ;
1587 }
1588
1589
1590 if (rowMin && colMin && planeMin && doRecv) {
1591 /* corner at domain logical coord (0, 0, 0) */
1592 Real_t *comBuf = &domain.commDataRecv[pmsg * maxPlaneComm +
1593 emsg * maxEdgeComm +
1594 cmsg * CACHE_COHERENCE_PAD_REAL] ;
1595 MPI_Wait(&domain.recvRequest[pmsg+emsg+cmsg], &status) ;
1596 for (Index_t fi=0; fi<xferFields; ++fi) {
1597 (domain.*fieldData[fi])(0) = comBuf[fi] ;
1598 }
1599 ++cmsg ;
1600 }
1601 if (rowMin && colMin && planeMax) {
1602 /* corner at domain logical coord (0, 0, 1) */
1603 Real_t *comBuf = &domain.commDataRecv[pmsg * maxPlaneComm +
1604 emsg * maxEdgeComm +
1605 cmsg * CACHE_COHERENCE_PAD_REAL] ;
1606 Index_t idx = dx*dy*(dz - 1) ;
1607 MPI_Wait(&domain.recvRequest[pmsg+emsg+cmsg], &status) ;
1608 for (Index_t fi=0; fi<xferFields; ++fi) {
1609 (domain.*fieldData[fi])(idx) = comBuf[fi] ;
1610 }
1611 ++cmsg ;
1612 }
1613 if (rowMin && colMax && planeMin && doRecv) {
1614 /* corner at domain logical coord (1, 0, 0) */
1615 Real_t *comBuf = &domain.commDataRecv[pmsg * maxPlaneComm +
1616 emsg * maxEdgeComm +
1617 cmsg * CACHE_COHERENCE_PAD_REAL] ;
1618 Index_t idx = dx - 1 ;
1619 MPI_Wait(&domain.recvRequest[pmsg+emsg+cmsg], &status) ;
1620 for (Index_t fi=0; fi<xferFields; ++fi) {
1621 (domain.*fieldData[fi])(idx) = comBuf[fi] ;
1622 }
1623 ++cmsg ;
1624 }
1625 if (rowMin && colMax && planeMax) {
1626 /* corner at domain logical coord (1, 0, 1) */
1627 Real_t *comBuf = &domain.commDataRecv[pmsg * maxPlaneComm +
1628 emsg * maxEdgeComm +
1629 cmsg * CACHE_COHERENCE_PAD_REAL] ;
1630 Index_t idx = dx*dy*(dz - 1) + (dx - 1) ;
1631 MPI_Wait(&domain.recvRequest[pmsg+emsg+cmsg], &status) ;
1632 for (Index_t fi=0; fi<xferFields; ++fi) {
1633 (domain.*fieldData[fi])(idx) = comBuf[fi] ;
1634 }
1635 ++cmsg ;
1636 }
1637 if (rowMax && colMin && planeMin && doRecv) {
1638 /* corner at domain logical coord (0, 1, 0) */
1639 Real_t *comBuf = &domain.commDataRecv[pmsg * maxPlaneComm +
1640 emsg * maxEdgeComm +
1641 cmsg * CACHE_COHERENCE_PAD_REAL] ;
1642 Index_t idx = dx*(dy - 1) ;
1643 MPI_Wait(&domain.recvRequest[pmsg+emsg+cmsg], &status) ;
1644 for (Index_t fi=0; fi<xferFields; ++fi) {
1645 (domain.*fieldData[fi])(idx) = comBuf[fi] ;
1646 }
1647 ++cmsg ;
1648 }
1649 if (rowMax && colMin && planeMax) {
1650 /* corner at domain logical coord (0, 1, 1) */
1651 Real_t *comBuf = &domain.commDataRecv[pmsg * maxPlaneComm +
1652 emsg * maxEdgeComm +
1653 cmsg * CACHE_COHERENCE_PAD_REAL] ;
1654 Index_t idx = dx*dy*(dz - 1) + dx*(dy - 1) ;
1655 MPI_Wait(&domain.recvRequest[pmsg+emsg+cmsg], &status) ;
1656 for (Index_t fi=0; fi<xferFields; ++fi) {
1657 (domain.*fieldData[fi])(idx) = comBuf[fi] ;
1658 }
1659 ++cmsg ;
1660 }
1661 if (rowMax && colMax && planeMin && doRecv) {
1662 /* corner at domain logical coord (1, 1, 0) */
1663 Real_t *comBuf = &domain.commDataRecv[pmsg * maxPlaneComm +
1664 emsg * maxEdgeComm +
1665 cmsg * CACHE_COHERENCE_PAD_REAL] ;
1666 Index_t idx = dx*dy - 1 ;
1667 MPI_Wait(&domain.recvRequest[pmsg+emsg+cmsg], &status) ;
1668 for (Index_t fi=0; fi<xferFields; ++fi) {
1669 (domain.*fieldData[fi])(idx) = comBuf[fi] ;
1670 }
1671 ++cmsg ;
1672 }
1673 if (rowMax && colMax && planeMax) {
1674 /* corner at domain logical coord (1, 1, 1) */
1675 Real_t *comBuf = &domain.commDataRecv[pmsg * maxPlaneComm +
1676 emsg * maxEdgeComm +
1677 cmsg * CACHE_COHERENCE_PAD_REAL] ;
1678 Index_t idx = dx*dy*dz - 1 ;
1679 MPI_Wait(&domain.recvRequest[pmsg+emsg+cmsg], &status) ;
1680 for (Index_t fi=0; fi<xferFields; ++fi) {
1681 (domain.*fieldData[fi])(idx) = comBuf[fi] ;
1682 }
1683 ++cmsg ;
1684 }
1685 }
1686
1687 /******************************************/
1688
CommMonoQ(Domain & domain)1689 void CommMonoQ(Domain& domain)
1690 {
1691 if (domain.numRanks() == 1)
1692 return ;
1693
1694 int myRank ;
1695 Index_t xferFields = 3 ; /* delv_xi, delv_eta, delv_zeta */
1696 Domain_member fieldData[3] ;
1697 Index_t fieldOffset[3] ;
1698 Index_t maxPlaneComm = xferFields * domain.maxPlaneSize() ;
1699 Index_t pmsg = 0 ; /* plane comm msg */
1700 Index_t dx = domain.sizeX() ;
1701 Index_t dy = domain.sizeY() ;
1702 Index_t dz = domain.sizeZ() ;
1703 MPI_Status status ;
1704 Real_t *srcAddr ;
1705 bool rowMin, rowMax, colMin, colMax, planeMin, planeMax ;
1706 /* assume communication to 6 neighbors by default */
1707 rowMin = rowMax = colMin = colMax = planeMin = planeMax = true ;
1708 if (domain.rowLoc() == 0) {
1709 rowMin = false ;
1710 }
1711 if (domain.rowLoc() == (domain.tp()-1)) {
1712 rowMax = false ;
1713 }
1714 if (domain.colLoc() == 0) {
1715 colMin = false ;
1716 }
1717 if (domain.colLoc() == (domain.tp()-1)) {
1718 colMax = false ;
1719 }
1720 if (domain.planeLoc() == 0) {
1721 planeMin = false ;
1722 }
1723 if (domain.planeLoc() == (domain.tp()-1)) {
1724 planeMax = false ;
1725 }
1726
1727 /* point into ghost data area */
1728 // fieldData[0] = &(domain.delv_xi(domain.numElem())) ;
1729 // fieldData[1] = &(domain.delv_eta(domain.numElem())) ;
1730 // fieldData[2] = &(domain.delv_zeta(domain.numElem())) ;
1731 fieldData[0] = &Domain::delv_xi ;
1732 fieldData[1] = &Domain::delv_eta ;
1733 fieldData[2] = &Domain::delv_zeta ;
1734 fieldOffset[0] = domain.numElem() ;
1735 fieldOffset[1] = domain.numElem() ;
1736 fieldOffset[2] = domain.numElem() ;
1737
1738
1739 MPI_Comm_rank(MPI_COMM_WORLD, &myRank) ;
1740
1741 if (planeMin | planeMax) {
1742 /* ASSUMING ONE DOMAIN PER RANK, CONSTANT BLOCK SIZE HERE */
1743 Index_t opCount = dx * dy ;
1744
1745 if (planeMin) {
1746 /* contiguous memory */
1747 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm] ;
1748 MPI_Wait(&domain.recvRequest[pmsg], &status) ;
1749 for (Index_t fi=0 ; fi<xferFields; ++fi) {
1750 Domain_member dest = fieldData[fi] ;
1751 for (Index_t i=0; i<opCount; ++i) {
1752 (domain.*dest)(fieldOffset[fi] + i) = srcAddr[i] ;
1753 }
1754 srcAddr += opCount ;
1755 fieldOffset[fi] += opCount ;
1756 }
1757 ++pmsg ;
1758 }
1759 if (planeMax) {
1760 /* contiguous memory */
1761 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm] ;
1762 MPI_Wait(&domain.recvRequest[pmsg], &status) ;
1763 for (Index_t fi=0 ; fi<xferFields; ++fi) {
1764 Domain_member dest = fieldData[fi] ;
1765 for (Index_t i=0; i<opCount; ++i) {
1766 (domain.*dest)(fieldOffset[fi] + i) = srcAddr[i] ;
1767 }
1768 srcAddr += opCount ;
1769 fieldOffset[fi] += opCount ;
1770 }
1771 ++pmsg ;
1772 }
1773 }
1774
1775 if (rowMin | rowMax) {
1776 /* ASSUMING ONE DOMAIN PER RANK, CONSTANT BLOCK SIZE HERE */
1777 Index_t opCount = dx * dz ;
1778
1779 if (rowMin) {
1780 /* contiguous memory */
1781 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm] ;
1782 MPI_Wait(&domain.recvRequest[pmsg], &status) ;
1783 for (Index_t fi=0 ; fi<xferFields; ++fi) {
1784 Domain_member dest = fieldData[fi] ;
1785 for (Index_t i=0; i<opCount; ++i) {
1786 (domain.*dest)(fieldOffset[fi] + i) = srcAddr[i] ;
1787 }
1788 srcAddr += opCount ;
1789 fieldOffset[fi] += opCount ;
1790 }
1791 ++pmsg ;
1792 }
1793 if (rowMax) {
1794 /* contiguous memory */
1795 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm] ;
1796 MPI_Wait(&domain.recvRequest[pmsg], &status) ;
1797 for (Index_t fi=0 ; fi<xferFields; ++fi) {
1798 Domain_member dest = fieldData[fi] ;
1799 for (Index_t i=0; i<opCount; ++i) {
1800 (domain.*dest)(fieldOffset[fi] + i) = srcAddr[i] ;
1801 }
1802 srcAddr += opCount ;
1803 fieldOffset[fi] += opCount ;
1804 }
1805 ++pmsg ;
1806 }
1807 }
1808 if (colMin | colMax) {
1809 /* ASSUMING ONE DOMAIN PER RANK, CONSTANT BLOCK SIZE HERE */
1810 Index_t opCount = dy * dz ;
1811
1812 if (colMin) {
1813 /* contiguous memory */
1814 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm] ;
1815 MPI_Wait(&domain.recvRequest[pmsg], &status) ;
1816 for (Index_t fi=0 ; fi<xferFields; ++fi) {
1817 Domain_member dest = fieldData[fi] ;
1818 for (Index_t i=0; i<opCount; ++i) {
1819 (domain.*dest)(fieldOffset[fi] + i) = srcAddr[i] ;
1820 }
1821 srcAddr += opCount ;
1822 fieldOffset[fi] += opCount ;
1823 }
1824 ++pmsg ;
1825 }
1826 if (colMax) {
1827 /* contiguous memory */
1828 srcAddr = &domain.commDataRecv[pmsg * maxPlaneComm] ;
1829 MPI_Wait(&domain.recvRequest[pmsg], &status) ;
1830 for (Index_t fi=0 ; fi<xferFields; ++fi) {
1831 Domain_member dest = fieldData[fi] ;
1832 for (Index_t i=0; i<opCount; ++i) {
1833 (domain.*dest)(fieldOffset[fi] + i) = srcAddr[i] ;
1834 }
1835 srcAddr += opCount ;
1836 }
1837 ++pmsg ;
1838 }
1839 }
1840 }
1841
1842 #endif
1843