1 /*
2 * This file is part of the GROMACS molecular simulation package.
3 *
4 * Copyright (c) 2005,2006,2007,2008,2009 by the GROMACS development team.
5 * Copyright (c) 2010,2011,2012,2013,2014 by the GROMACS development team.
6 * Copyright (c) 2015,2016,2017,2018,2019 by the GROMACS development team.
7 * Copyright (c) 2020,2021, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
11 *
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
16 *
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
21 *
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 *
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
34 *
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
37 */
38
39 #include "gmxpre.h"
40
41 #include "domdec.h"
42
43 #include "config.h"
44
45 #include <cassert>
46 #include <cinttypes>
47 #include <climits>
48 #include <cmath>
49 #include <cstdio>
50 #include <cstdlib>
51 #include <cstring>
52
53 #include <algorithm>
54 #include <memory>
55
56 #include "gromacs/domdec/builder.h"
57 #include "gromacs/domdec/collect.h"
58 #include "gromacs/domdec/dlb.h"
59 #include "gromacs/domdec/dlbtiming.h"
60 #include "gromacs/domdec/domdec_network.h"
61 #include "gromacs/domdec/ga2la.h"
62 #include "gromacs/domdec/gpuhaloexchange.h"
63 #include "gromacs/domdec/options.h"
64 #include "gromacs/domdec/partition.h"
65 #include "gromacs/gmxlib/network.h"
66 #include "gromacs/gmxlib/nrnb.h"
67 #include "gromacs/gpu_utils/device_stream_manager.h"
68 #include "gromacs/gpu_utils/gpu_utils.h"
69 #include "gromacs/hardware/hw_info.h"
70 #include "gromacs/math/vec.h"
71 #include "gromacs/math/vectypes.h"
72 #include "gromacs/mdlib/calc_verletbuf.h"
73 #include "gromacs/mdlib/constr.h"
74 #include "gromacs/mdlib/constraintrange.h"
75 #include "gromacs/mdlib/updategroups.h"
76 #include "gromacs/mdlib/vsite.h"
77 #include "gromacs/mdtypes/commrec.h"
78 #include "gromacs/mdtypes/forceoutput.h"
79 #include "gromacs/mdtypes/inputrec.h"
80 #include "gromacs/mdtypes/mdrunoptions.h"
81 #include "gromacs/mdtypes/state.h"
82 #include "gromacs/pbcutil/ishift.h"
83 #include "gromacs/pbcutil/pbc.h"
84 #include "gromacs/pulling/pull.h"
85 #include "gromacs/timing/wallcycle.h"
86 #include "gromacs/topology/block.h"
87 #include "gromacs/topology/idef.h"
88 #include "gromacs/topology/ifunc.h"
89 #include "gromacs/topology/mtop_lookup.h"
90 #include "gromacs/topology/mtop_util.h"
91 #include "gromacs/topology/topology.h"
92 #include "gromacs/utility/basedefinitions.h"
93 #include "gromacs/utility/basenetwork.h"
94 #include "gromacs/utility/cstringutil.h"
95 #include "gromacs/utility/exceptions.h"
96 #include "gromacs/utility/fatalerror.h"
97 #include "gromacs/utility/gmxmpi.h"
98 #include "gromacs/utility/logger.h"
99 #include "gromacs/utility/real.h"
100 #include "gromacs/utility/smalloc.h"
101 #include "gromacs/utility/strconvert.h"
102 #include "gromacs/utility/stringstream.h"
103 #include "gromacs/utility/stringutil.h"
104 #include "gromacs/utility/textwriter.h"
105
106 #include "atomdistribution.h"
107 #include "box.h"
108 #include "cellsizes.h"
109 #include "distribute.h"
110 #include "domdec_constraints.h"
111 #include "domdec_internal.h"
112 #include "domdec_setup.h"
113 #include "domdec_vsite.h"
114 #include "redistribute.h"
115 #include "utility.h"
116
117 // TODO remove this when moving domdec into gmx namespace
118 using gmx::DdRankOrder;
119 using gmx::DlbOption;
120 using gmx::DomdecOptions;
121
/* User-visible names for the DlbState enum values (index with int(DlbState));
 * several internal states map to the same user-facing description. */
static const char* edlbs_names[int(DlbState::nr)] = { "off", "off", "auto", "locked", "on", "on" };

/* The size per atom group of the cggl_flag buffer in gmx_domdec_comm_t */
#define DD_CGIBS 2

/* The flags for the cggl_flag buffer in gmx_domdec_comm_t:
 * the low 16 bits hold the atom-group count (masked by DD_FLAG_NRCG),
 * the higher bits mark forward/backward moves along decomposition dimension d. */
#define DD_FLAG_NRCG 65535
#define DD_FLAG_FW(d) (1 << (16 + (d)*2))
#define DD_FLAG_BW(d) (1 << (16 + (d)*2 + 1))

/* The DD zone order: the Cartesian offsets of the 8 zones relative to the home zone */
static const ivec dd_zo[DD_MAXZONE] = { { 0, 0, 0 }, { 1, 0, 0 }, { 1, 1, 0 }, { 0, 1, 0 },
                                        { 0, 1, 1 }, { 0, 0, 1 }, { 1, 0, 1 }, { 1, 1, 1 } };

/* The non-bonded zone-pair setup for domain decomposition
 * The first number is the i-zone, the second number the first j-zone seen by
 * this i-zone, the third number the last+1 j-zone seen by this i-zone.
 * As is, this is for 3D decomposition, where there are 4 i-zones.
 * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
 * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
 */
static const int ddNonbondedZonePairRanges[DD_MAXIZONE][3] = { { 0, 0, 8 },
                                                               { 1, 3, 6 },
                                                               { 2, 5, 6 },
                                                               { 3, 5, 7 } };
147
ddindex2xyz(const ivec nc,int ind,ivec xyz)148 static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
149 {
150 xyz[XX] = ind / (nc[YY] * nc[ZZ]);
151 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
152 xyz[ZZ] = ind % nc[ZZ];
153 }
154
/* Returns the DD rank id for the domain with cell coordinates c,
 * or -1 when it cannot be determined (e.g. no MPI support).
 * The lookup method depends on how the ranks were laid out at setup. */
static int ddcoord2ddnodeid(gmx_domdec_t* dd, ivec c)
{
    int ddnodeid = -1;

    const CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
    const int                 ddindex   = dd_index(dd->numCells, c);
    if (cartSetup.bCartesianPP_PME)
    {
        /* Mixed PP+PME Cartesian communicator: use the precomputed table */
        ddnodeid = cartSetup.ddindex2ddnodeid[ddindex];
    }
    else if (cartSetup.bCartesianPP)
    {
        /* Pure-PP Cartesian communicator: ask MPI for the rank */
#if GMX_MPI
        MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
#endif
    }
    else
    {
        /* Default ordering: the DD index equals the rank id */
        ddnodeid = ddindex;
    }

    return ddnodeid;
}
178
ddglatnr(const gmx_domdec_t * dd,int i)179 int ddglatnr(const gmx_domdec_t* dd, int i)
180 {
181 int atnr;
182
183 if (dd == nullptr)
184 {
185 atnr = i + 1;
186 }
187 else
188 {
189 if (i >= dd->comm->atomRanges.numAtomsTotal())
190 {
191 gmx_fatal(FARGS,
192 "glatnr called with %d, which is larger than the local number of atoms (%d)",
193 i, dd->comm->atomRanges.numAtomsTotal());
194 }
195 atnr = dd->globalAtomIndices[i] + 1;
196 }
197
198 return atnr;
199 }
200
/* Returns the update-group partitioning for each molecule type */
gmx::ArrayRef<const gmx::RangePartitioning> getUpdateGroupingPerMoleculetype(const gmx_domdec_t& dd)
{
    GMX_RELEASE_ASSERT(dd.comm, "Need a valid dd.comm");
    return dd.comm->systemInfo.updateGroupingPerMoleculetype;
}
206
dd_store_state(gmx_domdec_t * dd,t_state * state)207 void dd_store_state(gmx_domdec_t* dd, t_state* state)
208 {
209 int i;
210
211 if (state->ddp_count != dd->ddp_count)
212 {
213 gmx_incons("The MD state does not match the domain decomposition state");
214 }
215
216 state->cg_gl.resize(dd->ncg_home);
217 for (i = 0; i < dd->ncg_home; i++)
218 {
219 state->cg_gl[i] = dd->globalAtomGroupIndices[i];
220 }
221
222 state->ddp_count_cg_gl = dd->ddp_count;
223 }
224
/* Returns a pointer to the DD zone description stored in the communication struct */
gmx_domdec_zones_t* domdec_zones(gmx_domdec_t* dd)
{
    return &dd->comm->zones;
}
229
/* Returns the total number of atoms in all zones, i.e. home + communicated halo atoms */
int dd_numAtomsZones(const gmx_domdec_t& dd)
{
    return dd.comm->atomRanges.end(DDAtomRanges::Type::Zones);
}
234
/* Returns the number of home atoms, i.e. atoms owned by this domain */
int dd_numHomeAtoms(const gmx_domdec_t& dd)
{
    return dd.comm->atomRanges.numHomeAtoms();
}
239
/* Returns the number of atoms for which mdatoms entries are needed */
int dd_natoms_mdatoms(const gmx_domdec_t* dd)
{
    /* We currently set mdatoms entries for all atoms:
     * local + non-local + communicated for vsite + constraints
     */

    return dd->comm->atomRanges.numAtomsTotal();
}
248
/* Returns the number of atoms up to and including those communicated for vsites */
int dd_natoms_vsite(const gmx_domdec_t* dd)
{
    return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
}
253
/* Returns, through the output parameters, the local atom index range
 * [*at_start, *at_end) of the atoms communicated for constraints. */
void dd_get_constraint_range(const gmx_domdec_t* dd, int* at_start, int* at_end)
{
    *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
    *at_end   = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
}
259
/* Communicates the coordinates x of the halo atoms between domains.
 *
 * Loops over the decomposed dimensions in forward order; for each
 * communication pulse the selected home/zone coordinates are packed into
 * a send buffer and received either directly in place in x (the common
 * case) or via an intermediate buffer that is then scattered per zone.
 * Domains at the lower box edge (cell index 0) apply the periodic shift,
 * or the screw-PBC rotation, while packing.
 */
void dd_move_x(gmx_domdec_t* dd, const matrix box, gmx::ArrayRef<gmx::RVec> x, gmx_wallcycle* wcycle)
{
    wallcycle_start(wcycle, ewcMOVEX);

    int                    nzone, nat_tot;
    gmx_domdec_comm_t*     comm;
    gmx_domdec_comm_dim_t* cd;
    rvec                   shift = { 0, 0, 0 };
    gmx_bool               bPBC, bScrew;

    comm = dd->comm;

    /* nzone doubles each dimension: the number of zones communicated so far */
    nzone   = 1;
    nat_tot = comm->atomRanges.numHomeAtoms();
    for (int d = 0; d < dd->ndim; d++)
    {
        /* Only the domain at the lower edge of the box wraps around and
         * needs to apply the periodic shift to the sent coordinates */
        bPBC   = (dd->ci[dd->dim[d]] == 0);
        bScrew = (bPBC && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
        if (bPBC)
        {
            copy_rvec(box[dd->dim[d]], shift);
        }
        cd = &comm->cd[d];
        for (const gmx_domdec_ind_t& ind : cd->ind)
        {
            /* ind.nsend[nzone + 1] is the total send count over all zones of this pulse */
            DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
            gmx::ArrayRef<gmx::RVec>& sendBuffer = sendBufferAccess.buffer;
            int                       n          = 0;
            if (!bPBC)
            {
                /* Plain copy, no shift needed */
                for (int j : ind.index)
                {
                    sendBuffer[n] = x[j];
                    n++;
                }
            }
            else if (!bScrew)
            {
                for (int j : ind.index)
                {
                    /* We need to shift the coordinates */
                    for (int d = 0; d < DIM; d++) /* NOTE: shadows the dimension loop index d */
                    {
                        sendBuffer[n][d] = x[j][d] + shift[d];
                    }
                    n++;
                }
            }
            else
            {
                for (int j : ind.index)
                {
                    /* Shift x */
                    sendBuffer[n][XX] = x[j][XX] + shift[XX];
                    /* Rotate y and z.
                     * This operation requires a special shift force
                     * treatment, which is performed in calc_vir.
                     */
                    sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
                    sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
                    n++;
                }
            }

            /* Size 0 when receiving in place: the RAII access then manages no storage */
            DDBufferAccess<gmx::RVec> receiveBufferAccess(
                    comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);

            gmx::ArrayRef<gmx::RVec> receiveBuffer;
            if (cd->receiveInPlace)
            {
                /* The halo coordinates land contiguously at the end of x */
                receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
            }
            else
            {
                receiveBuffer = receiveBufferAccess.buffer;
            }
            /* Send and receive the coordinates */
            ddSendrecv(dd, d, dddirBackward, sendBuffer, receiveBuffer);

            if (!cd->receiveInPlace)
            {
                /* Scatter the received coordinates to their per-zone locations */
                int j = 0;
                for (int zone = 0; zone < nzone; zone++)
                {
                    for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
                    {
                        x[i] = receiveBuffer[j++];
                    }
                }
            }
            nat_tot += ind.nrecv[nzone + 1];
        }
        nzone += nzone;
    }

    wallcycle_stop(wcycle, ewcMOVEX);
}
357
/* Communicates the halo forces back to their home domains and reduces them.
 *
 * This is the mirror operation of dd_move_x(): dimensions and pulses are
 * traversed in reverse order. Received forces are added onto the local
 * forces; when the virial is computed, contributions crossing a periodic
 * boundary are additionally accumulated into the matching shift force.
 */
void dd_move_f(gmx_domdec_t* dd, gmx::ForceWithShiftForces* forceWithShiftForces, gmx_wallcycle* wcycle)
{
    wallcycle_start(wcycle, ewcMOVEF);

    gmx::ArrayRef<gmx::RVec> f      = forceWithShiftForces->force();
    gmx::ArrayRef<gmx::RVec> fshift = forceWithShiftForces->shiftForces();

    gmx_domdec_comm_t& comm    = *dd->comm;
    int                nzone   = comm.zones.n / 2;
    int                nat_tot = comm.atomRanges.end(DDAtomRanges::Type::Zones);
    for (int d = dd->ndim - 1; d >= 0; d--)
    {
        /* Only forces in domains near the PBC boundaries need to
           consider PBC in the treatment of fshift */
        const bool shiftForcesNeedPbc =
                (forceWithShiftForces->computeVirial() && dd->ci[dd->dim[d]] == 0);
        const bool applyScrewPbc =
                (shiftForcesNeedPbc && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
        /* Determine which shift vector we need */
        ivec vis        = { 0, 0, 0 };
        vis[dd->dim[d]] = 1;
        const int is    = IVEC2IS(vis);

        /* Loop over the pulses */
        const gmx_domdec_comm_dim_t& cd = comm.cd[d];
        for (int p = cd.numPulses() - 1; p >= 0; p--)
        {
            const gmx_domdec_ind_t&   ind = cd.ind[p];
            /* We receive as many forces as we sent coordinates in dd_move_x() */
            DDBufferAccess<gmx::RVec> receiveBufferAccess(comm.rvecBuffer, ind.nsend[nzone + 1]);
            gmx::ArrayRef<gmx::RVec>& receiveBuffer = receiveBufferAccess.buffer;

            nat_tot -= ind.nrecv[nzone + 1];

            /* Size 0 when sending in place: the RAII access then manages no storage */
            DDBufferAccess<gmx::RVec> sendBufferAccess(
                    comm.rvecBuffer2, cd.receiveInPlace ? 0 : ind.nrecv[nzone + 1]);

            gmx::ArrayRef<gmx::RVec> sendBuffer;
            if (cd.receiveInPlace)
            {
                /* The halo forces are contiguous in f, send directly from there */
                sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
            }
            else
            {
                /* Gather the halo forces per zone into the send buffer */
                sendBuffer = sendBufferAccess.buffer;
                int j      = 0;
                for (int zone = 0; zone < nzone; zone++)
                {
                    for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
                    {
                        sendBuffer[j++] = f[i];
                    }
                }
            }
            /* Communicate the forces */
            ddSendrecv(dd, d, dddirForward, sendBuffer, receiveBuffer);
            /* Add the received forces */
            int n = 0;
            if (!shiftForcesNeedPbc)
            {
                for (int j : ind.index)
                {
                    for (int d = 0; d < DIM; d++) /* NOTE: shadows the dimension loop index d */
                    {
                        f[j][d] += receiveBuffer[n][d];
                    }
                    n++;
                }
            }
            else if (!applyScrewPbc)
            {
                for (int j : ind.index)
                {
                    for (int d = 0; d < DIM; d++)
                    {
                        f[j][d] += receiveBuffer[n][d];
                    }
                    /* Add this force to the shift force */
                    for (int d = 0; d < DIM; d++)
                    {
                        fshift[is][d] += receiveBuffer[n][d];
                    }
                    n++;
                }
            }
            else
            {
                for (int j : ind.index)
                {
                    /* Rotate the force: inverse of the coordinate rotation in dd_move_x() */
                    f[j][XX] += receiveBuffer[n][XX];
                    f[j][YY] -= receiveBuffer[n][YY];
                    f[j][ZZ] -= receiveBuffer[n][ZZ];
                    if (shiftForcesNeedPbc)
                    {
                        /* Add this force to the shift force */
                        for (int d = 0; d < DIM; d++)
                        {
                            fshift[is][d] += receiveBuffer[n][d];
                        }
                    }
                    n++;
                }
            }
        }
        nzone /= 2;
    }
    wallcycle_stop(wcycle, ewcMOVEF);
}
466
467 /* Convenience function for extracting a real buffer from an rvec buffer
468 *
469 * To reduce the number of temporary communication buffers and avoid
470 * cache polution, we reuse gmx::RVec buffers for storing reals.
471 * This functions return a real buffer reference with the same number
472 * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
473 */
realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)474 static gmx::ArrayRef<real> realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
475 {
476 return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0], arrayRef.size());
477 }
478
/* Spreads the per-atom real values v from home atoms to the halo atoms,
 * following the same communication pattern as dd_move_x() but for a
 * single real per atom instead of an RVec. */
void dd_atom_spread_real(gmx_domdec_t* dd, real v[])
{
    int                    nzone, nat_tot;
    gmx_domdec_comm_t*     comm;
    gmx_domdec_comm_dim_t* cd;

    comm = dd->comm;

    /* nzone doubles each dimension: the number of zones communicated so far */
    nzone   = 1;
    nat_tot = comm->atomRanges.numHomeAtoms();
    for (int d = 0; d < dd->ndim; d++)
    {
        cd = &comm->cd[d];
        for (const gmx_domdec_ind_t& ind : cd->ind)
        {
            /* Note: We provision for RVec instead of real, so a factor of 3
             * more than needed. The buffer actually already has this size
             * and we pass a plain pointer below, so this does not matter.
             */
            DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
            gmx::ArrayRef<real> sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
            int                 n          = 0;
            for (int j : ind.index)
            {
                sendBuffer[n++] = v[j];
            }

            /* Size 0 when receiving in place: the RAII access then manages no storage */
            DDBufferAccess<gmx::RVec> receiveBufferAccess(
                    comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);

            gmx::ArrayRef<real> receiveBuffer;
            if (cd->receiveInPlace)
            {
                /* The halo values land contiguously at the end of v */
                receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
            }
            else
            {
                receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
            }
            /* Send and receive the data */
            ddSendrecv(dd, d, dddirBackward, sendBuffer, receiveBuffer);
            if (!cd->receiveInPlace)
            {
                /* Scatter the received values to their per-zone locations */
                int j = 0;
                for (int zone = 0; zone < nzone; zone++)
                {
                    for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
                    {
                        v[i] = receiveBuffer[j++];
                    }
                }
            }
            nat_tot += ind.nrecv[nzone + 1];
        }
        nzone += nzone;
    }
}
536
/* Sums the per-atom real values v of halo atoms back onto the home atoms,
 * following the same reverse communication pattern as dd_move_f() but for
 * a single real per atom instead of an RVec. */
void dd_atom_sum_real(gmx_domdec_t* dd, real v[])
{
    int                    nzone, nat_tot;
    gmx_domdec_comm_t*     comm;
    gmx_domdec_comm_dim_t* cd;

    comm = dd->comm;

    nzone   = comm->zones.n / 2;
    nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
    for (int d = dd->ndim - 1; d >= 0; d--)
    {
        cd = &comm->cd[d];
        for (int p = cd->numPulses() - 1; p >= 0; p--)
        {
            const gmx_domdec_ind_t& ind = cd->ind[p];

            /* Note: We provision for RVec instead of real, so a factor of 3
             * more than needed. The buffer actually already has this size
             * and we typecast, so this works as intended.
             */
            DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
            gmx::ArrayRef<real> receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
            nat_tot -= ind.nrecv[nzone + 1];

            /* Size 0 when sending in place: the RAII access then manages no storage */
            DDBufferAccess<gmx::RVec> sendBufferAccess(
                    comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);

            gmx::ArrayRef<real> sendBuffer;
            if (cd->receiveInPlace)
            {
                /* The halo values are contiguous in v, send directly from there */
                sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
            }
            else
            {
                /* Gather the halo values per zone into the send buffer */
                sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
                int j      = 0;
                for (int zone = 0; zone < nzone; zone++)
                {
                    for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
                    {
                        sendBuffer[j++] = v[i];
                    }
                }
            }
            /* Communicate the values (forward, i.e. back towards the home domain) */
            ddSendrecv(dd, d, dddirForward, sendBuffer, receiveBuffer);
            /* Add the received values onto the home atoms */
            int n = 0;
            for (int j : ind.index)
            {
                v[j] += receiveBuffer[n];
                n++;
            }
        }
        nzone /= 2;
    }
}
595
dd_cutoff_multibody(const gmx_domdec_t * dd)596 real dd_cutoff_multibody(const gmx_domdec_t* dd)
597 {
598 const gmx_domdec_comm_t& comm = *dd->comm;
599 const DDSystemInfo& systemInfo = comm.systemInfo;
600
601 real r = -1;
602 if (systemInfo.haveInterDomainMultiBodyBondeds)
603 {
604 if (comm.cutoff_mbody > 0)
605 {
606 r = comm.cutoff_mbody;
607 }
608 else
609 {
610 /* cutoff_mbody=0 means we do not have DLB */
611 r = comm.cellsize_min[dd->dim[0]];
612 for (int di = 1; di < dd->ndim; di++)
613 {
614 r = std::min(r, comm.cellsize_min[dd->dim[di]]);
615 }
616 if (comm.systemInfo.filterBondedCommunication)
617 {
618 r = std::max(r, comm.cutoff_mbody);
619 }
620 else
621 {
622 r = std::min(r, systemInfo.cutoff);
623 }
624 }
625 }
626
627 return r;
628 }
629
dd_cutoff_twobody(const gmx_domdec_t * dd)630 real dd_cutoff_twobody(const gmx_domdec_t* dd)
631 {
632 real r_mb;
633
634 r_mb = dd_cutoff_multibody(dd);
635
636 return std::max(dd->comm->systemInfo.cutoff, r_mb);
637 }
638
639
dd_cart_coord2pmecoord(const DDRankSetup & ddRankSetup,const CartesianRankSetup & cartSetup,const ivec coord,ivec coord_pme)640 static void dd_cart_coord2pmecoord(const DDRankSetup& ddRankSetup,
641 const CartesianRankSetup& cartSetup,
642 const ivec coord,
643 ivec coord_pme)
644 {
645 const int nc = ddRankSetup.numPPCells[cartSetup.cartpmedim];
646 const int ntot = cartSetup.ntot[cartSetup.cartpmedim];
647 copy_ivec(coord, coord_pme);
648 coord_pme[cartSetup.cartpmedim] =
649 nc + (coord[cartSetup.cartpmedim] * (ntot - nc) + (ntot - nc) / 2) / nc;
650 }
651
652 /* Returns the PME rank index in 0...npmenodes-1 for the PP cell with index ddCellIndex */
ddindex2pmeindex(const DDRankSetup & ddRankSetup,const int ddCellIndex)653 static int ddindex2pmeindex(const DDRankSetup& ddRankSetup, const int ddCellIndex)
654 {
655 const int npp = ddRankSetup.numPPRanks;
656 const int npme = ddRankSetup.numRanksDoingPme;
657
658 /* Here we assign a PME node to communicate with this DD node
659 * by assuming that the major index of both is x.
660 * We add npme/2 to obtain an even distribution.
661 */
662 return (ddCellIndex * npme + npme / 2) / npp;
663 }
664
/* Returns the list of simulation rank ids that are PME-only ranks when
 * PP and PME ranks are interleaved.
 *
 * A PME rank is inserted after the last PP rank of each group of PP ranks
 * that maps to the same PME index; since n PME ranks have already been
 * inserted before PP index i, its simulation rank id is i + 1 + n. */
static std::vector<int> dd_interleaved_pme_ranks(const DDRankSetup& ddRankSetup)
{
    std::vector<int> pmeRanks(ddRankSetup.numRanksDoingPme);

    int n = 0;
    for (int i = 0; i < ddRankSetup.numPPRanks; i++)
    {
        const int p0 = ddindex2pmeindex(ddRankSetup, i);
        const int p1 = ddindex2pmeindex(ddRankSetup, i + 1);
        /* Insert a PME rank after the last PP rank of each PME group */
        if (i + 1 == ddRankSetup.numPPRanks || p1 > p0)
        {
            if (debug)
            {
                fprintf(debug, "pme_rank[%d] = %d\n", n, i + 1 + n);
            }
            pmeRanks[n] = i + 1 + n;
            n++;
        }
    }

    return pmeRanks;
}
687
gmx_ddcoord2pmeindex(const t_commrec * cr,int x,int y,int z)688 static int gmx_ddcoord2pmeindex(const t_commrec* cr, int x, int y, int z)
689 {
690 gmx_domdec_t* dd;
691 ivec coords;
692 int slab;
693
694 dd = cr->dd;
695 coords[XX] = x;
696 coords[YY] = y;
697 coords[ZZ] = z;
698 slab = ddindex2pmeindex(dd->comm->ddRankSetup, dd_index(dd->numCells, coords));
699
700 return slab;
701 }
702
/* Returns the simulation rank id for the DD cell with coordinates (x, y, z),
 * or -1 when it cannot be determined (e.g. no MPI support).
 * The lookup method depends on the rank ordering chosen at setup. */
static int ddcoord2simnodeid(const t_commrec* cr, int x, int y, int z)
{
    const CartesianRankSetup& cartSetup = cr->dd->comm->cartesianRankSetup;
    ivec                      coords    = { x, y, z };
    int                       nodeid    = -1;

    if (cartSetup.bCartesianPP_PME)
    {
        /* Mixed PP+PME Cartesian communicator: ask MPI for the rank */
#if GMX_MPI
        MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
#endif
    }
    else
    {
        int ddindex = dd_index(cr->dd->numCells, coords);
        if (cartSetup.bCartesianPP)
        {
            /* Pure-PP Cartesian communicator: use the precomputed table */
            nodeid = cartSetup.ddindex2simnodeid[ddindex];
        }
        else
        {
            const DDRankSetup& rankSetup = cr->dd->comm->ddRankSetup;
            if (rankSetup.rankOrder != DdRankOrder::pp_pme && rankSetup.usePmeOnlyRanks)
            {
                /* Interleaved ordering: the PME ranks inserted before this
                 * PP rank shift its simulation rank id */
                nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
            }
            else
            {
                nodeid = ddindex;
            }
        }
    }

    return nodeid;
}
738
/* Returns the simulation rank id of the PME-only rank that serves the
 * PP rank sim_nodeid, or -1 when sim_nodeid is not a PP rank. */
static int dd_simnode2pmenode(const DDRankSetup&          ddRankSetup,
                              const CartesianRankSetup&   cartSetup,
                              gmx::ArrayRef<const int>    pmeRanks,
                              const t_commrec gmx_unused* cr,
                              const int                   sim_nodeid)
{
    int pmenode = -1;

    /* This assumes a uniform x domain decomposition grid cell size */
    if (cartSetup.bCartesianPP_PME)
    {
#if GMX_MPI
        ivec coord, coord_pme;
        MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
        if (coord[cartSetup.cartpmedim] < ddRankSetup.numPPCells[cartSetup.cartpmedim])
        {
            /* This is a PP rank */
            dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
            MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
        }
#endif
    }
    else if (cartSetup.bCartesianPP)
    {
        /* PP ranks come first, the PME ranks follow in DD-index order */
        if (sim_nodeid < ddRankSetup.numPPRanks)
        {
            pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
        }
    }
    else
    {
        /* This assumes DD cells with identical x coordinates
         * are numbered sequentially.
         */
        if (pmeRanks.empty())
        {
            if (sim_nodeid < ddRankSetup.numPPRanks)
            {
                /* The DD index equals the nodeid */
                pmenode = ddRankSetup.numPPRanks + ddindex2pmeindex(ddRankSetup, sim_nodeid);
            }
        }
        else
        {
            /* Interleaved ordering: find the first PME rank after sim_nodeid.
             * NOTE(review): assumes the last entry of pmeRanks is >= any PP
             * rank id, so the scan terminates - holds for interleaved setups. */
            int i = 0;
            while (sim_nodeid > pmeRanks[i])
            {
                i++;
            }
            if (sim_nodeid < pmeRanks[i])
            {
                pmenode = pmeRanks[i];
            }
        }
    }

    return pmenode;
}
797
getNumPmeDomains(const gmx_domdec_t * dd)798 NumPmeDomains getNumPmeDomains(const gmx_domdec_t* dd)
799 {
800 if (dd != nullptr)
801 {
802 return { dd->comm->ddRankSetup.npmenodes_x, dd->comm->ddRankSetup.npmenodes_y };
803 }
804 else
805 {
806 return { 1, 1 };
807 }
808 }
809
/* Returns the list of simulation rank ids of the PP ranks that the PME-only
 * rank pmenodeid serves. Only valid when PME-only ranks are in use. */
std::vector<int> get_pme_ddranks(const t_commrec* cr, const int pmenodeid)
{
    const DDRankSetup&        ddRankSetup = cr->dd->comm->ddRankSetup;
    const CartesianRankSetup& cartSetup   = cr->dd->comm->cartesianRankSetup;
    GMX_RELEASE_ASSERT(ddRankSetup.usePmeOnlyRanks,
                       "This function should only be called when PME-only ranks are in use");
    std::vector<int> ddranks;
    /* On average each PME rank serves numPPRanks/numRanksDoingPme PP ranks (rounded up) */
    ddranks.reserve((ddRankSetup.numPPRanks + ddRankSetup.numRanksDoingPme - 1) / ddRankSetup.numRanksDoingPme);

    /* Scan all PP cells and collect those that map to this PME rank */
    for (int x = 0; x < ddRankSetup.numPPCells[XX]; x++)
    {
        for (int y = 0; y < ddRankSetup.numPPCells[YY]; y++)
        {
            for (int z = 0; z < ddRankSetup.numPPCells[ZZ]; z++)
            {
                if (cartSetup.bCartesianPP_PME)
                {
                    ivec coord = { x, y, z };
                    ivec coord_pme;
                    dd_cart_coord2pmecoord(ddRankSetup, cartSetup, coord, coord_pme);
                    /* NOTE(review): compares against this rank's own Cartesian
                     * coordinates (cr->dd->ci) rather than pmenodeid here */
                    if (cr->dd->ci[XX] == coord_pme[XX] && cr->dd->ci[YY] == coord_pme[YY]
                        && cr->dd->ci[ZZ] == coord_pme[ZZ])
                    {
                        ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
                    }
                }
                else
                {
                    /* The slab corresponds to the nodeid in the PME group */
                    if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
                    {
                        ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
                    }
                }
            }
        }
    }
    return ddranks;
}
849
/* Returns whether this PP rank should receive the virial and energy
 * contributions from its PME rank.
 *
 * Each PME rank serves several PP ranks; only the last PP rank in the
 * group receives the results, so the answer is TRUE unless a later PP
 * rank maps to the same PME rank. */
static gmx_bool receive_vir_ener(const gmx_domdec_t* dd, gmx::ArrayRef<const int> pmeRanks, const t_commrec* cr)
{
    gmx_bool bReceive = TRUE;

    const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
    if (ddRankSetup.usePmeOnlyRanks)
    {
        const CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
        if (cartSetup.bCartesianPP_PME)
        {
#if GMX_MPI
            /* Check whether the next PP rank along the PME-decomposed
             * dimension maps to the same PME rank as we do */
            int  pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
            ivec coords;
            MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
            coords[cartSetup.cartpmedim]++;
            if (coords[cartSetup.cartpmedim] < dd->numCells[cartSetup.cartpmedim])
            {
                int rank;
                MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
                if (dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, rank) == pmenode)
                {
                    /* This is not the last PP node for pmenode */
                    bReceive = FALSE;
                }
            }
#else
            GMX_RELEASE_ASSERT(
                    false,
                    "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
#endif
        }
        else
        {
            /* Non-Cartesian ordering: check the next simulation rank */
            int pmenode = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
            if (cr->sim_nodeid + 1 < cr->nnodes
                && dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid + 1) == pmenode)
            {
                /* This is not the last PP node for pmenode */
                bReceive = FALSE;
            }
        }
    }

    return bReceive;
}
895
set_slb_pme_dim_f(gmx_domdec_t * dd,int dim,real ** dim_f)896 static void set_slb_pme_dim_f(gmx_domdec_t* dd, int dim, real** dim_f)
897 {
898 gmx_domdec_comm_t* comm;
899 int i;
900
901 comm = dd->comm;
902
903 snew(*dim_f, dd->numCells[dim] + 1);
904 (*dim_f)[0] = 0;
905 for (i = 1; i < dd->numCells[dim]; i++)
906 {
907 if (comm->slb_frac[dim])
908 {
909 (*dim_f)[i] = (*dim_f)[i - 1] + comm->slb_frac[dim][i - 1];
910 }
911 else
912 {
913 (*dim_f)[i] = static_cast<real>(i) / static_cast<real>(dd->numCells[dim]);
914 }
915 }
916 (*dim_f)[dd->numCells[dim]] = 1;
917 }
918
/* Initializes the PME slab setup ddpme for PME grid dimension dimind:
 * determines which DD dimension the slabs run along, the slab count,
 * and for each slab the min/max PP cell location it covers. */
static void init_ddpme(gmx_domdec_t* dd, gmx_ddpme_t* ddpme, int dimind)
{
    const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;

    if (dimind == 0 && dd->dim[0] == YY && ddRankSetup.npmenodes_x == 1)
    {
        /* With no x PME decomposition and the first DD dimension being y,
         * the first PME grid dimension follows y instead of x */
        ddpme->dim = YY;
    }
    else
    {
        ddpme->dim = dimind;
    }
    ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);

    ddpme->nslab = (ddpme->dim == 0 ? ddRankSetup.npmenodes_x : ddRankSetup.npmenodes_y);

    if (ddpme->nslab <= 1)
    {
        /* A single slab needs no PP location ranges */
        return;
    }

    /* Number of PME ranks per slab along this dimension */
    const int nso = ddRankSetup.numRanksDoingPme / ddpme->nslab;
    /* Determine for each PME slab the PP location range for dimension dim */
    snew(ddpme->pp_min, ddpme->nslab);
    snew(ddpme->pp_max, ddpme->nslab);
    for (int slab = 0; slab < ddpme->nslab; slab++)
    {
        /* Initialize to an empty range: min above max */
        ddpme->pp_min[slab] = dd->numCells[dd->dim[dimind]] - 1;
        ddpme->pp_max[slab] = 0;
    }
    for (int i = 0; i < dd->nnodes; i++)
    {
        ivec xyz;
        ddindex2xyz(dd->numCells, i, xyz);
        /* For y only use our y/z slab.
         * This assumes that the PME x grid size matches the DD grid size.
         */
        if (dimind == 0 || xyz[XX] == dd->ci[XX])
        {
            const int pmeindex = ddindex2pmeindex(ddRankSetup, i);
            int       slab;
            if (dimind == 0)
            {
                slab = pmeindex / nso;
            }
            else
            {
                slab = pmeindex % ddpme->nslab;
            }
            /* Widen the slab's PP range to include this cell */
            ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
            ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
        }
    }

    set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
}
975
dd_pme_maxshift_x(const gmx_domdec_t * dd)976 int dd_pme_maxshift_x(const gmx_domdec_t* dd)
977 {
978 const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
979
980 if (ddRankSetup.ddpme[0].dim == XX)
981 {
982 return ddRankSetup.ddpme[0].maxshift;
983 }
984 else
985 {
986 return 0;
987 }
988 }
989
dd_pme_maxshift_y(const gmx_domdec_t * dd)990 int dd_pme_maxshift_y(const gmx_domdec_t* dd)
991 {
992 const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
993
994 if (ddRankSetup.ddpme[0].dim == YY)
995 {
996 return ddRankSetup.ddpme[0].maxshift;
997 }
998 else if (ddRankSetup.npmedecompdim >= 2 && ddRankSetup.ddpme[1].dim == YY)
999 {
1000 return ddRankSetup.ddpme[1].maxshift;
1001 }
1002 else
1003 {
1004 return 0;
1005 }
1006 }
1007
/* Returns whether constraints are split over domains */
bool ddHaveSplitConstraints(const gmx_domdec_t& dd)
{
    return dd.comm->systemInfo.haveSplitConstraints;
}
1012
/* Returns whether update groups are used for the decomposition */
bool ddUsesUpdateGroups(const gmx_domdec_t& dd)
{
    return dd.comm->systemInfo.useUpdateGroups;
}
1017
dd_cycles_add(const gmx_domdec_t * dd,float cycles,int ddCycl)1018 void dd_cycles_add(const gmx_domdec_t* dd, float cycles, int ddCycl)
1019 {
1020 /* Note that the cycles value can be incorrect, either 0 or some
1021 * extremely large value, when our thread migrated to another core
1022 * with an unsynchronized cycle counter. If this happens less often
1023 * that once per nstlist steps, this will not cause issues, since
1024 * we later subtract the maximum value from the sum over nstlist steps.
1025 * A zero count will slightly lower the total, but that's a small effect.
1026 * Note that the main purpose of the subtraction of the maximum value
1027 * is to avoid throwing off the load balancing when stalls occur due
1028 * e.g. system activity or network congestion.
1029 */
1030 dd->comm->cycl[ddCycl] += cycles;
1031 dd->comm->cycl_n[ddCycl]++;
1032 if (cycles > dd->comm->cycl_max[ddCycl])
1033 {
1034 dd->comm->cycl_max[ddCycl] = cycles;
1035 }
1036 }
1037
#if GMX_MPI
/* Creates the load-balancing communicator for decomposition dimension
 * dim_ind: all ranks that share a row of cells along that dimension join
 * one communicator. The row root additionally allocates the DLB cell-size
 * and load-measurement buffers; other row members allocate only the
 * cell-fraction receive buffer. */
static void make_load_communicator(gmx_domdec_t* dd, int dim_ind, ivec loc)
{
    MPI_Comm c_row;
    int      dim, i, rank;
    ivec     loc_c;
    gmx_bool bPartOfGroup = FALSE;

    dim = dd->dim[dim_ind];
    copy_ivec(loc, loc_c);
    /* Check whether our rank lies on this row of cells along dim */
    for (i = 0; i < dd->numCells[dim]; i++)
    {
        loc_c[dim] = i;
        rank       = dd_index(dd->numCells, loc_c);
        if (rank == dd->rank)
        {
            /* This process is part of the group */
            bPartOfGroup = TRUE;
        }
    }
    /* Ranks not in the group pass MPI_UNDEFINED and get no communicator */
    MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank, &c_row);
    if (bPartOfGroup)
    {
        dd->comm->mpi_comm_load[dim_ind] = c_row;
        if (!isDlbDisabled(dd->comm))
        {
            DDCellsizesWithDlb& cellsizes = dd->comm->cellsizesWithDlb[dim_ind];

            if (dd->ci[dim] == dd->master_ci[dim])
            {
                /* This is the root process of this row */
                cellsizes.rowMaster = std::make_unique<RowMaster>();

                RowMaster& rowMaster = *cellsizes.rowMaster;
                rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
                rowMaster.oldCellFrac.resize(dd->numCells[dim] + 1);
                rowMaster.isCellMin.resize(dd->numCells[dim]);
                if (dim_ind > 0)
                {
                    rowMaster.bounds.resize(dd->numCells[dim]);
                }
                rowMaster.buf_ncd.resize(dd->numCells[dim]);
            }
            else
            {
                /* This is not a root process, we only need to receive cell_f */
                cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
            }
        }
        if (dd->ci[dim] == dd->master_ci[dim])
        {
            /* The row root also collects the load measurements of the row */
            snew(dd->comm->load[dim_ind].load, dd->numCells[dim] * DD_NLOAD_MAX);
        }
    }
}
#endif
1094
/* Sets up a communicator over the PP ranks that share the same GPU,
 * used by DLB to balance the GPU load jointly. Nothing is set up when
 * this rank has no GPU assigned or when no sharing occurs.
 */
void dd_setup_dlb_resource_sharing(const t_commrec* cr, int gpu_id)
{
#if GMX_MPI
    int           physicalnode_id_hash;
    gmx_domdec_t* dd;
    MPI_Comm      mpi_comm_pp_physicalnode;

    if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
    {
        /* Only ranks with short-ranged tasks (currently) use GPUs.
         * If we don't have GPUs assigned, there are no resources to share.
         */
        return;
    }

    physicalnode_id_hash = gmx_physicalnode_id_hash();

    dd = cr->dd;

    if (debug)
    {
        fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
        fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n", dd->rank,
                physicalnode_id_hash, gpu_id);
    }
    /* Split the PP communicator over the physical nodes */
    /* TODO: See if we should store this (before), as it's also used for
     * the nodecomm summation.
     */
    // TODO PhysicalNodeCommunicator could be extended/used to handle
    // the need for per-node per-group communicators.
    MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank, &mpi_comm_pp_physicalnode);
    /* Within a physical node, group the ranks that use the same GPU id;
     * the per-node communicator is only an intermediate and is freed.
     */
    MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank, &dd->comm->mpi_comm_gpu_shared);
    MPI_Comm_free(&mpi_comm_pp_physicalnode);
    MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);

    if (debug)
    {
        fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
    }

    /* Note that some ranks could share a GPU, while others don't */

    if (dd->comm->nrank_gpu_shared == 1)
    {
        /* No sharing on this rank: release the single-member communicator */
        MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
    }
#else
    GMX_UNUSED_VALUE(cr);
    GMX_UNUSED_VALUE(gpu_id);
#endif
}
1147
/*! \brief Creates one load communicator per decomposition dimension.
 *
 * For dimension index d > 0 a separate row communicator is created for
 * every combination of cell indices along the preceding dimensions, so
 * every rank ends up in exactly one row per dimension.
 */
static void make_load_communicators(gmx_domdec_t gmx_unused* dd)
{
#if GMX_MPI
    int  dim0, dim1, i, j;
    ivec loc;

    if (debug)
    {
        fprintf(debug, "Making load communicators\n");
    }

    /* Allocate at least one entry so the pointers are always valid */
    dd->comm->load = new domdec_load_t[std::max(dd->ndim, 1)];
    snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));

    if (dd->ndim == 0)
    {
        return;
    }

    clear_ivec(loc);
    /* Dimension 0: a single row spanning all cells along dd->dim[0] */
    make_load_communicator(dd, 0, loc);
    if (dd->ndim > 1)
    {
        /* Dimension 1: one row for each cell index along dimension 0 */
        dim0 = dd->dim[0];
        for (i = 0; i < dd->numCells[dim0]; i++)
        {
            loc[dim0] = i;
            make_load_communicator(dd, 1, loc);
        }
    }
    if (dd->ndim > 2)
    {
        /* Dimension 2: one row for each (dim0, dim1) cell combination */
        dim0 = dd->dim[0];
        for (i = 0; i < dd->numCells[dim0]; i++)
        {
            loc[dim0] = i;
            dim1      = dd->dim[1];
            for (j = 0; j < dd->numCells[dim1]; j++)
            {
                loc[dim1] = j;
                make_load_communicator(dd, 2, loc);
            }
        }
    }

    if (debug)
    {
        fprintf(debug, "Finished making load communicators\n");
    }
#endif
}
1199
1200 /*! \brief Sets up the relation between neighboring domains and zones */
setup_neighbor_relations(gmx_domdec_t * dd)1201 static void setup_neighbor_relations(gmx_domdec_t* dd)
1202 {
1203 int d, dim, m;
1204 ivec tmp, s;
1205 gmx_domdec_zones_t* zones;
1206 GMX_ASSERT((dd->ndim >= 0) && (dd->ndim <= DIM), "Must have valid number of dimensions for DD");
1207
1208 for (d = 0; d < dd->ndim; d++)
1209 {
1210 dim = dd->dim[d];
1211 copy_ivec(dd->ci, tmp);
1212 tmp[dim] = (tmp[dim] + 1) % dd->numCells[dim];
1213 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
1214 copy_ivec(dd->ci, tmp);
1215 tmp[dim] = (tmp[dim] - 1 + dd->numCells[dim]) % dd->numCells[dim];
1216 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
1217 if (debug)
1218 {
1219 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n", dd->rank, dim,
1220 dd->neighbor[d][0], dd->neighbor[d][1]);
1221 }
1222 }
1223
1224 int nzone = (1 << dd->ndim);
1225 int nizone = (1 << std::max(dd->ndim - 1, 0));
1226 assert(nizone >= 1 && nizone <= DD_MAXIZONE);
1227
1228 zones = &dd->comm->zones;
1229
1230 for (int i = 0; i < nzone; i++)
1231 {
1232 m = 0;
1233 clear_ivec(zones->shift[i]);
1234 for (d = 0; d < dd->ndim; d++)
1235 {
1236 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
1237 }
1238 }
1239
1240 zones->n = nzone;
1241 for (int i = 0; i < nzone; i++)
1242 {
1243 for (d = 0; d < DIM; d++)
1244 {
1245 s[d] = dd->ci[d] - zones->shift[i][d];
1246 if (s[d] < 0)
1247 {
1248 s[d] += dd->numCells[d];
1249 }
1250 else if (s[d] >= dd->numCells[d])
1251 {
1252 s[d] -= dd->numCells[d];
1253 }
1254 }
1255 }
1256 for (int iZoneIndex = 0; iZoneIndex < nizone; iZoneIndex++)
1257 {
1258 GMX_RELEASE_ASSERT(
1259 ddNonbondedZonePairRanges[iZoneIndex][0] == iZoneIndex,
1260 "The first element for each ddNonbondedZonePairRanges should match its index");
1261
1262 DDPairInteractionRanges iZone;
1263 iZone.iZoneIndex = iZoneIndex;
1264 /* dd_zp3 is for 3D decomposition, for fewer dimensions use only
1265 * j-zones up to nzone.
1266 */
1267 iZone.jZoneRange = gmx::Range<int>(std::min(ddNonbondedZonePairRanges[iZoneIndex][1], nzone),
1268 std::min(ddNonbondedZonePairRanges[iZoneIndex][2], nzone));
1269 for (dim = 0; dim < DIM; dim++)
1270 {
1271 if (dd->numCells[dim] == 1)
1272 {
1273 /* All shifts should be allowed */
1274 iZone.shift0[dim] = -1;
1275 iZone.shift1[dim] = 1;
1276 }
1277 else
1278 {
1279 /* Determine the min/max j-zone shift wrt the i-zone */
1280 iZone.shift0[dim] = 1;
1281 iZone.shift1[dim] = -1;
1282 for (int jZone : iZone.jZoneRange)
1283 {
1284 int shift_diff = zones->shift[jZone][dim] - zones->shift[iZoneIndex][dim];
1285 if (shift_diff < iZone.shift0[dim])
1286 {
1287 iZone.shift0[dim] = shift_diff;
1288 }
1289 if (shift_diff > iZone.shift1[dim])
1290 {
1291 iZone.shift1[dim] = shift_diff;
1292 }
1293 }
1294 }
1295 }
1296
1297 zones->iZones.push_back(iZone);
1298 }
1299
1300 if (!isDlbDisabled(dd->comm))
1301 {
1302 dd->comm->cellsizesWithDlb.resize(dd->ndim);
1303 }
1304
1305 if (dd->comm->ddSettings.recordLoad)
1306 {
1307 make_load_communicators(dd);
1308 }
1309 }
1310
/*! \brief Sets up the PP communicator, the rank of this process in it and,
 * for Cartesian setups, the index tables between DD indices and rank ids.
 */
static void make_pp_communicator(const gmx::MDLogger& mdlog,
                                 gmx_domdec_t*        dd,
                                 t_commrec gmx_unused* cr,
                                 bool gmx_unused reorder)
{
#if GMX_MPI
    gmx_domdec_comm_t*  comm      = dd->comm;
    CartesianRankSetup& cartSetup = comm->cartesianRankSetup;

    if (cartSetup.bCartesianPP)
    {
        /* Set up cartesian communication for the particle-particle part */
        GMX_LOG(mdlog.info)
                .appendTextFormatted("Will use a Cartesian communicator: %d x %d x %d",
                                     dd->numCells[XX], dd->numCells[YY], dd->numCells[ZZ]);

        /* All dimensions are periodic for domain decomposition */
        ivec periods;
        for (int i = 0; i < DIM; i++)
        {
            periods[i] = TRUE;
        }
        MPI_Comm comm_cart;
        MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->numCells, periods, static_cast<int>(reorder),
                        &comm_cart);
        /* We overwrite the old communicator with the new cartesian one */
        cr->mpi_comm_mygroup = comm_cart;
    }

    dd->mpi_comm_all = cr->mpi_comm_mygroup;
    MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);

    if (cartSetup.bCartesianPP_PME)
    {
        /* Since we want to use the original cartesian setup for sim,
         * and not the one after split, we need to make an index.
         */
        cartSetup.ddindex2ddnodeid.resize(dd->nnodes);
        /* Each rank fills in its own entry; the sum gathers all entries */
        cartSetup.ddindex2ddnodeid[dd_index(dd->numCells, dd->ci)] = dd->rank;
        gmx_sumi(dd->nnodes, cartSetup.ddindex2ddnodeid.data(), cr);
        /* Get the rank of the DD master,
         * above we made sure that the master node is a PP node.
         */
        int rank;
        if (MASTER(cr))
        {
            rank = dd->rank;
        }
        else
        {
            rank = 0;
        }
        /* Only the master contributes a non-zero value, so the sum is its rank */
        MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
    }
    else if (cartSetup.bCartesianPP)
    {
        if (!comm->ddRankSetup.usePmeOnlyRanks)
        {
            /* The PP communicator is also
             * the communicator for this simulation
             */
            cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
        }
        cr->nodeid = dd->rank;

        /* Our domain coordinates follow from our Cartesian rank */
        MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);

        /* We need to make an index to go from the coordinates
         * to the nodeid of this simulation.
         */
        cartSetup.ddindex2simnodeid.resize(dd->nnodes);
        std::vector<int> buf(dd->nnodes);
        if (thisRankHasDuty(cr, DUTY_PP))
        {
            buf[dd_index(dd->numCells, dd->ci)] = cr->sim_nodeid;
        }
        /* Communicate the ddindex to simulation nodeid index */
        MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
                      cr->mpi_comm_mysim);

        /* Determine the master coordinates and rank.
         * The DD master should be the same node as the master of this sim.
         */
        for (int i = 0; i < dd->nnodes; i++)
        {
            if (cartSetup.ddindex2simnodeid[i] == 0)
            {
                ddindex2xyz(dd->numCells, i, dd->master_ci);
                MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
            }
        }
        if (debug)
        {
            fprintf(debug, "The master rank is %d\n", dd->masterrank);
        }
    }
    else
    {
        /* No Cartesian communicators */
        /* We use the rank in dd->comm->all as DD index */
        ddindex2xyz(dd->numCells, dd->rank, dd->ci);
        /* The simulation master nodeid is 0, so the DD master rank is also 0 */
        dd->masterrank = 0;
        clear_ivec(dd->master_ci);
    }
#endif

    GMX_LOG(mdlog.info)
            .appendTextFormatted("Domain decomposition rank %d, coordinates %d %d %d\n", dd->rank,
                                 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
    if (debug)
    {
        fprintf(debug, "Domain decomposition rank %d, coordinates %d %d %d\n\n", dd->rank,
                dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
    }
}
1426
receive_ddindex2simnodeid(gmx_domdec_t * dd,t_commrec * cr)1427 static void receive_ddindex2simnodeid(gmx_domdec_t* dd, t_commrec* cr)
1428 {
1429 #if GMX_MPI
1430 CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
1431
1432 if (!cartSetup.bCartesianPP_PME && cartSetup.bCartesianPP)
1433 {
1434 cartSetup.ddindex2simnodeid.resize(dd->nnodes);
1435 std::vector<int> buf(dd->nnodes);
1436 if (thisRankHasDuty(cr, DUTY_PP))
1437 {
1438 buf[dd_index(dd->numCells, dd->ci)] = cr->sim_nodeid;
1439 }
1440 /* Communicate the ddindex to simulation nodeid index */
1441 MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
1442 cr->mpi_comm_mysim);
1443 }
1444 #else
1445 GMX_UNUSED_VALUE(dd);
1446 GMX_UNUSED_VALUE(cr);
1447 #endif
1448 }
1449
/*! \brief Splits the simulation communicator into a PP and a PME group
 * and returns the resulting Cartesian rank setup.
 *
 * Sets cr->duty, cr->mpi_comm_mygroup and, for Cartesian PP-PME setups,
 * the DD cell index of this rank in \p ddCellIndex. With interleaved rank
 * order the PME rank list is returned in \p pmeRanks.
 */
static CartesianRankSetup split_communicator(const gmx::MDLogger& mdlog,
                                             t_commrec*           cr,
                                             bool gmx_unused      reorder,
                                             const DDRankSetup&   ddRankSetup,
                                             ivec                 ddCellIndex,
                                             std::vector<int>*    pmeRanks)
{
    CartesianRankSetup cartSetup;

    const DdRankOrder ddRankOrder = ddRankSetup.rankOrder;

    cartSetup.bCartesianPP     = (ddRankOrder == DdRankOrder::cartesian);
    cartSetup.bCartesianPP_PME = false;

    const ivec& numDDCells = ddRankSetup.numPPCells;
    /* Initially we set ntot to the number of PP cells */
    copy_ivec(numDDCells, cartSetup.ntot);

    if (cartSetup.bCartesianPP)
    {
        const int numDDCellsTot = ddRankSetup.numPPRanks;
        bool      bDiv[DIM];
        /* A Cartesian PP+PME grid is only possible when the PME ranks can
         * be distributed evenly over the cells of a PP dimension (y or z).
         */
        for (int i = 1; i < DIM; i++)
        {
            bDiv[i] = ((ddRankSetup.numRanksDoingPme * numDDCells[i]) % numDDCellsTot == 0);
        }
        if (bDiv[YY] || bDiv[ZZ])
        {
            cartSetup.bCartesianPP_PME = TRUE;
            /* If we have 2D PME decomposition, which is always in x+y,
             * we stack the PME only nodes in z.
             * Otherwise we choose the direction that provides the thinnest slab
             * of PME only nodes as this will have the least effect
             * on the PP communication.
             * But for the PME communication the opposite might be better.
             */
            if (bDiv[ZZ] && (ddRankSetup.npmenodes_y > 1 || !bDiv[YY] || numDDCells[YY] > numDDCells[ZZ]))
            {
                cartSetup.cartpmedim = ZZ;
            }
            else
            {
                cartSetup.cartpmedim = YY;
            }
            /* Extend the grid along the chosen dimension to fit the PME ranks */
            cartSetup.ntot[cartSetup.cartpmedim] +=
                    (ddRankSetup.numRanksDoingPme * numDDCells[cartSetup.cartpmedim]) / numDDCellsTot;
        }
        else
        {
            GMX_LOG(mdlog.info)
                    .appendTextFormatted(
                            "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or "
                            "nx*nz (%d*%d)",
                            ddRankSetup.numRanksDoingPme, numDDCells[XX], numDDCells[YY],
                            numDDCells[XX], numDDCells[ZZ]);
            GMX_LOG(mdlog.info)
                    .appendText("Will not use a Cartesian communicator for PP <-> PME\n");
        }
    }

    if (cartSetup.bCartesianPP_PME)
    {
#if GMX_MPI
        int  rank;
        ivec periods;

        GMX_LOG(mdlog.info)
                .appendTextFormatted(
                        "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d",
                        cartSetup.ntot[XX], cartSetup.ntot[YY], cartSetup.ntot[ZZ]);

        /* All dimensions are periodic */
        for (int i = 0; i < DIM; i++)
        {
            periods[i] = TRUE;
        }
        MPI_Comm comm_cart;
        MPI_Cart_create(cr->mpi_comm_mysim, DIM, cartSetup.ntot, periods, static_cast<int>(reorder),
                        &comm_cart);
        MPI_Comm_rank(comm_cart, &rank);
        if (MASTER(cr) && rank != 0)
        {
            gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
        }

        /* With this assignment we lose the link to the original communicator
         * which will usually be MPI_COMM_WORLD, unless have multisim.
         */
        cr->mpi_comm_mysim = comm_cart;
        cr->sim_nodeid     = rank;

        MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, ddCellIndex);

        GMX_LOG(mdlog.info)
                .appendTextFormatted("Cartesian rank %d, coordinates %d %d %d\n", cr->sim_nodeid,
                                     ddCellIndex[XX], ddCellIndex[YY], ddCellIndex[ZZ]);

        /* Ranks inside the original PP grid do PP work; the ranks stacked
         * beyond it along cartpmedim (if any) do PME work.
         */
        if (ddCellIndex[cartSetup.cartpmedim] < numDDCells[cartSetup.cartpmedim])
        {
            cr->duty = DUTY_PP;
        }
        if (!ddRankSetup.usePmeOnlyRanks
            || ddCellIndex[cartSetup.cartpmedim] >= numDDCells[cartSetup.cartpmedim])
        {
            cr->duty = DUTY_PME;
        }

        /* Split the sim communicator into PP and PME only nodes */
        MPI_Comm_split(cr->mpi_comm_mysim, getThisRankDuties(cr),
                       dd_index(cartSetup.ntot, ddCellIndex), &cr->mpi_comm_mygroup);
#else
        GMX_UNUSED_VALUE(ddCellIndex);
#endif
    }
    else
    {
        switch (ddRankOrder)
        {
            case DdRankOrder::pp_pme:
                GMX_LOG(mdlog.info).appendText("Order of the ranks: PP first, PME last");
                break;
            case DdRankOrder::interleave:
                /* Interleave the PP-only and PME-only ranks */
                GMX_LOG(mdlog.info).appendText("Interleaving PP and PME ranks");
                *pmeRanks = dd_interleaved_pme_ranks(ddRankSetup);
                break;
            case DdRankOrder::cartesian: break;
            default: gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(ddRankOrder));
        }

        /* A rank that maps to no PME rank is itself a PME-only rank */
        if (dd_simnode2pmenode(ddRankSetup, cartSetup, *pmeRanks, cr, cr->sim_nodeid) == -1)
        {
            cr->duty = DUTY_PME;
        }
        else
        {
            cr->duty = DUTY_PP;
        }
#if GMX_MPI
        /* Split the sim communicator into PP and PME only nodes */
        MPI_Comm_split(cr->mpi_comm_mysim, getThisRankDuties(cr), cr->nodeid, &cr->mpi_comm_mygroup);
        MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
#endif
    }

    GMX_LOG(mdlog.info)
            .appendTextFormatted("This rank does only %s work.\n",
                                 thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");

    return cartSetup;
}
1600
1601 /*! \brief Makes the PP communicator and the PME communicator, when needed
1602 *
1603 * Returns the Cartesian rank setup.
1604 * Sets \p cr->mpi_comm_mygroup
1605 * For PP ranks, sets the DD PP cell index in \p ddCellIndex.
1606 * With separate PME ranks in interleaved order, set the PME ranks in \p pmeRanks.
1607 */
makeGroupCommunicators(const gmx::MDLogger & mdlog,const DDSettings & ddSettings,const DDRankSetup & ddRankSetup,t_commrec * cr,ivec ddCellIndex,std::vector<int> * pmeRanks)1608 static CartesianRankSetup makeGroupCommunicators(const gmx::MDLogger& mdlog,
1609 const DDSettings& ddSettings,
1610 const DDRankSetup& ddRankSetup,
1611 t_commrec* cr,
1612 ivec ddCellIndex,
1613 std::vector<int>* pmeRanks)
1614 {
1615 CartesianRankSetup cartSetup;
1616
1617 // As a default, both group and sim communicators are equal to the default communicator
1618 cr->mpi_comm_mygroup = cr->mpiDefaultCommunicator;
1619 cr->mpi_comm_mysim = cr->mpiDefaultCommunicator;
1620 cr->nnodes = cr->sizeOfDefaultCommunicator;
1621 cr->nodeid = cr->rankInDefaultCommunicator;
1622 cr->sim_nodeid = cr->rankInDefaultCommunicator;
1623
1624 if (ddRankSetup.usePmeOnlyRanks)
1625 {
1626 /* Split the communicator into a PP and PME part */
1627 cartSetup = split_communicator(mdlog, cr, ddSettings.useCartesianReorder, ddRankSetup,
1628 ddCellIndex, pmeRanks);
1629 }
1630 else
1631 {
1632 /* All nodes do PP and PME */
1633 /* We do not require separate communicators */
1634 cartSetup.bCartesianPP = false;
1635 cartSetup.bCartesianPP_PME = false;
1636 }
1637
1638 return cartSetup;
1639 }
1640
1641 /*! \brief For PP ranks, sets or makes the communicator
1642 *
1643 * For PME ranks get the rank id.
1644 * For PP only ranks, sets the PME-only rank.
1645 */
static void setupGroupCommunication(const gmx::MDLogger&     mdlog,
                                    const DDSettings&        ddSettings,
                                    gmx::ArrayRef<const int> pmeRanks,
                                    t_commrec*               cr,
                                    const int                numAtomsInSystem,
                                    gmx_domdec_t*            dd)
{
    const DDRankSetup&        ddRankSetup = dd->comm->ddRankSetup;
    const CartesianRankSetup& cartSetup   = dd->comm->cartesianRankSetup;

    if (thisRankHasDuty(cr, DUTY_PP))
    {
        /* Copy or make a new PP communicator */

        /* We (possibly) reordered the nodes in split_communicator,
         * so it is no longer required in make_pp_communicator.
         */
        const bool useCartesianReorder = (ddSettings.useCartesianReorder && !cartSetup.bCartesianPP_PME);

        make_pp_communicator(mdlog, dd, cr, useCartesianReorder);
    }
    else
    {
        /* PME-only ranks only need the DD-index to nodeid table */
        receive_ddindex2simnodeid(dd, cr);
    }

    if (!thisRankHasDuty(cr, DUTY_PME))
    {
        /* Set up the communication to our PME node */
        dd->pme_nodeid           = dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, cr->sim_nodeid);
        dd->pme_receive_vir_ener = receive_vir_ener(dd, pmeRanks, cr);
        if (debug)
        {
            fprintf(debug, "My pme_nodeid %d receive ener %s\n", dd->pme_nodeid,
                    gmx::boolToString(dd->pme_receive_vir_ener));
        }
    }
    else
    {
        /* This rank does PME itself, there is no separate PME rank to talk to */
        dd->pme_nodeid = -1;
    }

    /* We can not use DDMASTER(dd), because dd->masterrank is set later */
    if (MASTER(cr))
    {
        dd->ma = std::make_unique<AtomDistribution>(dd->numCells, numAtomsInSystem, numAtomsInSystem);
    }
}
1694
get_slb_frac(const gmx::MDLogger & mdlog,const char * dir,int nc,const char * size_string)1695 static real* get_slb_frac(const gmx::MDLogger& mdlog, const char* dir, int nc, const char* size_string)
1696 {
1697 real * slb_frac, tot;
1698 int i, n;
1699 double dbl;
1700
1701 slb_frac = nullptr;
1702 if (nc > 1 && size_string != nullptr)
1703 {
1704 GMX_LOG(mdlog.info).appendTextFormatted("Using static load balancing for the %s direction", dir);
1705 snew(slb_frac, nc);
1706 tot = 0;
1707 for (i = 0; i < nc; i++)
1708 {
1709 dbl = 0;
1710 sscanf(size_string, "%20lf%n", &dbl, &n);
1711 if (dbl == 0)
1712 {
1713 gmx_fatal(FARGS,
1714 "Incorrect or not enough DD cell size entries for direction %s: '%s'",
1715 dir, size_string);
1716 }
1717 slb_frac[i] = dbl;
1718 size_string += n;
1719 tot += slb_frac[i];
1720 }
1721 /* Normalize */
1722 std::string relativeCellSizes = "Relative cell sizes:";
1723 for (i = 0; i < nc; i++)
1724 {
1725 slb_frac[i] /= tot;
1726 relativeCellSizes += gmx::formatString(" %5.3f", slb_frac[i]);
1727 }
1728 GMX_LOG(mdlog.info).appendText(relativeCellSizes);
1729 }
1730
1731 return slb_frac;
1732 }
1733
multi_body_bondeds_count(const gmx_mtop_t * mtop)1734 static int multi_body_bondeds_count(const gmx_mtop_t* mtop)
1735 {
1736 int n = 0;
1737 gmx_mtop_ilistloop_t iloop = gmx_mtop_ilistloop_init(mtop);
1738 int nmol;
1739 while (const InteractionLists* ilists = gmx_mtop_ilistloop_next(iloop, &nmol))
1740 {
1741 for (auto& ilist : extractILists(*ilists, IF_BOND))
1742 {
1743 if (NRAL(ilist.functionType) > 2)
1744 {
1745 n += nmol * (ilist.iatoms.size() / ilistStride(ilist));
1746 }
1747 }
1748 }
1749
1750 return n;
1751 }
1752
dd_getenv(const gmx::MDLogger & mdlog,const char * env_var,int def)1753 static int dd_getenv(const gmx::MDLogger& mdlog, const char* env_var, int def)
1754 {
1755 char* val;
1756 int nst;
1757
1758 nst = def;
1759 val = getenv(env_var);
1760 if (val)
1761 {
1762 if (sscanf(val, "%20d", &nst) <= 0)
1763 {
1764 nst = 1;
1765 }
1766 GMX_LOG(mdlog.info).appendTextFormatted("Found env.var. %s = %s, using value %d", env_var, val, nst);
1767 }
1768
1769 return nst;
1770 }
1771
/* Checks hard restrictions of the run settings on domain decomposition;
 * incompatible settings are fatal, questionable ones only get a warning.
 */
static void check_dd_restrictions(const gmx_domdec_t* dd, const t_inputrec* ir, const gmx::MDLogger& mdlog)
{
    /* Screw PBC only supports decomposition along x */
    if (ir->pbcType == PbcType::Screw
        && (dd->numCells[XX] == 1 || dd->numCells[YY] > 1 || dd->numCells[ZZ] > 1))
    {
        gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction",
                  c_pbcTypeNames[ir->pbcType].c_str());
    }

    if (ir->nstlist == 0)
    {
        gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
    }

    /* Not fatal, but the results will be incorrect if the COM group is split */
    if (ir->comm_mode == ecmANGULAR && ir->pbcType != PbcType::No)
    {
        GMX_LOG(mdlog.warning)
                .appendText(
                        "comm-mode angular will give incorrect results when the comm group "
                        "partially crosses a periodic boundary");
    }
}
1794
average_cellsize_min(const gmx_ddbox_t & ddbox,const ivec numDomains)1795 static real average_cellsize_min(const gmx_ddbox_t& ddbox, const ivec numDomains)
1796 {
1797 real r = ddbox.box_size[XX];
1798 for (int d = 0; d < DIM; d++)
1799 {
1800 if (numDomains[d] > 1)
1801 {
1802 /* Check using the initial average cell size */
1803 r = std::min(r, ddbox.box_size[d] * ddbox.skew_fac[d] / numDomains[d]);
1804 }
1805 }
1806
1807 return r;
1808 }
1809
1810 /*! \brief Depending on the DLB initial value return the DLB switched off state or issue an error.
1811 */
forceDlbOffOrBail(DlbState cmdlineDlbState,const std::string & reasonStr,const gmx::MDLogger & mdlog)1812 static DlbState forceDlbOffOrBail(DlbState cmdlineDlbState,
1813 const std::string& reasonStr,
1814 const gmx::MDLogger& mdlog)
1815 {
1816 std::string dlbNotSupportedErr = "Dynamic load balancing requested, but ";
1817 std::string dlbDisableNote = "NOTE: disabling dynamic load balancing as ";
1818
1819 if (cmdlineDlbState == DlbState::onUser)
1820 {
1821 gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
1822 }
1823 else if (cmdlineDlbState == DlbState::offCanTurnOn)
1824 {
1825 GMX_LOG(mdlog.info).appendText(dlbDisableNote + reasonStr);
1826 }
1827 return DlbState::offForever;
1828 }
1829
1830 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
1831 *
1832 * This function parses the parameters of "-dlb" command line option setting
1833 * corresponding state values. Then it checks the consistency of the determined
1834 * state with other run parameters and settings. As a result, the initial state
1835 * may be altered or an error may be thrown if incompatibility of options is detected.
1836 *
1837 * \param [in] mdlog Logger.
1838 * \param [in] dlbOption Enum value for the DLB option.
1839 * \param [in] bRecordLoad True if the load balancer is recording load information.
1840 * \param [in] mdrunOptions Options for mdrun.
1841 * \param [in] ir Pointer mdrun to input parameters.
1842 * \returns DLB initial/startup state.
1843 */
determineInitialDlbState(const gmx::MDLogger & mdlog,DlbOption dlbOption,gmx_bool bRecordLoad,const gmx::MdrunOptions & mdrunOptions,const t_inputrec * ir)1844 static DlbState determineInitialDlbState(const gmx::MDLogger& mdlog,
1845 DlbOption dlbOption,
1846 gmx_bool bRecordLoad,
1847 const gmx::MdrunOptions& mdrunOptions,
1848 const t_inputrec* ir)
1849 {
1850 DlbState dlbState = DlbState::offCanTurnOn;
1851
1852 switch (dlbOption)
1853 {
1854 case DlbOption::turnOnWhenUseful: dlbState = DlbState::offCanTurnOn; break;
1855 case DlbOption::no: dlbState = DlbState::offUser; break;
1856 case DlbOption::yes: dlbState = DlbState::onUser; break;
1857 default: gmx_incons("Invalid dlbOption enum value");
1858 }
1859
1860 /* Reruns don't support DLB: bail or override auto mode */
1861 if (mdrunOptions.rerun)
1862 {
1863 std::string reasonStr = "it is not supported in reruns.";
1864 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1865 }
1866
1867 /* Unsupported integrators */
1868 if (!EI_DYNAMICS(ir->eI))
1869 {
1870 auto reasonStr = gmx::formatString(
1871 "it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
1872 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1873 }
1874
1875 /* Without cycle counters we can't time work to balance on */
1876 if (!bRecordLoad)
1877 {
1878 std::string reasonStr =
1879 "cycle counters unsupported or not enabled in the operating system kernel.";
1880 return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1881 }
1882
1883 if (mdrunOptions.reproducible)
1884 {
1885 std::string reasonStr = "you started a reproducible run.";
1886 switch (dlbState)
1887 {
1888 case DlbState::offUser: break;
1889 case DlbState::offForever:
1890 GMX_RELEASE_ASSERT(false, "DlbState::offForever is not a valid initial state");
1891 break;
1892 case DlbState::offCanTurnOn: return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
1893 case DlbState::onCanTurnOff:
1894 GMX_RELEASE_ASSERT(false, "DlbState::offCanTurnOff is not a valid initial state");
1895 break;
1896 case DlbState::onUser:
1897 return forceDlbOffOrBail(
1898 dlbState,
1899 reasonStr
1900 + " In load balanced runs binary reproducibility cannot be "
1901 "ensured.",
1902 mdlog);
1903 default:
1904 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice",
1905 static_cast<int>(dlbState));
1906 }
1907 }
1908
1909 return dlbState;
1910 }
1911
init_dd_comm()1912 static gmx_domdec_comm_t* init_dd_comm()
1913 {
1914 gmx_domdec_comm_t* comm = new gmx_domdec_comm_t;
1915
1916 comm->n_load_have = 0;
1917 comm->n_load_collect = 0;
1918
1919 comm->haveTurnedOffDlb = false;
1920
1921 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
1922 {
1923 comm->sum_nat[i] = 0;
1924 }
1925 comm->ndecomp = 0;
1926 comm->nload = 0;
1927 comm->load_step = 0;
1928 comm->load_sum = 0;
1929 comm->load_max = 0;
1930 clear_ivec(comm->load_lim);
1931 comm->load_mdf = 0;
1932 comm->load_pme = 0;
1933
1934 /* This should be replaced by a unique pointer */
1935 comm->balanceRegion = ddBalanceRegionAllocate();
1936
1937 return comm;
1938 }
1939
1940 /* Returns whether mtop contains constraints and/or vsites */
systemHasConstraintsOrVsites(const gmx_mtop_t & mtop)1941 static bool systemHasConstraintsOrVsites(const gmx_mtop_t& mtop)
1942 {
1943 auto ilistLoop = gmx_mtop_ilistloop_init(mtop);
1944 int nmol;
1945 while (const InteractionLists* ilists = gmx_mtop_ilistloop_next(ilistLoop, &nmol))
1946 {
1947 if (!extractILists(*ilists, IF_CONSTRAINT | IF_VSITE).empty())
1948 {
1949 return true;
1950 }
1951 }
1952
1953 return false;
1954 }
1955
/* Decides whether update groups can be used and, when they can, fills in
 * the update-group fields of \p systemInfo (grouping per moleculetype,
 * useUpdateGroups flag and the maximum update-group radius).
 */
static void setupUpdateGroups(const gmx::MDLogger& mdlog,
                              const gmx_mtop_t&    mtop,
                              const t_inputrec&    inputrec,
                              const real           cutoffMargin,
                              DDSystemInfo*        systemInfo)
{
    /* When we have constraints and/or vsites, it is beneficial to use
     * update groups (when possible) to allow independent update of groups.
     */
    if (!systemHasConstraintsOrVsites(mtop))
    {
        /* No constraints or vsites, atoms can be updated independently */
        return;
    }

    systemInfo->updateGroupingPerMoleculetype = gmx::makeUpdateGroups(mtop);
    /* An empty grouping means no valid update groups could be made;
     * the environment variable provides a manual override.
     */
    systemInfo->useUpdateGroups = (!systemInfo->updateGroupingPerMoleculetype.empty()
                                   && getenv("GMX_NO_UPDATEGROUPS") == nullptr);

    if (systemInfo->useUpdateGroups)
    {
        /* Count the total number of update groups over all molecule blocks */
        int numUpdateGroups = 0;
        for (const auto& molblock : mtop.molblock)
        {
            numUpdateGroups += molblock.nmol
                               * systemInfo->updateGroupingPerMoleculetype[molblock.type].numBlocks();
        }

        systemInfo->maxUpdateGroupRadius = computeMaxUpdateGroupRadius(
                mtop, systemInfo->updateGroupingPerMoleculetype, maxReferenceTemperature(inputrec));

        /* To use update groups, the large domain-to-domain cutoff distance
         * should be compatible with the box size.
         */
        systemInfo->useUpdateGroups = (atomToAtomIntoDomainToDomainCutoff(*systemInfo, 0) < cutoffMargin);

        if (systemInfo->useUpdateGroups)
        {
            GMX_LOG(mdlog.info)
                    .appendTextFormatted(
                            "Using update groups, nr %d, average size %.1f atoms, max. radius %.3f "
                            "nm\n",
                            numUpdateGroups, mtop.natoms / static_cast<double>(numUpdateGroups),
                            systemInfo->maxUpdateGroupRadius);
        }
        else
        {
            GMX_LOG(mdlog.info)
                    .appendTextFormatted(
                            "The combination of rlist and box size prohibits the use of update "
                            "groups\n");
            systemInfo->updateGroupingPerMoleculetype.clear();
        }
    }
}
2011
/* Initializes the unit-cell properties that DD needs from the inputrec:
 * number of PBC dimensions, number of bounded dimensions, whether the DD
 * box can change during the run, and whether screw PBC is used.
 */
UnitCellInfo::UnitCellInfo(const t_inputrec& ir) :
    npbcdim(numPbcDimensions(ir.pbcType)),
    numBoundedDimensions(inputrec2nboundeddim(&ir)),
    /* The box is dynamic when not all dimensions are bounded or when the
     * box itself can change (e.g. pressure coupling).
     */
    ddBoxIsDynamic(numBoundedDimensions < DIM || inputrecDynamicBox(&ir)),
    haveScrewPBC(ir.pbcType == PbcType::Screw)
{
}
2019
2020 /* Returns whether molecules are always whole, i.e. not broken by PBC */
moleculesAreAlwaysWhole(const gmx_mtop_t & mtop,const bool useUpdateGroups,gmx::ArrayRef<const gmx::RangePartitioning> updateGroupingPerMoleculetype)2021 static bool moleculesAreAlwaysWhole(const gmx_mtop_t& mtop,
2022 const bool useUpdateGroups,
2023 gmx::ArrayRef<const gmx::RangePartitioning> updateGroupingPerMoleculetype)
2024 {
2025 if (useUpdateGroups)
2026 {
2027 GMX_RELEASE_ASSERT(updateGroupingPerMoleculetype.size() == mtop.moltype.size(),
2028 "Need one grouping per moltype");
2029 for (size_t mol = 0; mol < mtop.moltype.size(); mol++)
2030 {
2031 if (updateGroupingPerMoleculetype[mol].numBlocks() > 1)
2032 {
2033 return false;
2034 }
2035 }
2036 }
2037 else
2038 {
2039 for (const auto& moltype : mtop.moltype)
2040 {
2041 if (moltype.atoms.nr > 1)
2042 {
2043 return false;
2044 }
2045 }
2046 }
2047
2048 return true;
2049 }
2050
/*! \brief Generate the simulation system information
 *
 * Collects the system properties that determine the domain decomposition
 * setup: whether update groups can be used, the effective atom-to-atom and
 * domain-to-domain cut-offs, and the minimum cell size limits arising from
 * atom displacement, bonded interactions and constraints (P-LINCS).
 *
 * \param[in] mdlog         Logger for user-facing notes
 * \param[in] ddRole        The role of this rank (master or not) in DD setup
 * \param[in] communicator  MPI communicator, used to broadcast master-only results
 * \param[in] options       User-supplied DD options
 * \param[in] mtop          The global topology
 * \param[in] ir            The input parameters
 * \param[in] box           The simulation box
 * \param[in] xGlobal       Global coordinates; only read on the master rank
 */
static DDSystemInfo getSystemInfo(const gmx::MDLogger&           mdlog,
                                  DDRole                         ddRole,
                                  MPI_Comm                       communicator,
                                  const DomdecOptions&           options,
                                  const gmx_mtop_t&              mtop,
                                  const t_inputrec&              ir,
                                  const matrix                   box,
                                  gmx::ArrayRef<const gmx::RVec> xGlobal)
{
    /* Safety margin factor applied to measured bonded distances below */
    const real tenPercentMargin = 1.1;

    DDSystemInfo systemInfo;

    /* We need to decide on update groups early, as this affects communication distances */
    systemInfo.useUpdateGroups = false;
    if (ir.cutoff_scheme == ecutsVERLET)
    {
        /* The margin is the largest cut-off the box permits minus the pair-list cut-off */
        real cutoffMargin = std::sqrt(max_cutoff2(ir.pbcType, box)) - ir.rlist;
        setupUpdateGroups(mdlog, mtop, ir, cutoffMargin, &systemInfo);
    }

    systemInfo.moleculesAreAlwaysWhole = moleculesAreAlwaysWhole(
            mtop, systemInfo.useUpdateGroups, systemInfo.updateGroupingPerMoleculetype);
    systemInfo.haveInterDomainBondeds =
            (!systemInfo.moleculesAreAlwaysWhole || mtop.bIntermolecularInteractions);
    systemInfo.haveInterDomainMultiBodyBondeds =
            (systemInfo.haveInterDomainBondeds && multi_body_bondeds_count(&mtop) > 0);

    if (systemInfo.useUpdateGroups)
    {
        /* With update groups, constraints and SETTLEs never cross domain boundaries */
        systemInfo.haveSplitConstraints = false;
        systemInfo.haveSplitSettles     = false;
    }
    else
    {
        systemInfo.haveSplitConstraints = (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0
                                           || gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0);
        systemInfo.haveSplitSettles     = (gmx_mtop_ftype_count(mtop, F_SETTLE) > 0);
    }

    if (ir.rlist == 0)
    {
        /* Set the cut-off to some very large value,
         * so we don't need if statements everywhere in the code.
         * We use sqrt, since the cut-off is squared in some places.
         */
        systemInfo.cutoff = GMX_CUTOFF_INF;
    }
    else
    {
        systemInfo.cutoff = atomToAtomIntoDomainToDomainCutoff(systemInfo, ir.rlist);
    }
    systemInfo.minCutoffForMultiBody = 0;

    /* Determine the minimum cell size limit, affected by many factors */
    systemInfo.cellsizeLimit             = 0;
    systemInfo.filterBondedCommunication = false;

    /* We do not allow home atoms to move beyond the neighboring domain
     * between domain decomposition steps, which limits the cell size.
     * Get an estimate of cell size limit due to atom displacement.
     * In most cases this is a large overestimate, because it assumes
     * non-interaction atoms.
     * We set the chance to 1 in a trillion steps.
     */
    constexpr real c_chanceThatAtomMovesBeyondDomain = 1e-12;
    const real     limitForAtomDisplacement          = minCellSizeForAtomDisplacement(
            mtop, ir, systemInfo.updateGroupingPerMoleculetype, c_chanceThatAtomMovesBeyondDomain);
    GMX_LOG(mdlog.info).appendTextFormatted("Minimum cell size due to atom displacement: %.3f nm", limitForAtomDisplacement);

    systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, limitForAtomDisplacement);

    /* TODO: PME decomposition currently requires atoms not to be more than
     * 2/3 of comm->cutoff, which is >=rlist, outside of their domain.
     * In nearly all cases, limitForAtomDisplacement will be smaller
     * than 2/3*rlist, so the PME requirement is satisfied.
     * But it would be better for both correctness and performance
     * to use limitForAtomDisplacement instead of 2/3*comm->cutoff.
     * Note that we would need to improve the pairlist buffer case.
     */

    if (systemInfo.haveInterDomainBondeds)
    {
        if (options.minimumCommunicationRange > 0)
        {
            /* The user requested a minimum communication range (-rdd) */
            systemInfo.minCutoffForMultiBody =
                    atomToAtomIntoDomainToDomainCutoff(systemInfo, options.minimumCommunicationRange);
            if (options.useBondedCommunication)
            {
                systemInfo.filterBondedCommunication =
                        (systemInfo.minCutoffForMultiBody > systemInfo.cutoff);
            }
            else
            {
                systemInfo.cutoff = std::max(systemInfo.cutoff, systemInfo.minCutoffForMultiBody);
            }
        }
        else if (ir.bPeriodicMols)
        {
            /* Can not easily determine the required cut-off */
            GMX_LOG(mdlog.warning)
                    .appendText(
                            "NOTE: Periodic molecules are present in this system. Because of this, "
                            "the domain decomposition algorithm cannot easily determine the "
                            "minimum cell size that it requires for treating bonded interactions. "
                            "Instead, domain decomposition will assume that half the non-bonded "
                            "cut-off will be a suitable lower bound.");
            systemInfo.minCutoffForMultiBody = systemInfo.cutoff / 2;
        }
        else
        {
            /* Maximum distances spanned by two-body (r_2b) and multi-body (r_mb)
             * bonded interactions; measured on the master rank, broadcast to all.
             */
            real r_2b, r_mb;

            if (ddRole == DDRole::Master)
            {
                dd_bonded_cg_distance(mdlog, &mtop, &ir, xGlobal, box,
                                      options.checkBondedInteractions, &r_2b, &r_mb);
            }
            gmx_bcast(sizeof(r_2b), &r_2b, communicator);
            gmx_bcast(sizeof(r_mb), &r_mb, communicator);

            /* We use an initial margin of 10% for the minimum cell size,
             * except when we are just below the non-bonded cut-off.
             */
            if (options.useBondedCommunication)
            {
                if (std::max(r_2b, r_mb) > systemInfo.cutoff)
                {
                    const real r_bonded              = std::max(r_2b, r_mb);
                    systemInfo.minCutoffForMultiBody = tenPercentMargin * r_bonded;
                    /* This is the (only) place where we turn on the filtering */
                    systemInfo.filterBondedCommunication = true;
                }
                else
                {
                    const real r_bonded = r_mb;
                    systemInfo.minCutoffForMultiBody =
                            std::min(tenPercentMargin * r_bonded, systemInfo.cutoff);
                }
                /* We determine cutoff_mbody later */
                systemInfo.increaseMultiBodyCutoff = true;
            }
            else
            {
                /* No special bonded communication,
                 * simply increase the DD cut-off.
                 */
                systemInfo.minCutoffForMultiBody = tenPercentMargin * std::max(r_2b, r_mb);
                systemInfo.cutoff = std::max(systemInfo.cutoff, systemInfo.minCutoffForMultiBody);
            }
        }
        GMX_LOG(mdlog.info)
                .appendTextFormatted("Minimum cell size due to bonded interactions: %.3f nm",
                                     systemInfo.minCutoffForMultiBody);

        systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, systemInfo.minCutoffForMultiBody);
    }

    systemInfo.constraintCommunicationRange = 0;
    if (systemInfo.haveSplitConstraints && options.constraintCommunicationRange <= 0)
    {
        /* There is a cell size limit due to the constraints (P-LINCS) */
        systemInfo.constraintCommunicationRange = gmx::constr_r_max(mdlog, &mtop, &ir);
        GMX_LOG(mdlog.info)
                .appendTextFormatted("Estimated maximum distance required for P-LINCS: %.3f nm",
                                     systemInfo.constraintCommunicationRange);
        if (systemInfo.constraintCommunicationRange > systemInfo.cellsizeLimit)
        {
            GMX_LOG(mdlog.info)
                    .appendText(
                            "This distance will limit the DD cell size, you can override this with "
                            "-rcon");
        }
    }
    else if (options.constraintCommunicationRange > 0)
    {
        /* Here we do not check for dd->splitConstraints.
         * because one can also set a cell size limit for virtual sites only
         * and at this point we don't know yet if there are intercg v-sites.
         */
        GMX_LOG(mdlog.info)
                .appendTextFormatted("User supplied maximum distance required for P-LINCS: %.3f nm",
                                     options.constraintCommunicationRange);
        systemInfo.constraintCommunicationRange = options.constraintCommunicationRange;
    }
    systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, systemInfo.constraintCommunicationRange);

    return systemInfo;
}
2241
2242 /*! \brief Exit with a fatal error if the DDGridSetup cannot be
2243 * implemented. */
checkDDGridSetup(const DDGridSetup & ddGridSetup,DDRole ddRole,MPI_Comm communicator,int numNodes,const DomdecOptions & options,const DDSettings & ddSettings,const DDSystemInfo & systemInfo,const real cellsizeLimit,const gmx_ddbox_t & ddbox)2244 static void checkDDGridSetup(const DDGridSetup& ddGridSetup,
2245 DDRole ddRole,
2246 MPI_Comm communicator,
2247 int numNodes,
2248 const DomdecOptions& options,
2249 const DDSettings& ddSettings,
2250 const DDSystemInfo& systemInfo,
2251 const real cellsizeLimit,
2252 const gmx_ddbox_t& ddbox)
2253 {
2254 if (options.numCells[XX] <= 0 && (ddGridSetup.numDomains[XX] == 0))
2255 {
2256 char buf[STRLEN];
2257 gmx_bool bC = (systemInfo.haveSplitConstraints
2258 && systemInfo.constraintCommunicationRange > systemInfo.minCutoffForMultiBody);
2259 sprintf(buf, "Change the number of ranks or mdrun option %s%s%s", !bC ? "-rdd" : "-rcon",
2260 ddSettings.initialDlbState != DlbState::offUser ? " or -dds" : "",
2261 bC ? " or your LINCS settings" : "");
2262
2263 gmx_fatal_collective(FARGS, communicator, ddRole == DDRole::Master,
2264 "There is no domain decomposition for %d ranks that is compatible "
2265 "with the given box and a minimum cell size of %g nm\n"
2266 "%s\n"
2267 "Look in the log file for details on the domain decomposition",
2268 numNodes - ddGridSetup.numPmeOnlyRanks, cellsizeLimit, buf);
2269 }
2270
2271 const real acs = average_cellsize_min(ddbox, ddGridSetup.numDomains);
2272 if (acs < cellsizeLimit)
2273 {
2274 if (options.numCells[XX] <= 0)
2275 {
2276 GMX_RELEASE_ASSERT(
2277 false,
2278 "dd_choose_grid() should return a grid that satisfies the cell size limits");
2279 }
2280 else
2281 {
2282 gmx_fatal_collective(
2283 FARGS, communicator, ddRole == DDRole::Master,
2284 "The initial cell size (%f) is smaller than the cell size limit (%f), change "
2285 "options -dd, -rdd or -rcon, see the log file for details",
2286 acs, cellsizeLimit);
2287 }
2288 }
2289
2290 const int numPPRanks =
2291 ddGridSetup.numDomains[XX] * ddGridSetup.numDomains[YY] * ddGridSetup.numDomains[ZZ];
2292 if (numNodes - numPPRanks != ddGridSetup.numPmeOnlyRanks)
2293 {
2294 gmx_fatal_collective(FARGS, communicator, ddRole == DDRole::Master,
2295 "The size of the domain decomposition grid (%d) does not match the "
2296 "number of PP ranks (%d). The total number of ranks is %d",
2297 numPPRanks, numNodes - ddGridSetup.numPmeOnlyRanks, numNodes);
2298 }
2299 if (ddGridSetup.numPmeOnlyRanks > numPPRanks)
2300 {
2301 gmx_fatal_collective(FARGS, communicator, ddRole == DDRole::Master,
2302 "The number of separate PME ranks (%d) is larger than the number of "
2303 "PP ranks (%d), this is not supported.",
2304 ddGridSetup.numPmeOnlyRanks, numPPRanks);
2305 }
2306 }
2307
2308 /*! \brief Set the cell size and interaction limits, as well as the DD grid */
getDDRankSetup(const gmx::MDLogger & mdlog,int numNodes,const DdRankOrder rankOrder,const DDGridSetup & ddGridSetup,const t_inputrec & ir)2309 static DDRankSetup getDDRankSetup(const gmx::MDLogger& mdlog,
2310 int numNodes,
2311 const DdRankOrder rankOrder,
2312 const DDGridSetup& ddGridSetup,
2313 const t_inputrec& ir)
2314 {
2315 GMX_LOG(mdlog.info)
2316 .appendTextFormatted("Domain decomposition grid %d x %d x %d, separate PME ranks %d",
2317 ddGridSetup.numDomains[XX], ddGridSetup.numDomains[YY],
2318 ddGridSetup.numDomains[ZZ], ddGridSetup.numPmeOnlyRanks);
2319
2320 DDRankSetup ddRankSetup;
2321
2322 ddRankSetup.rankOrder = rankOrder;
2323
2324 ddRankSetup.numPPRanks = numNodes - ddGridSetup.numPmeOnlyRanks;
2325 copy_ivec(ddGridSetup.numDomains, ddRankSetup.numPPCells);
2326
2327 ddRankSetup.usePmeOnlyRanks = (ddGridSetup.numPmeOnlyRanks > 0);
2328 if (ddRankSetup.usePmeOnlyRanks)
2329 {
2330 ddRankSetup.numRanksDoingPme = ddGridSetup.numPmeOnlyRanks;
2331 }
2332 else
2333 {
2334 ddRankSetup.numRanksDoingPme =
2335 ddGridSetup.numDomains[XX] * ddGridSetup.numDomains[YY] * ddGridSetup.numDomains[ZZ];
2336 }
2337
2338 if (EEL_PME(ir.coulombtype) || EVDW_PME(ir.vdwtype))
2339 {
2340 /* The following choices should match those
2341 * in comm_cost_est in domdec_setup.c.
2342 * Note that here the checks have to take into account
2343 * that the decomposition might occur in a different order than xyz
2344 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
2345 * in which case they will not match those in comm_cost_est,
2346 * but since that is mainly for testing purposes that's fine.
2347 */
2348 if (ddGridSetup.numDDDimensions >= 2 && ddGridSetup.ddDimensions[0] == XX
2349 && ddGridSetup.ddDimensions[1] == YY
2350 && ddRankSetup.numRanksDoingPme > ddGridSetup.numDomains[XX]
2351 && ddRankSetup.numRanksDoingPme % ddGridSetup.numDomains[XX] == 0
2352 && getenv("GMX_PMEONEDD") == nullptr)
2353 {
2354 ddRankSetup.npmedecompdim = 2;
2355 ddRankSetup.npmenodes_x = ddGridSetup.numDomains[XX];
2356 ddRankSetup.npmenodes_y = ddRankSetup.numRanksDoingPme / ddRankSetup.npmenodes_x;
2357 }
2358 else
2359 {
2360 /* In case nc is 1 in both x and y we could still choose to
2361 * decompose pme in y instead of x, but we use x for simplicity.
2362 */
2363 ddRankSetup.npmedecompdim = 1;
2364 if (ddGridSetup.ddDimensions[0] == YY)
2365 {
2366 ddRankSetup.npmenodes_x = 1;
2367 ddRankSetup.npmenodes_y = ddRankSetup.numRanksDoingPme;
2368 }
2369 else
2370 {
2371 ddRankSetup.npmenodes_x = ddRankSetup.numRanksDoingPme;
2372 ddRankSetup.npmenodes_y = 1;
2373 }
2374 }
2375 GMX_LOG(mdlog.info)
2376 .appendTextFormatted("PME domain decomposition: %d x %d x %d",
2377 ddRankSetup.npmenodes_x, ddRankSetup.npmenodes_y, 1);
2378 }
2379 else
2380 {
2381 ddRankSetup.npmedecompdim = 0;
2382 ddRankSetup.npmenodes_x = 0;
2383 ddRankSetup.npmenodes_y = 0;
2384 }
2385
2386 return ddRankSetup;
2387 }
2388
/*! \brief Set the cell size and interaction limits
 *
 * Copies \p ddSettings and \p systemInfo into \p dd->comm, applies the grid
 * given by \p ddGridSetup, and derives the multi-body bonded communication
 * cut-off (cutoff_mbody) and the resulting DD cell size limit.
 */
static void set_dd_limits(const gmx::MDLogger& mdlog,
                          DDRole               ddRole,
                          gmx_domdec_t*        dd,
                          const DomdecOptions& options,
                          const DDSettings&    ddSettings,
                          const DDSystemInfo&  systemInfo,
                          const DDGridSetup&   ddGridSetup,
                          const int            numPPRanks,
                          const gmx_mtop_t*    mtop,
                          const t_inputrec*    ir,
                          const gmx_ddbox_t&   ddbox)
{
    gmx_domdec_comm_t* comm = dd->comm;
    comm->ddSettings        = ddSettings;

    /* Initialize to GPU share count to 0, might change later */
    comm->nrank_gpu_shared = 0;

    comm->dlbState = comm->ddSettings.initialDlbState;
    dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
    /* To consider turning DLB on after 2*nstlist steps we need to check
     * at partitioning count 3. Thus we need to increase the first count by 2.
     */
    comm->ddPartioningCountFirstDlbOff += 2;

    comm->bPMELoadBalDLBLimits = FALSE;

    /* Allocate the charge group/atom sorting struct */
    comm->sort = std::make_unique<gmx_domdec_sort_t>();

    comm->systemInfo = systemInfo;

    if (systemInfo.useUpdateGroups)
    {
        /* Note: We would like to use dd->nnodes for the atom count estimate,
         * but that is not yet available here. But this anyhow only
         * affect performance up to the second dd_partition_system call.
         */
        const int homeAtomCountEstimate = mtop->natoms / numPPRanks;
        comm->updateGroupsCog           = std::make_unique<gmx::UpdateGroupsCog>(
                *mtop, systemInfo.updateGroupingPerMoleculetype, maxReferenceTemperature(*ir),
                homeAtomCountEstimate);
    }

    /* Set the DD setup given by ddGridSetup */
    copy_ivec(ddGridSetup.numDomains, dd->numCells);
    dd->ndim = ddGridSetup.numDDDimensions;
    copy_ivec(ddGridSetup.ddDimensions, dd->dim);

    dd->nnodes = dd->numCells[XX] * dd->numCells[YY] * dd->numCells[ZZ];

    /* Static load balancing: per-dimension relative cell sizes from -dd[xyz] */
    snew(comm->slb_frac, DIM);
    if (isDlbDisabled(comm))
    {
        comm->slb_frac[XX] = get_slb_frac(mdlog, "x", dd->numCells[XX], options.cellSizeX);
        comm->slb_frac[YY] = get_slb_frac(mdlog, "y", dd->numCells[YY], options.cellSizeY);
        comm->slb_frac[ZZ] = get_slb_frac(mdlog, "z", dd->numCells[ZZ], options.cellSizeZ);
    }

    /* Set the multi-body cut-off and cellsize limit for DLB */
    comm->cutoff_mbody   = systemInfo.minCutoffForMultiBody;
    comm->cellsize_limit = systemInfo.cellsizeLimit;
    if (systemInfo.haveInterDomainBondeds && systemInfo.increaseMultiBodyCutoff)
    {
        if (systemInfo.filterBondedCommunication || !isDlbDisabled(comm))
        {
            /* Set the bonded communication distance to halfway
             * the minimum and the maximum,
             * since the extra communication cost is nearly zero.
             */
            real acs           = average_cellsize_min(ddbox, dd->numCells);
            comm->cutoff_mbody = 0.5 * (systemInfo.minCutoffForMultiBody + acs);
            if (!isDlbDisabled(comm))
            {
                /* Check if this does not limit the scaling */
                comm->cutoff_mbody = std::min(comm->cutoff_mbody, options.dlbScaling * acs);
            }
            if (!systemInfo.filterBondedCommunication)
            {
                /* Without bBondComm do not go beyond the n.b. cut-off */
                comm->cutoff_mbody = std::min(comm->cutoff_mbody, systemInfo.cutoff);
                if (comm->cellsize_limit >= systemInfo.cutoff)
                {
                    /* We don't loose a lot of efficieny
                     * when increasing it to the n.b. cut-off.
                     * It can even be slightly faster, because we need
                     * less checks for the communication setup.
                     */
                    comm->cutoff_mbody = systemInfo.cutoff;
                }
            }
            /* Check if we did not end up below our original limit */
            comm->cutoff_mbody = std::max(comm->cutoff_mbody, systemInfo.minCutoffForMultiBody);

            if (comm->cutoff_mbody > comm->cellsize_limit)
            {
                comm->cellsize_limit = comm->cutoff_mbody;
            }
        }
        /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
    }

    if (debug)
    {
        fprintf(debug,
                "Bonded atom communication beyond the cut-off: %s\n"
                "cellsize limit %f\n",
                gmx::boolToString(systemInfo.filterBondedCommunication), comm->cellsize_limit);
    }

    /* Only the master rank validates the run setup against DD restrictions */
    if (ddRole == DDRole::Master)
    {
        check_dd_restrictions(dd, ir, mdlog);
    }
}
2505
dd_init_bondeds(FILE * fplog,gmx_domdec_t * dd,const gmx_mtop_t & mtop,const gmx::VirtualSitesHandler * vsite,const t_inputrec * ir,gmx_bool bBCheck,gmx::ArrayRef<cginfo_mb_t> cginfo_mb)2506 void dd_init_bondeds(FILE* fplog,
2507 gmx_domdec_t* dd,
2508 const gmx_mtop_t& mtop,
2509 const gmx::VirtualSitesHandler* vsite,
2510 const t_inputrec* ir,
2511 gmx_bool bBCheck,
2512 gmx::ArrayRef<cginfo_mb_t> cginfo_mb)
2513 {
2514 gmx_domdec_comm_t* comm;
2515
2516 dd_make_reverse_top(fplog, dd, &mtop, vsite, ir, bBCheck);
2517
2518 comm = dd->comm;
2519
2520 if (comm->systemInfo.filterBondedCommunication)
2521 {
2522 /* Communicate atoms beyond the cut-off for bonded interactions */
2523 comm->bondedLinks = makeBondedLinks(mtop, cginfo_mb);
2524 }
2525 else
2526 {
2527 /* Only communicate atoms based on cut-off */
2528 comm->bondedLinks = nullptr;
2529 }
2530 }
2531
/*! \brief Write the DD communication and cell-size settings to \p log
 *
 * With \p bDynLoadBal set, reports the DLB pulse counts and allowed cell
 * shrink; otherwise reports the initial static decomposition. Also reports
 * the maximum allowed interaction distances when any interactions can
 * cross domain boundaries.
 */
static void writeSettings(gmx::TextWriter*   log,
                          gmx_domdec_t*      dd,
                          const gmx_mtop_t*  mtop,
                          const t_inputrec*  ir,
                          gmx_bool           bDynLoadBal,
                          real               dlb_scale,
                          const gmx_ddbox_t* ddbox)
{
    gmx_domdec_comm_t* comm;
    int                d;
    ivec               np;
    real               limit, shrink;

    comm = dd->comm;

    if (bDynLoadBal)
    {
        log->writeString("The maximum number of communication pulses is:");
        for (d = 0; d < dd->ndim; d++)
        {
            log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
        }
        log->ensureLineBreak();
        log->writeLineFormatted("The minimum size for domain decomposition cells is %.3f nm",
                                comm->cellsize_limit);
        log->writeLineFormatted("The requested allowed shrink of DD cells (option -dds) is: %.2f", dlb_scale);
        log->writeString("The allowed shrink of domain decomposition cells is:");
        for (d = 0; d < DIM; d++)
        {
            if (dd->numCells[d] > 1)
            {
                /* Two cells along a non-PBC dimension cannot shrink at all */
                if (d >= ddbox->npbcdim && dd->numCells[d] == 2)
                {
                    shrink = 0;
                }
                else
                {
                    /* Shrink factor relative to the equal-division cell size */
                    shrink = comm->cellsize_min_dlb[d]
                             / (ddbox->box_size[d] * ddbox->skew_fac[d] / dd->numCells[d]);
                }
                log->writeStringFormatted(" %c %.2f", dim2char(d), shrink);
            }
        }
        log->ensureLineBreak();
    }
    else
    {
        set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
        log->writeString("The initial number of communication pulses is:");
        for (d = 0; d < dd->ndim; d++)
        {
            log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
        }
        log->ensureLineBreak();
        log->writeString("The initial domain decomposition cell size is:");
        for (d = 0; d < DIM; d++)
        {
            if (dd->numCells[d] > 1)
            {
                log->writeStringFormatted(" %c %.2f nm", dim2char(d), dd->comm->cellsize_min[d]);
            }
        }
        log->ensureLineBreak();
        log->writeLine();
    }

    const bool haveInterDomainVsites =
            (countInterUpdategroupVsites(*mtop, comm->systemInfo.updateGroupingPerMoleculetype) != 0);

    /* Only report interaction distances when some interaction can span domains */
    if (comm->systemInfo.haveInterDomainBondeds || haveInterDomainVsites
        || comm->systemInfo.haveSplitConstraints || comm->systemInfo.haveSplitSettles)
    {
        std::string decompUnits;
        if (comm->systemInfo.useUpdateGroups)
        {
            decompUnits = "atom groups";
        }
        else
        {
            decompUnits = "atoms";
        }

        log->writeLineFormatted("The maximum allowed distance for %s involved in interactions is:",
                                decompUnits.c_str());
        log->writeLineFormatted("%40s %-7s %6.3f nm", "non-bonded interactions", "",
                                comm->systemInfo.cutoff);

        if (bDynLoadBal)
        {
            limit = dd->comm->cellsize_limit;
        }
        else
        {
            if (dd->unitCellInfo.ddBoxIsDynamic)
            {
                log->writeLine(
                        "(the following are initial values, they could change due to box "
                        "deformation)");
            }
            /* Use the smallest initial cell size over all dimensions */
            limit = dd->comm->cellsize_min[XX];
            for (d = 1; d < DIM; d++)
            {
                limit = std::min(limit, dd->comm->cellsize_min[d]);
            }
        }

        if (comm->systemInfo.haveInterDomainBondeds)
        {
            log->writeLineFormatted("%40s %-7s %6.3f nm", "two-body bonded interactions", "(-rdd)",
                                    std::max(comm->systemInfo.cutoff, comm->cutoff_mbody));
            log->writeLineFormatted("%40s %-7s %6.3f nm", "multi-body bonded interactions",
                                    "(-rdd)",
                                    (comm->systemInfo.filterBondedCommunication || isDlbOn(dd->comm))
                                            ? comm->cutoff_mbody
                                            : std::min(comm->systemInfo.cutoff, limit));
        }
        if (haveInterDomainVsites)
        {
            log->writeLineFormatted("%40s %-7s %6.3f nm", "virtual site constructions", "(-rcon)", limit);
        }
        if (comm->systemInfo.haveSplitConstraints || comm->systemInfo.haveSplitSettles)
        {
            std::string separation =
                    gmx::formatString("atoms separated by up to %d constraints", 1 + ir->nProjOrder);
            log->writeLineFormatted("%40s %-7s %6.3f nm\n", separation.c_str(), "(-rcon)", limit);
        }
        log->ensureLineBreak();
    }
}
2661
logSettings(const gmx::MDLogger & mdlog,gmx_domdec_t * dd,const gmx_mtop_t * mtop,const t_inputrec * ir,real dlb_scale,const gmx_ddbox_t * ddbox)2662 static void logSettings(const gmx::MDLogger& mdlog,
2663 gmx_domdec_t* dd,
2664 const gmx_mtop_t* mtop,
2665 const t_inputrec* ir,
2666 real dlb_scale,
2667 const gmx_ddbox_t* ddbox)
2668 {
2669 gmx::StringOutputStream stream;
2670 gmx::TextWriter log(&stream);
2671 writeSettings(&log, dd, mtop, ir, isDlbOn(dd->comm), dlb_scale, ddbox);
2672 if (dd->comm->dlbState == DlbState::offCanTurnOn)
2673 {
2674 {
2675 log.ensureEmptyLine();
2676 log.writeLine(
2677 "When dynamic load balancing gets turned on, these settings will change to:");
2678 }
2679 writeSettings(&log, dd, mtop, ir, true, dlb_scale, ddbox);
2680 }
2681 GMX_LOG(mdlog.info).asParagraph().appendText(stream.toString());
2682 }
2683
/* Sets the DLB cell size limits: determines the maximum number of
 * communication pulses per dimension and the per-dimension minimum
 * cell sizes, then applies the DLB limits when DLB is on.
 */
static void set_cell_limits_dlb(const gmx::MDLogger& mdlog,
                                gmx_domdec_t*        dd,
                                real                 dlb_scale,
                                const t_inputrec*    ir,
                                const gmx_ddbox_t*   ddbox)
{
    gmx_domdec_comm_t* comm;
    int                d, dim, npulse, npulse_d_max, npulse_d;
    gmx_bool           bNoCutOff;

    comm = dd->comm;

    /* Zero rvdw or rcoulomb means an effectively unlimited cut-off */
    bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);

    /* Determine the maximum number of comm. pulses in one dimension */

    comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);

    /* Determine the maximum required number of grid pulses */
    if (comm->cellsize_limit >= comm->systemInfo.cutoff)
    {
        /* Only a single pulse is required */
        npulse = 1;
    }
    else if (!bNoCutOff && comm->cellsize_limit > 0)
    {
        /* We round down slightly here to avoid overhead due to the latency
         * of extra communication calls when the cut-off
         * would be only slightly longer than the cell size.
         * Later cellsize_limit is redetermined,
         * so we can not miss interactions due to this rounding.
         */
        npulse = static_cast<int>(0.96 + comm->systemInfo.cutoff / comm->cellsize_limit);
    }
    else
    {
        /* There is no cell size limit */
        npulse = std::max(dd->numCells[XX] - 1, std::max(dd->numCells[YY] - 1, dd->numCells[ZZ] - 1));
    }

    if (!bNoCutOff && npulse > 1)
    {
        /* See if we can do with less pulses, based on dlb_scale */
        npulse_d_max = 0;
        for (d = 0; d < dd->ndim; d++)
        {
            dim      = dd->dim[d];
            npulse_d = static_cast<int>(
                    1
                    + dd->numCells[dim] * comm->systemInfo.cutoff
                              / (ddbox->box_size[dim] * ddbox->skew_fac[dim] * dlb_scale));
            npulse_d_max = std::max(npulse_d_max, npulse_d);
        }
        npulse = std::min(npulse, npulse_d_max);
    }

    /* This env var can override npulse */
    d = dd_getenv(mdlog, "GMX_DD_NPULSE", 0);
    if (d > 0)
    {
        npulse = d;
    }

    comm->maxpulse       = 1;
    comm->bVacDLBNoLimit = (ir->pbcType == PbcType::No);
    for (d = 0; d < dd->ndim; d++)
    {
        /* A dimension can have at most numCells-1 pulses */
        comm->cd[d].np_dlb = std::min(npulse, dd->numCells[dd->dim[d]] - 1);
        comm->maxpulse     = std::max(comm->maxpulse, comm->cd[d].np_dlb);
        if (comm->cd[d].np_dlb < dd->numCells[dd->dim[d]] - 1)
        {
            comm->bVacDLBNoLimit = FALSE;
        }
    }

    /* cellsize_limit is set for LINCS in init_domain_decomposition */
    if (!comm->bVacDLBNoLimit)
    {
        comm->cellsize_limit = std::max(comm->cellsize_limit, comm->systemInfo.cutoff / comm->maxpulse);
    }
    /* Safeguard: re-apply the multi-body lower bound (cellsize_limit is
     * never decreased above, so this appears redundant; kept for safety).
     */
    comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
    /* Set the minimum cell size for each DD dimension */
    for (d = 0; d < dd->ndim; d++)
    {
        if (comm->bVacDLBNoLimit || comm->cd[d].np_dlb * comm->cellsize_limit >= comm->systemInfo.cutoff)
        {
            comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
        }
        else
        {
            /* The cut-off must be covered by np_dlb pulses */
            comm->cellsize_min_dlb[dd->dim[d]] = comm->systemInfo.cutoff / comm->cd[d].np_dlb;
        }
    }
    if (comm->cutoff_mbody <= 0)
    {
        comm->cutoff_mbody = std::min(comm->systemInfo.cutoff, comm->cellsize_limit);
    }
    if (isDlbOn(comm))
    {
        set_dlb_limits(dd);
    }
}
2786
dd_moleculesAreAlwaysWhole(const gmx_domdec_t & dd)2787 bool dd_moleculesAreAlwaysWhole(const gmx_domdec_t& dd)
2788 {
2789 return dd.comm->systemInfo.moleculesAreAlwaysWhole;
2790 }
2791
dd_bonded_molpbc(const gmx_domdec_t * dd,PbcType pbcType)2792 gmx_bool dd_bonded_molpbc(const gmx_domdec_t* dd, PbcType pbcType)
2793 {
2794 /* If each molecule is a single charge group
2795 * or we use domain decomposition for each periodic dimension,
2796 * we do not need to take pbc into account for the bonded interactions.
2797 */
2798 return (pbcType != PbcType::No && dd->comm->systemInfo.haveInterDomainBondeds
2799 && !(dd->numCells[XX] > 1 && dd->numCells[YY] > 1
2800 && (dd->numCells[ZZ] > 1 || pbcType == PbcType::XY)));
2801 }
2802
/*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
static void set_ddgrid_parameters(const gmx::MDLogger& mdlog,
                                  gmx_domdec_t*        dd,
                                  real                 dlb_scale,
                                  const gmx_mtop_t*    mtop,
                                  const t_inputrec*    ir,
                                  const gmx_ddbox_t*   ddbox)
{
    gmx_domdec_comm_t* comm        = dd->comm;
    DDRankSetup&       ddRankSetup = comm->ddRankSetup;

    if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
    {
        /* Initialize the PME rank setup, in one or two dimensions */
        init_ddpme(dd, &ddRankSetup.ddpme[0], 0);
        if (ddRankSetup.npmedecompdim >= 2)
        {
            init_ddpme(dd, &ddRankSetup.ddpme[1], 1);
        }
    }
    else
    {
        ddRankSetup.numRanksDoingPme = 0;
        if (dd->pme_nodeid >= 0)
        {
            gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
                                 "Can not have separate PME ranks without PME electrostatics");
        }
    }

    if (debug)
    {
        fprintf(debug, "The DD cut-off is %f\n", comm->systemInfo.cutoff);
    }
    if (!isDlbDisabled(comm))
    {
        set_cell_limits_dlb(mdlog, dd, dlb_scale, ir, ddbox);
    }

    logSettings(mdlog, dd, mtop, ir, dlb_scale, ddbox);

    /* Estimate the fraction of the system volume communicated to each rank,
     * used to size the global-to-local atom lookup below.
     */
    real vol_frac;
    if (ir->pbcType == PbcType::No)
    {
        vol_frac = 1 - 1 / static_cast<double>(dd->nnodes);
    }
    else
    {
        vol_frac = (1 + comm_box_frac(dd->numCells, comm->systemInfo.cutoff, *ddbox))
                   / static_cast<double>(dd->nnodes);
    }
    if (debug)
    {
        fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
    }
    int natoms_tot = mtop->natoms;

    dd->ga2la = new gmx_ga2la_t(natoms_tot, static_cast<int>(vol_frac * natoms_tot));
}
2861
/*! \brief Get some important DD parameters which can be modified by env.vars
 *
 * Note: each dd_getenv call logs the variable when it is set, so the call
 * order here determines the log output order.
 */
static DDSettings getDDSettings(const gmx::MDLogger&     mdlog,
                                const DomdecOptions&     options,
                                const gmx::MdrunOptions& mdrunOptions,
                                const t_inputrec&        ir)
{
    DDSettings ddSettings;

    ddSettings.useSendRecv2        = (dd_getenv(mdlog, "GMX_DD_USE_SENDRECV2", 0) != 0);
    ddSettings.dlb_scale_lim       = dd_getenv(mdlog, "GMX_DLB_MAX_BOX_SCALING", 10);
    ddSettings.useDDOrderZYX       = bool(dd_getenv(mdlog, "GMX_DD_ORDER_ZYX", 0));
    ddSettings.useCartesianReorder = bool(dd_getenv(mdlog, "GMX_NO_CART_REORDER", 1));
    ddSettings.eFlop               = dd_getenv(mdlog, "GMX_DLB_BASED_ON_FLOPS", 0);
    const int recload              = dd_getenv(mdlog, "GMX_DD_RECORD_LOAD", 1);
    ddSettings.nstDDDump           = dd_getenv(mdlog, "GMX_DD_NST_DUMP", 0);
    ddSettings.nstDDDumpGrid       = dd_getenv(mdlog, "GMX_DD_NST_DUMP_GRID", 0);
    ddSettings.DD_debug            = dd_getenv(mdlog, "GMX_DD_DEBUG", 0);

    if (ddSettings.useSendRecv2)
    {
        GMX_LOG(mdlog.info)
                .appendText(
                        "Will use two sequential MPI_Sendrecv calls instead of two simultaneous "
                        "non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite "
                        "communication");
    }

    /* Flop-based load balancing forces load recording on */
    if (ddSettings.eFlop)
    {
        GMX_LOG(mdlog.info).appendText("Will load balance based on FLOP count");
        ddSettings.recordLoad = true;
    }
    else
    {
        ddSettings.recordLoad = (wallcycle_have_counter() && recload > 0);
    }

    ddSettings.initialDlbState = determineInitialDlbState(mdlog, options.dlbOption,
                                                          ddSettings.recordLoad, mdrunOptions, &ir);
    GMX_LOG(mdlog.info)
            .appendTextFormatted("Dynamic load balancing: %s",
                                 edlbs_names[static_cast<int>(ddSettings.initialDlbState)]);

    return ddSettings;
}
2907
//! Constructor; derives and stores the unit-cell information from the input record
gmx_domdec_t::gmx_domdec_t(const t_inputrec& ir) : unitCellInfo(ir) {}
2909
2910 namespace gmx
2911 {
2912
// TODO once the functionality stabilizes, move this class and
// supporting functionality into builder.cpp
/*! \brief Impl class for DD builder
 *
 * Holds the user options plus all intermediate setup objects computed in
 * the constructor; build() then assembles them into a gmx_domdec_t. */
class DomainDecompositionBuilder::Impl
{
public:
    //! Constructor
    Impl(const MDLogger&      mdlog,
         t_commrec*           cr,
         const DomdecOptions& options,
         const MdrunOptions&  mdrunOptions,
         const gmx_mtop_t&    mtop,
         const t_inputrec&    ir,
         const matrix         box,
         ArrayRef<const RVec> xGlobal);

    //! Build the resulting DD manager
    gmx_domdec_t* build(LocalAtomSetManager* atomSets);

    //! Objects used in constructing and configuring DD
    //! {
    //! Logging object
    const MDLogger& mdlog_;
    //! Communication object
    t_commrec* cr_;
    //! User-supplied options configuring DD behavior
    const DomdecOptions options_;
    //! Global system topology
    const gmx_mtop_t& mtop_;
    //! User input values from the tpr file
    const t_inputrec& ir_;
    //! }

    //! Internal objects used in constructing DD
    //! {
    //! Settings combined from the user input
    DDSettings ddSettings_;
    //! Information derived from the simulation system
    DDSystemInfo systemInfo_;
    //! Box structure
    gmx_ddbox_t ddbox_ = { 0 };
    //! Organization of the DD grids
    DDGridSetup ddGridSetup_;
    //! Organization of the DD ranks
    DDRankSetup ddRankSetup_;
    //! Number of DD cells in each dimension
    ivec ddCellIndex_ = { 0, 0, 0 };
    //! IDs of PME-only ranks
    std::vector<int> pmeRanks_;
    //! Contains a valid Cartesian-communicator-based setup, or defaults.
    CartesianRankSetup cartSetup_;
    //! }
};
2966
/*! \brief Constructor: performs all global decisions about the decomposition
 *
 * Determines the DD settings, system-derived limits, grid setup and rank
 * setup, and creates the group communicators; build() later uses these to
 * assemble the gmx_domdec_t.
 */
DomainDecompositionBuilder::Impl::Impl(const MDLogger&      mdlog,
                                       t_commrec*           cr,
                                       const DomdecOptions& options,
                                       const MdrunOptions&  mdrunOptions,
                                       const gmx_mtop_t&    mtop,
                                       const t_inputrec&    ir,
                                       const matrix         box,
                                       ArrayRef<const RVec> xGlobal) :
    mdlog_(mdlog),
    cr_(cr),
    options_(options),
    mtop_(mtop),
    ir_(ir)
{
    GMX_LOG(mdlog_.info).appendTextFormatted("\nInitializing Domain Decomposition on %d ranks", cr_->sizeOfDefaultCommunicator);

    // Combine the user options with any environment-variable overrides
    ddSettings_ = getDDSettings(mdlog_, options_, mdrunOptions, ir_);

    if (ddSettings_.eFlop > 1)
    {
        /* Ensure that we have different random flop counts on different ranks */
        srand(1 + cr_->rankInDefaultCommunicator);
    }

    systemInfo_ = getSystemInfo(mdlog_, MASTER(cr_) ? DDRole::Master : DDRole::Agent,
                                cr->mpiDefaultCommunicator, options_, mtop_, ir_, box, xGlobal);

    // Reject rank counts that cannot be decomposed; large prime factors are
    // only a problem when the cell grid is chosen automatically.
    const int  numRanksRequested         = cr_->sizeOfDefaultCommunicator;
    const bool checkForLargePrimeFactors = (options_.numCells[0] <= 0);
    checkForValidRankCountRequests(numRanksRequested, EEL_PME(ir_.coulombtype),
                                   options_.numPmeRanks, checkForLargePrimeFactors);

    // DD grid setup uses a different cell size limit for
    // automated setup than the one in systemInfo_. The latter is used
    // in set_dd_limits() to configure DLB, for example.
    const real gridSetupCellsizeLimit =
            getDDGridSetupCellSizeLimit(mdlog_, !isDlbDisabled(ddSettings_.initialDlbState),
                                        options_.dlbScaling, ir_, systemInfo_.cellsizeLimit);
    ddGridSetup_ =
            getDDGridSetup(mdlog_, MASTER(cr_) ? DDRole::Master : DDRole::Agent,
                           cr->mpiDefaultCommunicator, numRanksRequested, options_, ddSettings_,
                           systemInfo_, gridSetupCellsizeLimit, mtop_, ir_, box, xGlobal, &ddbox_);
    checkDDGridSetup(ddGridSetup_, MASTER(cr_) ? DDRole::Master : DDRole::Agent,
                     cr->mpiDefaultCommunicator, cr->sizeOfDefaultCommunicator, options_,
                     ddSettings_, systemInfo_, gridSetupCellsizeLimit, ddbox_);

    cr_->npmenodes = ddGridSetup_.numPmeOnlyRanks;

    ddRankSetup_ = getDDRankSetup(mdlog_, cr_->sizeOfDefaultCommunicator, options_.rankOrder,
                                  ddGridSetup_, ir_);

    /* Generate the group communicator, also decides the duty of each rank */
    cartSetup_ = makeGroupCommunicators(mdlog_, ddSettings_, ddRankSetup_, cr_, ddCellIndex_, &pmeRanks_);
}
3021
/*! \brief Build the DD manager from the setup computed in the constructor
 *
 * \param[in] atomSets  Manager for local atom sets, stored (non-owning) in the result
 * \returns the new domain decomposition manager; ownership passes to the caller
 */
gmx_domdec_t* DomainDecompositionBuilder::Impl::build(LocalAtomSetManager* atomSets)
{
    gmx_domdec_t* dd = new gmx_domdec_t(ir_);

    copy_ivec(ddCellIndex_, dd->ci);

    dd->comm = init_dd_comm();

    // Transfer the rank setups decided in the constructor
    dd->comm->ddRankSetup        = ddRankSetup_;
    dd->comm->cartesianRankSetup = cartSetup_;

    set_dd_limits(mdlog_, MASTER(cr_) ? DDRole::Master : DDRole::Agent, dd, options_, ddSettings_,
                  systemInfo_, ddGridSetup_, ddRankSetup_.numPPRanks, &mtop_, &ir_, ddbox_);

    setupGroupCommunication(mdlog_, ddSettings_, pmeRanks_, cr_, mtop_.natoms, dd);

    // Grid parameters and neighbor relations only apply to particle-particle ranks
    if (thisRankHasDuty(cr_, DUTY_PP))
    {
        set_ddgrid_parameters(mdlog_, dd, options_.dlbScaling, &mtop_, &ir_, &ddbox_);

        setup_neighbor_relations(dd);
    }

    /* Set overallocation to avoid frequent reallocation of arrays */
    set_over_alloc_dd(TRUE);

    dd->atomSets = atomSets;

    return dd;
}
3052
//! Constructor; all the work is delegated to the Impl class
DomainDecompositionBuilder::DomainDecompositionBuilder(const MDLogger&      mdlog,
                                                       t_commrec*           cr,
                                                       const DomdecOptions& options,
                                                       const MdrunOptions&  mdrunOptions,
                                                       const gmx_mtop_t&    mtop,
                                                       const t_inputrec&    ir,
                                                       const matrix         box,
                                                       ArrayRef<const RVec> xGlobal) :
    impl_(new Impl(mdlog, cr, options, mdrunOptions, mtop, ir, box, xGlobal))
{
}
3064
//! Build the DD manager; forwards to Impl::build
gmx_domdec_t* DomainDecompositionBuilder::build(LocalAtomSetManager* atomSets)
{
    return impl_->build(atomSets);
}
3069
//! Destructor defined out of line, here where the Impl class is complete
DomainDecompositionBuilder::~DomainDecompositionBuilder() = default;
3071
3072 } // namespace gmx
3073
test_dd_cutoff(const t_commrec * cr,const matrix box,gmx::ArrayRef<const gmx::RVec> x,real cutoffRequested)3074 static gmx_bool test_dd_cutoff(const t_commrec* cr, const matrix box, gmx::ArrayRef<const gmx::RVec> x, real cutoffRequested)
3075 {
3076 gmx_ddbox_t ddbox;
3077 int d, dim, np;
3078 real inv_cell_size;
3079 int LocallyLimited;
3080
3081 const auto* dd = cr->dd;
3082
3083 set_ddbox(*dd, false, box, true, x, &ddbox);
3084
3085 LocallyLimited = 0;
3086
3087 for (d = 0; d < dd->ndim; d++)
3088 {
3089 dim = dd->dim[d];
3090
3091 inv_cell_size = DD_CELL_MARGIN * dd->numCells[dim] / ddbox.box_size[dim];
3092 if (dd->unitCellInfo.ddBoxIsDynamic)
3093 {
3094 inv_cell_size *= DD_PRES_SCALE_MARGIN;
3095 }
3096
3097 np = 1 + static_cast<int>(cutoffRequested * inv_cell_size * ddbox.skew_fac[dim]);
3098
3099 if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
3100 {
3101 if (np > dd->comm->cd[d].np_dlb)
3102 {
3103 return FALSE;
3104 }
3105
3106 /* If a current local cell size is smaller than the requested
3107 * cut-off, we could still fix it, but this gets very complicated.
3108 * Without fixing here, we might actually need more checks.
3109 */
3110 real cellSizeAlongDim =
3111 (dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim]) * ddbox.skew_fac[dim];
3112 if (cellSizeAlongDim * dd->comm->cd[d].np_dlb < cutoffRequested)
3113 {
3114 LocallyLimited = 1;
3115 }
3116 }
3117 }
3118
3119 if (!isDlbDisabled(dd->comm))
3120 {
3121 /* If DLB is not active yet, we don't need to check the grid jumps.
3122 * Actually we shouldn't, because then the grid jump data is not set.
3123 */
3124 if (isDlbOn(dd->comm) && check_grid_jump(0, dd, cutoffRequested, &ddbox, FALSE))
3125 {
3126 LocallyLimited = 1;
3127 }
3128
3129 gmx_sumi(1, &LocallyLimited, cr);
3130
3131 if (LocallyLimited > 0)
3132 {
3133 return FALSE;
3134 }
3135 }
3136
3137 return TRUE;
3138 }
3139
change_dd_cutoff(t_commrec * cr,const matrix box,gmx::ArrayRef<const gmx::RVec> x,real cutoffRequested)3140 gmx_bool change_dd_cutoff(t_commrec* cr, const matrix box, gmx::ArrayRef<const gmx::RVec> x, real cutoffRequested)
3141 {
3142 gmx_bool bCutoffAllowed;
3143
3144 bCutoffAllowed = test_dd_cutoff(cr, box, x, cutoffRequested);
3145
3146 if (bCutoffAllowed)
3147 {
3148 cr->dd->comm->systemInfo.cutoff = cutoffRequested;
3149 }
3150
3151 return bCutoffAllowed;
3152 }
3153
constructGpuHaloExchange(const gmx::MDLogger & mdlog,const t_commrec & cr,const gmx::DeviceStreamManager & deviceStreamManager,gmx_wallcycle * wcycle)3154 void constructGpuHaloExchange(const gmx::MDLogger& mdlog,
3155 const t_commrec& cr,
3156 const gmx::DeviceStreamManager& deviceStreamManager,
3157 gmx_wallcycle* wcycle)
3158 {
3159 GMX_RELEASE_ASSERT(deviceStreamManager.streamIsValid(gmx::DeviceStreamType::NonBondedLocal),
3160 "Local non-bonded stream should be valid when using"
3161 "GPU halo exchange.");
3162 GMX_RELEASE_ASSERT(deviceStreamManager.streamIsValid(gmx::DeviceStreamType::NonBondedNonLocal),
3163 "Non-local non-bonded stream should be valid when using "
3164 "GPU halo exchange.");
3165
3166 if (cr.dd->gpuHaloExchange[0].empty())
3167 {
3168 GMX_LOG(mdlog.warning)
3169 .asParagraph()
3170 .appendTextFormatted(
3171 "NOTE: Activating the 'GPU halo exchange' feature, enabled "
3172 "by the "
3173 "GMX_GPU_DD_COMMS environment variable.");
3174 }
3175
3176 for (int d = 0; d < cr.dd->ndim; d++)
3177 {
3178 for (int pulse = cr.dd->gpuHaloExchange[d].size(); pulse < cr.dd->comm->cd[d].numPulses(); pulse++)
3179 {
3180 cr.dd->gpuHaloExchange[d].push_back(std::make_unique<gmx::GpuHaloExchange>(
3181 cr.dd, d, cr.mpi_comm_mysim, deviceStreamManager.context(),
3182 deviceStreamManager.stream(gmx::DeviceStreamType::NonBondedLocal),
3183 deviceStreamManager.stream(gmx::DeviceStreamType::NonBondedNonLocal), pulse, wcycle));
3184 }
3185 }
3186 }
3187
reinitGpuHaloExchange(const t_commrec & cr,const DeviceBuffer<gmx::RVec> d_coordinatesBuffer,const DeviceBuffer<gmx::RVec> d_forcesBuffer)3188 void reinitGpuHaloExchange(const t_commrec& cr,
3189 const DeviceBuffer<gmx::RVec> d_coordinatesBuffer,
3190 const DeviceBuffer<gmx::RVec> d_forcesBuffer)
3191 {
3192 for (int d = 0; d < cr.dd->ndim; d++)
3193 {
3194 for (int pulse = 0; pulse < cr.dd->comm->cd[d].numPulses(); pulse++)
3195 {
3196 cr.dd->gpuHaloExchange[d][pulse]->reinitHalo(d_coordinatesBuffer, d_forcesBuffer);
3197 }
3198 }
3199 }
3200
communicateGpuHaloCoordinates(const t_commrec & cr,const matrix box,GpuEventSynchronizer * coordinatesReadyOnDeviceEvent)3201 void communicateGpuHaloCoordinates(const t_commrec& cr,
3202 const matrix box,
3203 GpuEventSynchronizer* coordinatesReadyOnDeviceEvent)
3204 {
3205 for (int d = 0; d < cr.dd->ndim; d++)
3206 {
3207 for (int pulse = 0; pulse < cr.dd->comm->cd[d].numPulses(); pulse++)
3208 {
3209 cr.dd->gpuHaloExchange[d][pulse]->communicateHaloCoordinates(box, coordinatesReadyOnDeviceEvent);
3210 }
3211 }
3212 }
3213
communicateGpuHaloForces(const t_commrec & cr,bool accumulateForces)3214 void communicateGpuHaloForces(const t_commrec& cr, bool accumulateForces)
3215 {
3216 for (int d = cr.dd->ndim - 1; d >= 0; d--)
3217 {
3218 for (int pulse = cr.dd->comm->cd[d].numPulses() - 1; pulse >= 0; pulse--)
3219 {
3220 cr.dd->gpuHaloExchange[d][pulse]->communicateHaloForces(accumulateForces);
3221 }
3222 }
3223 }
3224
/*! \brief Put all atoms of each update group in the same periodic image
 *
 * Walks the topology molecule block by molecule block; within every update
 * group, each atom after the first is shifted by whole box vectors until it
 * is within half a box length of the group's first atom along each dimension.
 *
 * \param[in]     dd        Domain decomposition state, provides the update groupings
 * \param[in]     mtop      Global topology, provides the molecule blocks
 * \param[in]     box       The simulation box
 * \param[in,out] positions Global atom positions, adjusted in place
 */
void putUpdateGroupAtomsInSamePeriodicImage(const gmx_domdec_t&      dd,
                                            const gmx_mtop_t&        mtop,
                                            const matrix             box,
                                            gmx::ArrayRef<gmx::RVec> positions)
{
    // Running index of the first atom of the current molecule in `positions`
    int atomOffset = 0;
    for (const gmx_molblock_t& molblock : mtop.molblock)
    {
        const auto& updateGrouping = dd.comm->systemInfo.updateGroupingPerMoleculetype[molblock.type];

        for (int mol = 0; mol < molblock.nmol; mol++)
        {
            for (int g = 0; g < updateGrouping.numBlocks(); g++)
            {
                const auto& block     = updateGrouping.block(g);
                const int   atomBegin = atomOffset + block.begin();
                const int   atomEnd   = atomOffset + block.end();
                // The first atom of the group is the reference and is not moved
                for (int a = atomBegin + 1; a < atomEnd; a++)
                {
                    // Make sure that atoms in the same update group
                    // are in the same periodic image after restarts.
                    // Dimensions are processed from high to low, presumably
                    // because a full box-vector shift along a higher dimension
                    // can also change lower-dimension components in triclinic
                    // boxes -- TODO confirm.
                    for (int d = DIM - 1; d >= 0; d--)
                    {
                        // Shift by whole box vectors until within half a box
                        // length of the reference atom along this dimension
                        while (positions[a][d] - positions[atomBegin][d] > 0.5_real * box[d][d])
                        {
                            positions[a] -= box[d];
                        }
                        while (positions[a][d] - positions[atomBegin][d] < -0.5_real * box[d][d])
                        {
                            positions[a] += box[d];
                        }
                    }
                }
            }
            atomOffset += updateGrouping.fullRange().end();
        }
    }
}
3263