1 /*
2 * This file is part of the GROMACS molecular simulation package.
3 *
4 * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
5 * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
9 *
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
14 *
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 *
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
32 *
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
35 */
36 /*! \internal \file
37 *
38 * \brief Declares implementation functions and types for the domain
39 * decomposition module.
40 *
41 * \author Berk Hess <hess@kth.se>
42 * \ingroup module_domdec
43 */
44 #ifndef GMX_DOMDEC_DOMDEC_INTERNAL_H
45 #define GMX_DOMDEC_DOMDEC_INTERNAL_H
46
47 #include "config.h"
48
49 #include "gromacs/domdec/domdec.h"
50 #include "gromacs/domdec/domdec_struct.h"
51 #include "gromacs/mdlib/updategroupscog.h"
52 #include "gromacs/timing/cyclecounter.h"
53 #include "gromacs/topology/block.h"
54
55 struct t_commrec;
56
57 /*! \cond INTERNAL */
58
//! Maximum number of load entries per rank (presumably bounds domdec_load_t data; used outside this header — confirm at use sites)
#define DD_NLOAD_MAX 9

//! Opaque struct for timing the force load-balance region; defined elsewhere
struct BalanceRegion;

namespace gmx
{
enum class DdRankOrder : int;
} // namespace gmx
68
69
//! Indices to communicate in a dimension
struct gmx_domdec_ind_t
{
    //! @{
    /*! \brief The numbers of charge groups to send and receive for each
     * cell that requires communication, the last entry contains the total
     * number of atoms that needs to be communicated.
     */
    int nsend[DD_MAXIZONE + 2] = {};
    int nrecv[DD_MAXIZONE + 2] = {};
    //! @}
    //! The (local) indices of the charge groups to send
    std::vector<int> index;
    //! @{
    /* The per-zone atom range for non-in-place communication */
    int cell2at0[DD_MAXIZONE] = {};
    int cell2at1[DD_MAXIZONE] = {};
    //! @}
};
89
90 //! Things relating to index communication
91 struct gmx_domdec_comm_dim_t
92 {
93 /* Returns the number of grid pulses (the number of domains in the halo along this dimension) */
numPulsesgmx_domdec_comm_dim_t94 int numPulses() const { return ind.size(); }
95
96 /**< For dlb, for use with edlbAUTO */
97 int np_dlb = 0;
98 /**< The indices to communicate, size np */
99 std::vector<gmx_domdec_ind_t> ind;
100 /**< Can we receive data in place? */
101 bool receiveInPlace = false;
102 };
103
/*! \brief Load balancing data along a dim used on the master rank of that dim */
struct RowMaster
{
    //! Per-cell boundary limits, state plus scratch, for one cell in the row
    struct Bounds
    {
        /**< State var.: max lower bound., incl. neighbors */
        real cellFracLowerMax = 0;
        /**< State var.: min upper bound., incl. neighbors */
        real cellFracUpperMin = 0;
        /**< Temp. var.: lower limit for cell boundary */
        real boundMin = 0;
        /**< Temp. var.: upper limit for cell boundary */
        real boundMax = 0;
    };

    /**< Temp. var.: is this cell size at the limit */
    std::vector<bool> isCellMin;
    /**< State var.: cell boundaries, box relative */
    std::vector<real> cellFrac;
    /**< Temp. var.: old cell size */
    std::vector<real> oldCellFrac;
    /**< Cell bounds */
    std::vector<Bounds> bounds;
    /**< State var.: is DLB limited in this row */
    bool dlbIsLimited = false;
    /**< Temp. var.: scratch buffer (NOTE(review): exact contents set by the DLB code — confirm at use sites) */
    std::vector<real> buf_ncd;
};
132
/*! \brief Struct for managing cell sizes with DLB along a dimension */
struct DDCellsizesWithDlb
{
    /**< Cell row root struct, only available on the first rank in a row */
    std::unique_ptr<RowMaster> rowMaster;
    /**< The cell sizes, in fractions, along a row, not available on the first rank in a row */
    std::vector<real> fracRow;
    /**< The lower corner, in fractions, in triclinic space */
    real fracLower = 0;
    /**< The upper corner, in fractions, in triclinic space */
    real fracUpper = 0;
    /**< The maximum lower corner among all our neighbors */
    real fracLowerMax = 0;
    /**< The minimum upper corner among all our neighbors */
    real fracUpperMin = 0;
};
149
/*! \brief Struct for compute load communication
 *
 * Here floats are accurate enough, since these variables
 * only influence the load balancing, not the actual MD results.
 */
typedef struct domdec_load
{
    /**< The number of load recordings */
    int nload = 0;
    /**< Scan of the sum of load over dimensions */
    float* load = nullptr;
    /**< The sum of the load over the ranks up to our current dimension */
    float sum = 0;
    /**< The maximum over the ranks contributing to \p sum */
    float max = 0;
    /**< Like \p sum, but takes the maximum when the load balancing is limited */
    float sum_m = 0;
    /**< Minimum cell volume, relative to the box */
    float cvol_min = 0;
    /**< The PP time during which PME can overlap */
    float mdf = 0;
    /**< The PME-only rank load */
    float pme = 0;
    /**< Bit flags that tell if DLB was limited, per dimension */
    int flags = 0;
} domdec_load_t;
176
/*! \brief Data needed to sort an atom to the desired location in the local state */
typedef struct gmx_cgsort
{
    /**< Neighborsearch grid cell index (primary sort key) */
    int nsc = 0;
    /**< Global atom/charge group index */
    int ind_gl = 0;
    /**< Local atom/charge group index */
    int ind = 0;
} gmx_cgsort_t;
187
/*! \brief Temporary buffers for sorting atoms */
typedef struct gmx_domdec_sort
{
    /**< Sorted array of indices */
    std::vector<gmx_cgsort_t> sorted;
    /**< Array of stationary atom/charge group indices */
    std::vector<gmx_cgsort_t> stationary;
    /**< Array of moved atom/charge group indices */
    std::vector<gmx_cgsort_t> moved;
    /**< Integer buffer for sorting */
    std::vector<int> intBuffer;
} gmx_domdec_sort_t;
200
201 /*! \brief Manages atom ranges and order for the local state atom vectors */
202 class DDAtomRanges
203 {
204 public:
205 /*! \brief The local state atom order
206 *
207 * This enum determines the order of the atoms in the local state.
208 * ddnatHOME and ddnatZONE should be first and second,
209 * the others can be ordered as wanted.
210 */
211 enum class Type : int
212 {
213 Home, /**< The home atoms */
214 Zones, /**< All zones in the eighth shell */
215 Vsites, /**< Atoms communicated for virtual sites */
216 Constraints, /**< Atoms communicated for constraints */
217 Number /**< Not a count, only present for convenience */
218 };
219
220 /*! \brief Returns the start atom index for range \p rangeType */
start(Type rangeType)221 int start(Type rangeType) const
222 {
223 if (rangeType == Type::Home)
224 {
225 return 0;
226 }
227 else
228 {
229 return end_[static_cast<int>(rangeType) - 1];
230 }
231 }
232
233 /*! \brief Returns the end atom index for range \p rangeType */
end(Type rangeType)234 int end(Type rangeType) const { return end_[static_cast<int>(rangeType)]; }
235
236 /*! \brief Returns the number of home atoms */
numHomeAtoms()237 int numHomeAtoms() const { return end_[static_cast<int>(Type::Home)]; }
238
239 /*! \brief Returns the total number of atoms */
numAtomsTotal()240 int numAtomsTotal() const { return end_[static_cast<int>(Type::Number) - 1]; }
241
242 /*! \brief Sets the end index of range \p rangeType to \p end
243 *
244 * This should be called either with Type::Home or with a type
245 * that is larger than that passed in the previous call to setEnd.
246 * A release assertion for these conditions are present.
247 */
setEnd(Type rangeType,int end)248 void setEnd(Type rangeType, int end)
249 {
250 GMX_RELEASE_ASSERT(rangeType == Type::Home || rangeType > lastTypeSet_,
251 "Can only set either home or a larger type than the last one");
252
253 for (int i = static_cast<int>(rangeType); i < static_cast<int>(Type::Number); i++)
254 {
255 end_[i] = end;
256 }
257
258 lastTypeSet_ = rangeType;
259 }
260
261 private:
262 /*! \brief The list of end atom indices */
263 std::array<int, static_cast<int>(Type::Number)> end_;
264 Type lastTypeSet_ = Type::Number;
265 };
266
/*! \brief Enum of dynamic load balancing states
 *
 * Allowed DLB states and transitions
 * - initialization at startup:
 *   -> offUser ("-dlb no")
 *   -> onUser ("-dlb yes")
 *   -> offCanTurnOn ("-dlb auto")
 *
 * - in automatic mode (i.e. initial state offCanTurnOn):
 *   offCanTurnOn -> onCanTurnOff
 *   offCanTurnOn -> offForever
 *   offCanTurnOn -> offTemporarilyLocked
 *   offTemporarilyLocked -> offCanTurnOn
 *   onCanTurnOff -> offCanTurnOn
 */
enum class DlbState
{
    offUser, /**< DLB is permanently off per user request */
    offForever, /**< DLB is off due to a runtime condition (not supported or causes performance loss) and will never be turned on */
    offCanTurnOn, /**< DLB is off and will turn on on imbalance */
    offTemporarilyLocked, /**< DLB is off and temporarily can't turn on */
    onCanTurnOff, /**< DLB is on and can turn off when slow */
    nr /**< The number of DLB states */
};
292
/*! \brief The PME domain decomposition for one dimension */
typedef struct gmx_ddpme
{
    /**< The dimension */
    int dim = 0;
    /**< Tells if DD and PME dims match */
    gmx_bool dim_match = false;
    /**< The number of PME ranks/domains in this dimension */
    int nslab = 0;
    /**< Cell sizes for determining the PME comm. with SLB */
    real* slb_dim_f = nullptr;
    /**< The minimum pp node location, size nslab */
    int* pp_min = nullptr;
    /**< The maximum pp node location, size nslab */
    int* pp_max = nullptr;
    /**< The maximum shift for coordinate redistribution in PME */
    int maxshift = 0;
} gmx_ddpme_t;
311
//! Zone boundary data communicated between neighboring cells for staggered DLB grids
struct gmx_ddzone_t
{
    /**< The minimum bottom of this zone */
    real min0 = 0;
    /**< The maximum top of this zone */
    real max1 = 0;
    /**< The minimum top of this zone */
    real min1 = 0;
    /**< The maximum bottom communication height for this zone */
    real mch0 = 0;
    /**< The maximum top communication height for this zone */
    real mch1 = 0;
    /**< The bottom value of the first cell in this zone */
    real p1_0 = 0;
    /**< The top value of the first cell in this zone */
    real p1_1 = 0;
    /**< Bool disguised as a real, 1 when the above data has been set. 0 otherwise */
    real dataSet = 0;
};

/*! \brief The number of reals in gmx_ddzone_t
 *
 * NOTE(review): must match the number of real members above; presumably used
 * when packing the struct into a real buffer for communication — keep in sync.
 */
constexpr int c_ddzoneNumReals = 8;
334
335 template<typename T>
336 class DDBufferAccess;
337
338 /*! \brief Temporary storage container that minimizes (re)allocation and clearing
339 *
340 * This is only the storage, actual access happens through DDBufferAccess.
341 * All methods check if the buffer is (not) in use.
342 */
343 template<typename T>
344 class DDBuffer
345 {
346 private:
347 /*! \brief Returns a buffer of size \p numElements, the elements are undefined */
resize(size_t numElements)348 gmx::ArrayRef<T> resize(size_t numElements)
349 {
350 GMX_ASSERT(isInUse_, "Should only operate on acquired buffers");
351
352 if (numElements > buffer_.size())
353 {
354 buffer_.resize(numElements);
355 }
356
357 return gmx::arrayRefFromArray(buffer_.data(), numElements);
358 }
359
360 /*! \brief Acquire the buffer for use with size set to \p numElements, the elements are undefined */
acquire(size_t numElements)361 gmx::ArrayRef<T> acquire(size_t numElements)
362 {
363 GMX_RELEASE_ASSERT(!isInUse_, "Should only request free buffers");
364 isInUse_ = true;
365
366 return resize(numElements);
367 }
368
369 /*! \brief Releases the buffer, buffer_ should not be used after this */
release()370 void release()
371 {
372 GMX_RELEASE_ASSERT(isInUse_, "Should only release buffers in use");
373 isInUse_ = false;
374 }
375
376 std::vector<T> buffer_; /**< The actual memory buffer */
377 bool isInUse_ = false; /**< Flag that tells whether the buffer is in use */
378
379 friend class DDBufferAccess<T>;
380 };
381
382 /*! \brief Class that manages access to a temporary memory buffer */
383 template<typename T>
384 class DDBufferAccess
385 {
386 public:
387 /*! \brief Constructor, returns a buffer of size \p numElements, element values are undefined
388 *
389 * \note The actual memory buffer \p ddBuffer can not be used to
390 * create other DDBufferAccess objects until the one created
391 * here is destroyed.
392 */
DDBufferAccess(DDBuffer<T> & ddBuffer,size_t numElements)393 DDBufferAccess(DDBuffer<T>& ddBuffer, size_t numElements) : ddBuffer_(ddBuffer)
394 {
395 buffer = ddBuffer_.acquire(numElements);
396 }
397
~DDBufferAccess()398 ~DDBufferAccess() { ddBuffer_.release(); }
399
400 /*! \brief Resizes the buffer to \p numElements, new elements are undefined
401 *
402 * \note The buffer arrayref is updated after this call.
403 */
resize(size_t numElements)404 void resize(size_t numElements) { buffer = ddBuffer_.resize(numElements); }
405
406 private:
407 DDBuffer<T>& ddBuffer_; /**< Reference to the storage class */
408 public:
409 gmx::ArrayRef<T> buffer; /**< The access to the memory buffer */
410 };
411
/*! \brief Temporary buffer for setting up communication over one pulse and all zones in the halo */
struct dd_comm_setup_work_t
{
    /**< The local atom group indices to send */
    std::vector<int> localAtomGroupBuffer;
    /**< Buffer for collecting the global atom group indices to send */
    std::vector<int> atomGroupBuffer;
    /**< Buffer for collecting the atom group positions to send */
    std::vector<gmx::RVec> positionBuffer;
    /**< The number of atoms contained in the atom groups to send */
    int nat = 0;
    /**< The number of atom groups to send for the last zone */
    int nsend_zone = 0;
};
426
427 /*! \brief Information about the simulated system */
428 struct DDSystemInfo
429 {
430 //! True when update groups are used
431 bool useUpdateGroups = false;
432 //! Update atom grouping for each molecule type
433 std::vector<gmx::RangePartitioning> updateGroupingPerMoleculetype;
434 //! The maximum radius over all update groups
435 real maxUpdateGroupRadius;
436
437 //! Are molecules always whole, i.e. not broken by PBC?
438 bool moleculesAreAlwaysWhole = false;
439 //! Are there inter-domain bonded interactions?
440 bool haveInterDomainBondeds = false;
441 //! Are there inter-domain multi-body interactions?
442 bool haveInterDomainMultiBodyBondeds = false;
443
444 //! Cut-off for multi-body interactions
445 real minCutoffForMultiBody = 0;
446 //! Cut-off for non-bonded/2-body interactions
447 real cutoff = 0;
448 //! The lower limit for the DD cell size
449 real cellsizeLimit = 0;
450
451 //! Can atoms connected by constraints be assigned to different domains?
452 bool haveSplitConstraints = false;
453 //! Can atoms connected by settles be assigned to different domains?
454 bool haveSplitSettles = false;
455 //! Estimated communication range needed for constraints
456 real constraintCommunicationRange = 0;
457
458 //! Whether to only communicate atoms beyond the non-bonded cut-off when they are involved in bonded interactions with non-local atoms
459 bool filterBondedCommunication = false;
460 //! Whether to increase the multi-body cut-off beyond the minimum required
461 bool increaseMultiBodyCutoff = false;
462 };
463
/*! \brief Settings that affect the behavior of the domain decomposition
 *
 * These settings depend on options chosen by the user, set by environment
 * variables, as well as hardware support. The initial DLB state also
 * depends on the integrator.
 *
 * Note: Settings that depend on the simulated system are in DDSystemInfo.
 */
struct DDSettings
{
    //! Use MPI_Sendrecv communication instead of non-blocking calls
    bool useSendRecv2 = false;

    /* Information for managing the dynamic load balancing */
    //! Maximum DLB scaling per load balancing step in percent
    int dlb_scale_lim = 0;
    //! Flop counter (0=no, 1=yes, 2=with (eFlop-1)*5% noise)
    int eFlop = 0;

    //! Whether to order the DD dimensions from z to x
    bool useDDOrderZYX = false;

    //! Whether to use MPI Cartesian reordering of communicators, when supported (almost never)
    bool useCartesianReorder = true;

    //! Whether we should record the load
    bool recordLoad = false;

    /* Debugging */
    //! Step interval for dumping the local+non-local atoms to pdb
    int nstDDDump = 0;
    //! Step interval for dumping the DD grid to pdb
    int nstDDDumpGrid = 0;
    //! DD debug print level: 0, 1, 2
    int DD_debug = 0;

    //! The DLB state at the start of the run
    DlbState initialDlbState = DlbState::offCanTurnOn;
};
503
/*! \brief Information on how the DD ranks are set up */
struct DDRankSetup
{
    /**< The rank ordering (NOTE(review): no default value here; confirm it is always assigned during DD setup before use) */
    gmx::DdRankOrder rankOrder;

    /**< The number of particle-particle (non PME-only) ranks */
    int numPPRanks = 0;
    /**< The DD PP grid */
    ivec numPPCells = { 0, 0, 0 };

    /* PME and Cartesian communicator stuff */
    bool usePmeOnlyRanks = false;
    /**< The number of decomposition dimensions for PME, 0: no PME */
    int npmedecompdim = 0;
    /**< The number of ranks doing PME (PP/PME or only PME) */
    int numRanksDoingPme = 0;
    /**< The number of PME ranks/domains along x */
    int npmenodes_x = 0;
    /**< The number of PME ranks/domains along y */
    int npmenodes_y = 0;
    /**< The 1D or 2D PME domain decomposition setup */
    gmx_ddpme_t ddpme[2];
};
528
/*! \brief Information on Cartesian MPI setup of the DD ranks */
struct CartesianRankSetup
{
    /**< Use Cartesian communication between PP and PME ranks */
    bool bCartesianPP_PME = false;
    /**< Cartesian grid for combined PP+PME ranks */
    ivec ntot = {};
    /**< The number of dimensions for the PME setup that are Cartesian */
    int cartpmedim = 0;
    /**< The Cartesian index to sim rank conversion, used with bCartesianPP_PME */
    std::vector<int> ddindex2simnodeid;

    /* The DD particle-particle nodes only */
    /**< Use a Cartesian communicator for PP */
    bool bCartesianPP = false;
    /**< The Cartesian index to DD rank conversion, used with bCartesianPP */
    std::vector<int> ddindex2ddnodeid;
};
547
/*! \brief Struct for domain decomposition communication
 *
 * This struct contains most information about domain decomposition
 * communication setup, some communication buffers, some statistics
 * and also the setup for the communication between particle-particle
 * and PME only ranks.
 *
 * All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
 * unless stated otherwise.
 */
struct gmx_domdec_comm_t // NOLINT (clang-analyzer-optin.performance.Padding)
{
    /**< Constant parameters that control DD behavior */
    DDSettings ddSettings;

    /**< Information on how the DD ranks are set up */
    DDRankSetup ddRankSetup;
    /**< Information on the Cartesian part of the DD rank setup */
    CartesianRankSetup cartesianRankSetup;

    /* Charge group / atom sorting */
    /**< Data structure for cg/atom sorting */
    std::unique_ptr<gmx_domdec_sort_t> sort;

    //! Centers of mass of local update groups
    std::unique_ptr<gmx::UpdateGroupsCog> updateGroupsCog;

    /* Data for the optional filtering of communication of atoms for bonded interactions */
    /**< Links between atoms through bonded interactions */
    t_blocka* bondedLinks = nullptr;

    /* The DLB state, possible values are defined above */
    DlbState dlbState;
    /* With dlbState=DlbState::offCanTurnOn, should we check if to turn DLB on at the next DD? */
    gmx_bool bCheckWhetherToTurnDlbOn = false;
    /* The first DD count since we are running without DLB */
    int ddPartioningCountFirstDlbOff = 0;

    /* Cell sizes for static load balancing, first index cartesian */
    real** slb_frac = nullptr;

    /**< Information about the simulated system */
    DDSystemInfo systemInfo;

    /* The width of the communicated boundaries */
    /**< Cut-off for multi-body interactions, also 2-body bonded when \p cutoff_mbody > \p cutoff */
    real cutoff_mbody = 0;
    /**< The minimum guaranteed cell-size, Cartesian indexing */
    rvec cellsize_min = {};
    /**< The minimum guaranteed cell-size with dlb=auto */
    rvec cellsize_min_dlb = {};
    /**< The lower limit for the DD cell size with DLB */
    real cellsize_limit = 0;
    /**< Effectively no NB cut-off limit with DLB for systems without PBC? */
    gmx_bool bVacDLBNoLimit = false;

    /** With PME load balancing we set limits on DLB */
    gmx_bool bPMELoadBalDLBLimits = false;
    /** DLB needs to take into account that we want to allow this maximum
     * cut-off (for PME load balancing), this could limit cell boundaries.
     */
    real PMELoadBal_max_cutoff = 0;

    /**< box lower corner, required with dim's without pbc and -gcom */
    rvec box0 = {};
    /**< box size, required with dim's without pbc and -gcom */
    rvec box_size = {};

    /**< The DD cell lower corner, in triclinic space */
    rvec cell_x0 = {};
    /**< The DD cell upper corner, in triclinic space */
    rvec cell_x1 = {};

    /**< The old \p cell_x0, to check cg displacements */
    rvec old_cell_x0 = {};
    /**< The old \p cell_x1, to check cg displacements */
    rvec old_cell_x1 = {};

    /** The communication setup and charge group boundaries for the zones */
    gmx_domdec_zones_t zones;

    /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
     * cell boundaries of neighboring cells for staggered grids when using
     * dynamic load balancing.
     */
    /**< Zone limits for dim 1 with staggered grids */
    gmx_ddzone_t zone_d1[2];
    /**< Zone limits for dim 2 with staggered grids */
    gmx_ddzone_t zone_d2[2][2];

    /** The coordinate/force communication setup and indices */
    gmx_domdec_comm_dim_t cd[DIM];
    /** Restricts the maximum number of cells to communicate with in one dimension
     *
     * Dynamic load balancing is not permitted to change sizes if it
     * would violate this restriction. */
    int maxpulse = 0;

    /** Which cg distribution is stored on the master node,
     *  stored as DD partitioning call count.
     */
    int64_t master_cg_ddp_count = 0;

    /** The number of cg's received from the direct neighbors */
    int zone_ncg1[DD_MAXZONE] = { 0 };

    /** The atom ranges in the local state */
    DDAtomRanges atomRanges;

    /** Array for signalling if atoms have moved to another domain */
    std::vector<int> movedBuffer;

    /** Communication int buffer for general use */
    DDBuffer<int> intBuffer;

    /** Communication rvec buffer for general use */
    DDBuffer<gmx::RVec> rvecBuffer;

    /* Temporary storage for thread parallel communication setup */
    /**< Thread-local work data */
    std::vector<dd_comm_setup_work_t> dth;

    /* Communication buffer only used with multiple grid pulses */
    /**< Another rvec comm. buffer */
    DDBuffer<gmx::RVec> rvecBuffer2;

    /* Communication buffers for local redistribution */
    /**< Charge group flag comm. buffers */
    std::array<std::vector<int>, DIM * 2> cggl_flag;
    /**< Charge group center comm. buffers */
    std::array<std::vector<gmx::RVec>, DIM * 2> cgcm_state;

    /* Cell sizes for dynamic load balancing */
    std::vector<DDCellsizesWithDlb> cellsizesWithDlb;

    /* Stuff for load communication */
    /**< The recorded load data */
    domdec_load_t* load = nullptr;
    /**< The number of MPI ranks sharing the GPU our rank is using */
    int nrank_gpu_shared = 0;
#if GMX_MPI
    /**< The MPI load communicator */
    MPI_Comm* mpi_comm_load = nullptr;
    /**< The MPI load communicator for ranks sharing a GPU */
    MPI_Comm mpi_comm_gpu_shared;
#endif

    /**< Struct for timing the force load balancing region */
    BalanceRegion* balanceRegion = nullptr;

    /* Cycle counters over nstlist steps */
    /**< Total cycles counted */
    float cycl[ddCyclNr] = {};
    /**< The number of cycle recordings */
    int cycl_n[ddCyclNr] = {};
    /**< The maximum cycle count */
    float cycl_max[ddCyclNr] = {};
    /**< Total flops counted */
    double flop = 0.0;
    /**< The number of flop recordings */
    int flop_n = 0;
    /** How many times did we have load measurements */
    int n_load_have = 0;
    /** How many times have we collected the load measurements */
    int n_load_collect = 0;

    /* Cycle count history for DLB checks */
    /**< The averaged cycles per step over the last nstlist step before turning on DLB */
    float cyclesPerStepBeforeDLB = 0;
    /**< The running average of the cycles per step during DLB */
    float cyclesPerStepDlbExpAverage = 0;
    /**< Have we turned off DLB (after turning DLB on)? */
    bool haveTurnedOffDlb = false;
    /**< The DD step at which we last measured that DLB off was faster than DLB on, 0 if there was no such step */
    int64_t dlbSlowerPartitioningCount = 0;

    /* Statistics for atoms */
    /**< The atoms per range, summed over the steps */
    double sum_nat[static_cast<int>(DDAtomRanges::Type::Number)] = {};

    /* Statistics for calls and times */
    /**< The number of partitioning calls */
    int ndecomp = 0;
    /**< The number of load recordings */
    int nload = 0;
    /**< Total MD step time */
    double load_step = 0.0;
    /**< Total PP force time */
    double load_sum = 0.0;
    /**< Max \p load_sum over the ranks */
    double load_max = 0.0;
    /**< Was load balancing limited, per DD dim */
    ivec load_lim = {};
    /**< Total time on PP done during PME overlap time */
    double load_mdf = 0.0;
    /**< Total time on our PME-only rank */
    double load_pme = 0.0;

    /** The last partition step */
    int64_t partition_step = INT_MIN;
};
749
/*! \brief DD zone permutation
 *
 * Zone permutation from the Cartesian x-major/z-minor order to an order
 * that leads to consecutive charge groups for neighbor searching.
 * TODO: It should be possible to remove this now that the group scheme is removed
 *
 * First index: the number of decomposed dimensions minus one; second index: the zone.
 */
static constexpr int zone_perm[3][4] = { { 0, 0, 0, 0 }, { 1, 0, 0, 0 }, { 3, 0, 1, 2 } };
757
758 /*! \brief DD zone reordering to Cartesian order
759 *
760 * Index to reorder the zone such that the end up in Cartesian order
761 * with dimension index 0 major and dimension index 2 minor.
762 */
763 static const int zone_reorder_cartesian[DD_MAXZONE] = { 0, 1, 3, 2, 5, 4, 6, 7 };
764
/* dd_zo and dd_zp3 are set up such that i zones with non-zero
 * components see only j zones with that component 0.
 */

/*! \brief Returns the DD cut-off distance for multi-body interactions */
real dd_cutoff_multibody(const gmx_domdec_t* dd);

/*! \brief Returns the DD cut-off distance for two-body interactions */
real dd_cutoff_twobody(const gmx_domdec_t* dd);
774
775 /*! \brief Returns the domain index given the number of domains and the domain coordinates
776 *
777 * This order is required to minimize the coordinate communication in PME
778 * which uses decomposition in the x direction.
779 */
dd_index(const ivec numDomains,const ivec domainCoordinates)780 static inline int dd_index(const ivec numDomains, const ivec domainCoordinates)
781 {
782 return ((domainCoordinates[XX] * numDomains[YY] + domainCoordinates[YY]) * numDomains[ZZ])
783 + domainCoordinates[ZZ];
784 };
785
786 /*! Returns the size of the buffer to hold fractional cell boundaries for DD dimension index dimIndex */
ddCellFractionBufferSize(const gmx_domdec_t * dd,int dimIndex)787 static inline int ddCellFractionBufferSize(const gmx_domdec_t* dd, int dimIndex)
788 {
789 return dd->numCells[dd->dim[dimIndex]] + 1 + dimIndex * 2 + 1 + dimIndex;
790 }
791
/*! \brief Maximum number of ranks for using send/recv for state scattering and gathering
 *
 * Use separate MPI send and receive commands
 * when #ranks <= c_maxNumRanksUseSendRecvForScatterAndGather
 * This saves memory (and some copying for small #ranks).
 * For high parallelization scatter and gather calls are used.
 */
static constexpr int c_maxNumRanksUseSendRecvForScatterAndGather = 4;

/*! \brief Make DD cells larger by this factor than the limit to avoid rounding issues */
static constexpr double DD_CELL_MARGIN = 1.0001;

/*! \brief Factor for checking DD cell size limitation during DLB, should be in between 1 and DD_CELL_MARGIN */
static constexpr double DD_CELL_MARGIN2 = 1.00005;

/*! \brief With pressure scaling, keep cell sizes 2% above the limit to allow for some scaling */
static constexpr double DD_PRES_SCALE_MARGIN = 1.02;
809
810 /*! \endcond */
811
812 #endif
813