// ---------------------------------------------------------------------
//
// Copyright (C) 2019 - 2020 by the deal.II authors
//
// This file is part of the deal.II library.
//
// The deal.II library is free software; you can use it, redistribute
// it, and/or modify it under the terms of the GNU Lesser General
// Public License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// The full text of the license can be found in the file LICENSE.md at
// the top level directory of deal.II.
//
// ---------------------------------------------------------------------

#ifndef dealii_base_mpi_compute_index_owner_internal_h
#define dealii_base_mpi_compute_index_owner_internal_h

#include <deal.II/base/config.h>

#include <deal.II/base/mpi.h>
#include <deal.II/base/mpi_consensus_algorithms.h>

DEAL_II_NAMESPACE_OPEN

namespace Utilities
{
  namespace MPI
  {
    namespace internal
    {
      /**
       * An internal namespace used for Utilities::MPI::compute_index_owner()
       * and for Utilities::MPI::Partitioner::set_ghost_indices().
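       *
       * As a usage sketch (via the public entry point rather than the
       * internal classes below; the names `owned`, `ghosts`, and `comm` are
       * placeholders):
       * @code
       *   // owned: locally owned indices; ghosts: indices whose owners we
       *   // need to determine; both IndexSet objects of the same global size
       *   const std::vector<unsigned int> owners =
       *     Utilities::MPI::compute_index_owner(owned, ghosts, comm);
       * @endcode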
       */
      namespace ComputeIndexOwner
      {
        /**
         * Specialization of ConsensusAlgorithms::Process for setting up the
         * Dictionary even if there are ranges in the IndexSet space not owned
         * by any process.
         *
         * @note Only for internal usage.
         */
        class DictionaryPayLoad
          : public ConsensusAlgorithms::Process<
              std::pair<types::global_dof_index, types::global_dof_index>,
              unsigned int>
        {
        public:
          /**
           * Constructor.
           */
          DictionaryPayLoad(
            const std::map<unsigned int,
                           std::vector<std::pair<types::global_dof_index,
                                                 types::global_dof_index>>>
              &                        buffers,
            std::vector<unsigned int> &actually_owning_ranks,
            const std::pair<types::global_dof_index, types::global_dof_index>
              &                        local_range,
            std::vector<unsigned int> &actually_owning_rank_list)
            : buffers(buffers)
            , actually_owning_ranks(actually_owning_ranks)
            , local_range(local_range)
            , actually_owning_rank_list(actually_owning_rank_list)
          {}

          /**
           * Implementation of
           * Utilities::MPI::ConsensusAlgorithms::Process::compute_targets().
           */
          virtual std::vector<unsigned int>
          compute_targets() override
          {
            std::vector<unsigned int> targets;
            for (const auto &rank_pair : buffers)
              targets.push_back(rank_pair.first);

            return targets;
          }

          /**
           * Implementation of
           * Utilities::MPI::ConsensusAlgorithms::Process::create_request().
           */
          virtual void
          create_request(const unsigned int other_rank,
                         std::vector<std::pair<types::global_dof_index,
                                               types::global_dof_index>>
                           &send_buffer) override
          {
            send_buffer = this->buffers.at(other_rank);
          }

          /**
           * Implementation of
           * Utilities::MPI::ConsensusAlgorithms::Process::answer_request().
           */
          virtual void
          answer_request(
            const unsigned int                                     other_rank,
            const std::vector<std::pair<types::global_dof_index,
                                        types::global_dof_index>> &buffer_recv,
            std::vector<unsigned int> &request_buffer) override
          {
            (void)request_buffer; // not needed


            // process message: loop over all intervals
            for (auto interval : buffer_recv)
              {
#ifdef DEBUG
                for (types::global_dof_index i = interval.first;
                     i < interval.second;
                     i++)
                  Assert(actually_owning_ranks[i - local_range.first] ==
                           numbers::invalid_unsigned_int,
                         ExcInternalError());
                Assert(interval.first >= local_range.first &&
                         interval.first < local_range.second,
                       ExcInternalError());
                Assert(interval.second > local_range.first &&
                         interval.second <= local_range.second,
                       ExcInternalError());
#endif
                std::fill(actually_owning_ranks.data() + interval.first -
                            local_range.first,
                          actually_owning_ranks.data() + interval.second -
                            local_range.first,
                          other_rank);
              }
            actually_owning_rank_list.push_back(other_rank);
          }

        private:
          const std::map<unsigned int,
                         std::vector<std::pair<types::global_dof_index,
                                               types::global_dof_index>>>
            &buffers;

          std::vector<unsigned int> &actually_owning_ranks;

          const std::pair<types::global_dof_index, types::global_dof_index>
            &local_range;

          std::vector<unsigned int> &actually_owning_rank_list;
        };



        /**
         * Dictionary class with basic partitioning in terms of a single
         * interval of fixed size known to all MPI ranks for two-stage index
         * lookup.
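         *
         * Conceptually, the two-stage lookup for a global index @p i works as
         * follows (a sketch, not literal code from this class):
         * @code
         *   // stage 1: every rank can compute which dictionary rank is
         *   // responsible for index i
         *   const unsigned int dict_rank = dict.dof_to_dict_rank(i);
         *   // stage 2: that rank answers with the actual owner, which it
         *   // stores in its expanded array
         *   // dict.actually_owning_ranks[i - dict.local_range.first]
         * @endcode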
         */
        struct Dictionary
        {
          /**
           * The minimum grain size for the ranges.
           */
          static const unsigned int range_minimum_grain_size = 4096;

          /**
           * A vector with as many entries as there are dofs in the dictionary
           * of the current process, and each entry containing the rank of the
           * owner of that dof in the IndexSet `owned_indices`. This is
           * queried in the index lookup, so we keep an expanded list.
           */
          std::vector<unsigned int> actually_owning_ranks;

          /**
           * A sorted vector containing the MPI ranks appearing in
           * `actually_owning_ranks`.
           */
          std::vector<unsigned int> actually_owning_rank_list;

          /**
           * The number of unknowns in the dictionary on each MPI rank used
           * for the index space splitting. For simplicity of index lookup
           * without additional communication, this number is the same on all
           * MPI ranks.
           */
          types::global_dof_index dofs_per_process;

          /**
           * The local range of the global index space that is represented in
           * the dictionary, computed from `dofs_per_process`, the current
           * MPI rank, and range_minimum_grain_size.
           */
          std::pair<types::global_dof_index, types::global_dof_index>
            local_range;

          /**
           * The actual size, computed as the minimum of dofs_per_process and
           * the possible end of the index space. Equivalent to
           * `local_range.second - local_range.first`.
           */
          types::global_dof_index local_size;

          /**
           * The global size of the index space.
           */
          types::global_dof_index size;

          /**
           * The number of ranks the `owned_indices` IndexSet is distributed
           * among.
           */
          unsigned int n_dict_procs_in_owned_indices;

          /**
           * A stride to distribute the work more evenly over MPI ranks in
           * case the grain size forces us to have fewer ranges than we have
           * processors.
           */
          unsigned int stride_small_size;

          /**
           * Set up the dictionary by computing the partitioning from the
           * global size and sending the rank information on locally owned
           * ranges to the owner of the dictionary part.
           */
          void
          reinit(const IndexSet &owned_indices, const MPI_Comm &comm)
          {
            // 1) set up the partition
            this->partition(owned_indices, comm);

#ifdef DEAL_II_WITH_MPI
            unsigned int my_rank = this_mpi_process(comm);

            types::global_dof_index dic_local_received = 0;
            std::map<unsigned int,
                     std::vector<std::pair<types::global_dof_index,
                                           types::global_dof_index>>>
              buffers;

            std::fill(actually_owning_ranks.begin(),
                      actually_owning_ranks.end(),
                      numbers::invalid_subdomain_id);

            // 2) collect relevant processes and process local dict entries
            for (auto interval = owned_indices.begin_intervals();
                 interval != owned_indices.end_intervals();
                 ++interval)
              {
                // Due to the granularity of the dictionary, the interval
                // might be split into several ranges of processor owner
                // ranks. Here, we process the interval by breaking it into
                // smaller pieces according to the dictionary partitioning.
                std::pair<types::global_dof_index, types::global_dof_index>
                                   index_range(*interval->begin(), interval->last() + 1);
                const unsigned int owner_last =
                  dof_to_dict_rank(interval->last());
                unsigned int owner_first = numbers::invalid_unsigned_int;
                while (owner_first != owner_last)
                  {
                    Assert(index_range.first < index_range.second,
                           ExcInternalError());

                    owner_first = dof_to_dict_rank(index_range.first);

                    // this explicitly picks up the formula of
                    // dof_to_dict_rank, so the two places must be in sync
                    types::global_dof_index next_index =
                      std::min(get_index_offset(owner_first + 1),
                               index_range.second);

                    Assert(next_index > index_range.first, ExcInternalError());

#  ifdef DEBUG
                    // make sure that the owner is the same on the current
                    // interval
                    for (types::global_dof_index i = index_range.first + 1;
                         i < next_index;
                         ++i)
                      AssertDimension(owner_first, dof_to_dict_rank(i));
#  endif

                    // add the interval, either to the local range or into a
                    // buffer to be sent to another processor
                    if (owner_first == my_rank)
                      {
                        std::fill(actually_owning_ranks.data() +
                                    index_range.first - local_range.first,
                                  actually_owning_ranks.data() + next_index -
                                    local_range.first,
                                  my_rank);
                        dic_local_received += next_index - index_range.first;
                        if (actually_owning_rank_list.empty())
                          actually_owning_rank_list.push_back(my_rank);
                      }
                    else
                      buffers[owner_first].emplace_back(index_range.first,
                                                        next_index);

                    index_range.first = next_index;
                  }
              }

            n_dict_procs_in_owned_indices = buffers.size();
            std::vector<MPI_Request> request;

            // Check if index set space is partitioned globally without gaps.
            if (Utilities::MPI::sum(owned_indices.n_elements(), comm) ==
                owned_indices.size())
              {
                // no gaps: setup is simple! Processes send their locally owned
                // indices to the dictionary. The dictionary stores the sending
                // rank for each index. The dictionary knows that it is fully
                // set up once all indices it is responsible for have been
                // processed.

                request.reserve(n_dict_procs_in_owned_indices);

                // protect the following communication steps using a mutex:
                static CollectiveMutex      mutex;
                CollectiveMutex::ScopedLock lock(mutex, comm);

                const int mpi_tag =
                  Utilities::MPI::internal::Tags::dictionary_reinit;


                // 3) send messages with local dofs to the right dict process
                for (const auto &rank_pair : buffers)
                  {
                    request.push_back(MPI_Request());
                    const int ierr = MPI_Isend(rank_pair.second.data(),
                                               rank_pair.second.size() * 2,
                                               DEAL_II_DOF_INDEX_MPI_TYPE,
                                               rank_pair.first,
                                               mpi_tag,
                                               comm,
                                               &request.back());
                    AssertThrowMPI(ierr);
                  }

                // 4) receive messages until all dofs in dict are processed
                while (this->local_size != dic_local_received)
                  {
                    // wait for an incoming message
                    MPI_Status status;
                    auto       ierr =
                      MPI_Probe(MPI_ANY_SOURCE, mpi_tag, comm, &status);
                    AssertThrowMPI(ierr);

                    // retrieve size of incoming message
                    int number_amount;
                    ierr = MPI_Get_count(&status,
                                         DEAL_II_DOF_INDEX_MPI_TYPE,
                                         &number_amount);
                    AssertThrowMPI(ierr);

                    const auto other_rank = status.MPI_SOURCE;
                    actually_owning_rank_list.push_back(other_rank);

                    // receive message
                    Assert(number_amount % 2 == 0, ExcInternalError());
                    std::vector<std::pair<types::global_dof_index,
                                          types::global_dof_index>>
                      buffer(number_amount / 2);
                    ierr = MPI_Recv(buffer.data(),
                                    number_amount,
                                    DEAL_II_DOF_INDEX_MPI_TYPE,
                                    status.MPI_SOURCE,
                                    status.MPI_TAG,
                                    comm,
                                    MPI_STATUS_IGNORE);
                    AssertThrowMPI(ierr);
                    // process message: loop over all intervals
                    for (auto interval : buffer)
                      {
#  ifdef DEBUG
                        for (types::global_dof_index i = interval.first;
                             i < interval.second;
                             i++)
                          Assert(actually_owning_ranks[i - local_range.first] ==
                                   numbers::invalid_unsigned_int,
                                 ExcInternalError());
                        Assert(interval.first >= local_range.first &&
                                 interval.first < local_range.second,
                               ExcInternalError());
                        Assert(interval.second > local_range.first &&
                                 interval.second <= local_range.second,
                               ExcInternalError());
#  endif

                        std::fill(actually_owning_ranks.data() +
                                    interval.first - local_range.first,
                                  actually_owning_ranks.data() +
                                    interval.second - local_range.first,
                                  other_rank);
                        dic_local_received += interval.second - interval.first;
                      }
                  }
              }
            else
              {
                // with gaps: use a ConsensusAlgorithm to determine when all
                // dictionaries have been set up.

                // 3/4) use a ConsensusAlgorithm to send messages with local
                // dofs to the right dict process
                DictionaryPayLoad temp(buffers,
                                       actually_owning_ranks,
                                       local_range,
                                       actually_owning_rank_list);

                ConsensusAlgorithms::Selector<
                  std::pair<types::global_dof_index, types::global_dof_index>,
                  unsigned int>
                  consensus_algo(temp, comm);
                consensus_algo.run();
              }

            std::sort(actually_owning_rank_list.begin(),
                      actually_owning_rank_list.end());

            for (unsigned int i = 1; i < actually_owning_rank_list.size(); ++i)
              Assert(actually_owning_rank_list[i] >
                       actually_owning_rank_list[i - 1],
                     ExcInternalError());

            // 5) make sure that all messages have been sent
            if (request.size() > 0)
              {
                const int ierr = MPI_Waitall(request.size(),
                                             request.data(),
                                             MPI_STATUSES_IGNORE);
                AssertThrowMPI(ierr);
              }

#else
            (void)owned_indices;
            (void)comm;
#endif
          }

          /**
           * Translate a global dof index to the MPI rank in the dictionary
           * using `dofs_per_process`. We multiply by `stride_small_size` to
           * ensure a balance over the MPI ranks due to the grain size.
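           *
           * As an example with purely illustrative numbers: if partition()
           * has determined `dofs_per_process = 4096` and
           * `stride_small_size = 6`, then
           * @code
           *   dof_to_dict_rank(5000); // (5000 / 4096) * 6 == 6
           * @endcode
           * i.e., the dictionary entry for index 5000 lives on rank 6.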
           */
          unsigned int
          dof_to_dict_rank(const types::global_dof_index i)
          {
            // note: this formula is also explicitly used in
            // get_index_offset(), so keep the two in sync
            return (i / dofs_per_process) * stride_small_size;
          }

          /**
           * Given an MPI rank id of an arbitrary processor, return the index
           * offset where the local range of that processor begins.
           */
          types::global_dof_index
          get_index_offset(const unsigned int rank)
          {
            return std::min(dofs_per_process *
                              static_cast<types::global_dof_index>(
                                (rank + stride_small_size - 1) /
                                stride_small_size),
                            size);
          }

          /**
           * Given the rank in the owned indices from `actually_owning_ranks`,
           * this returns the index of the rank in the
           * `actually_owning_rank_list`.
           */
          unsigned int
          get_owning_rank_index(const unsigned int rank_in_owned_indices,
                                const unsigned int guess = 0)
          {
            AssertIndexRange(guess, actually_owning_rank_list.size());
            if (actually_owning_rank_list[guess] == rank_in_owned_indices)
              return guess;
            else
              {
                auto it = std::lower_bound(actually_owning_rank_list.begin(),
                                           actually_owning_rank_list.end(),
                                           rank_in_owned_indices);
                Assert(it != actually_owning_rank_list.end(),
                       ExcInternalError());
                Assert(*it == rank_in_owned_indices, ExcInternalError());
                return it - actually_owning_rank_list.begin();
              }
          }

        private:
          /**
           * Compute the partition from the global size of the index space and
           * the number of ranks.
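           *
           * As a sketch with illustrative numbers: for `size = 10000` and 16
           * MPI ranks, the plain split would yield 625 dofs per process,
           * which is below range_minimum_grain_size (4096). The code then
           * uses `dofs_per_process = 4096` and
           * `stride_small_size = 4096 * 16 / 10000 = 6`, so that only ranks
           * 0, 6, and 12 hold the non-empty dictionary ranges [0,4096),
           * [4096,8192), and [8192,10000); all other ranks hold empty ranges.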
           */
          void
          partition(const IndexSet &owned_indices, const MPI_Comm &comm)
          {
#ifdef DEAL_II_WITH_MPI
            const unsigned int n_procs = n_mpi_processes(comm);
            const unsigned int my_rank = this_mpi_process(comm);

            size = owned_indices.size();

            Assert(size > 0, ExcNotImplemented());

            dofs_per_process = (size + n_procs - 1) / n_procs;
            if (dofs_per_process < range_minimum_grain_size)
              {
                dofs_per_process  = range_minimum_grain_size;
                stride_small_size = dofs_per_process * n_procs / size;
              }
            else
              stride_small_size = 1;
            local_range.first  = get_index_offset(my_rank);
            local_range.second = get_index_offset(my_rank + 1);

            local_size = local_range.second - local_range.first;

            actually_owning_ranks = {};
            actually_owning_ranks.resize(local_size,
                                         numbers::invalid_unsigned_int);
#else
            (void)owned_indices;
            (void)comm;
#endif
          }
        };



        /**
         * Specialization of ConsensusAlgorithms::Process for the context of
         * Utilities::MPI::compute_index_owner() and
         * Utilities::MPI::Partitioner::set_ghost_indices() with additional
         * payload.
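         *
         * A usage sketch, mirroring what Utilities::MPI::compute_index_owner()
         * does internally (details of the actual call site may differ):
         * @code
         *   std::vector<unsigned int> owning_ranks(
         *     indices_to_look_up.n_elements());
         *   ConsensusAlgorithmsPayload process(
         *     owned_indices, indices_to_look_up, comm, owning_ranks);
         *   ConsensusAlgorithms::Selector<
         *     std::pair<types::global_dof_index, types::global_dof_index>,
         *     unsigned int>
         *     consensus_algorithm(process, comm);
         *   consensus_algorithm.run();
         * @endcode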
         */
        class ConsensusAlgorithmsPayload
          : public ConsensusAlgorithms::Process<
              std::pair<types::global_dof_index, types::global_dof_index>,
              unsigned int>
        {
        public:
          /**
           * Constructor.
           */
          ConsensusAlgorithmsPayload(const IndexSet &owned_indices,
                                     const IndexSet &indices_to_look_up,
                                     const MPI_Comm &comm,
                                     std::vector<unsigned int> &owning_ranks,
                                     const bool track_index_requests = false)
            : owned_indices(owned_indices)
            , indices_to_look_up(indices_to_look_up)
            , comm(comm)
            , my_rank(this_mpi_process(comm))
            , n_procs(n_mpi_processes(comm))
            , track_index_requests(track_index_requests)
            , owning_ranks(owning_ranks)
          {
            dict.reinit(owned_indices, comm);
            requesters.resize(dict.actually_owning_rank_list.size());
          }

          /**
           * The index space which describes the locally owned space.
           */
          const IndexSet &owned_indices;

          /**
           * The indices which are "ghosts" on a given rank and should be
           * looked up in terms of their owner rank from owned_indices.
           */
          const IndexSet &indices_to_look_up;

          /**
           * The underlying MPI communicator.
           */
          const MPI_Comm comm;

          /**
           * The present MPI rank.
           */
          const unsigned int my_rank;

          /**
           * The total number of ranks participating in the MPI communicator
           * `comm`.
           */
          const unsigned int n_procs;

          /**
           * Controls whether the origin of the ghost owners should also be
           * stored. If true, it will be added into `requesters` and can be
           * queried by `get_requesters()`.
           */
          const bool track_index_requests;

          /**
           * The result of the index owner computation: To each index
           * contained in `indices_to_look_up`, this vector contains the MPI
           * rank of the owner in `owned_indices`.
           */
          std::vector<unsigned int> &owning_ranks;

          /**
           * Keeps track of the origin of the requests. The layout of the data
           * structure is as follows: The outermost vector has as many entries
           * as Dictionary::actually_owning_rank_list and represents the
           * information we should send back to the owners from the present
           * dictionary entry. The second vector then collects a list of MPI
           * ranks that have requested data, using the rank in the first pair
           * entry and a list of index ranges as the second entry.
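           *
           * As an illustration with invented data: if ranks 3 and 7 have
           * requested indices that are owned by the rank stored at position 1
           * of `actually_owning_rank_list`, an entry might look like
           * @code
           *   requesters[1] = {{3, {{12, 17}, {25, 27}}}, {7, {{0, 5}}}};
           * @endcode
           * where the inner pairs are half-open index ranges relative to
           * `local_range.first` of the dictionary held by this process.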
           */
          std::vector<std::vector<
            std::pair<unsigned int,
                      std::vector<std::pair<unsigned int, unsigned int>>>>>
            requesters;

          /**
           * The dictionary handling the requests.
           */
          Dictionary dict;

          /**
           * Array to collect the indices to look up, sorted by the rank in
           * the dictionary.
           */
          std::map<unsigned int, std::vector<types::global_dof_index>>
            indices_to_look_up_by_dict_rank;

          /**
           * A map from the rank of each dictionary process to the positions
           * in `owning_ranks` at which the answers received from that process
           * will be stored.
           */
          std::map<unsigned int, std::vector<unsigned int>> recv_indices;

          /**
           * Implementation of
           * Utilities::MPI::ConsensusAlgorithms::Process::answer_request(),
           * adding the owner of a particular index in request_buffer (and
           * keeping track of who requested a particular index in case that
           * information is also desired).
           */
          virtual void
          answer_request(
            const unsigned int                                     other_rank,
            const std::vector<std::pair<types::global_dof_index,
                                        types::global_dof_index>> &buffer_recv,
            std::vector<unsigned int> &request_buffer) override
          {
            unsigned int owner_index = 0;
            for (const auto &interval : buffer_recv)
              for (auto i = interval.first; i < interval.second; ++i)
                {
                  const unsigned int actual_owner =
                    dict.actually_owning_ranks[i - dict.local_range.first];
                  request_buffer.push_back(actual_owner);

                  if (track_index_requests)
                    append_index_origin(i, owner_index, other_rank);
                }
          }

          /**
           * Implementation of
           * Utilities::MPI::ConsensusAlgorithms::Process::compute_targets().
           */
          virtual std::vector<unsigned int>
          compute_targets() override
          {
            std::vector<unsigned int> targets;

            // 1) collect relevant processes and process local dict entries
            {
              unsigned int index       = 0;
              unsigned int owner_index = 0;
              for (auto i : indices_to_look_up)
                {
                  unsigned int other_rank = dict.dof_to_dict_rank(i);
                  if (other_rank == my_rank)
                    {
                      owning_ranks[index] =
                        dict.actually_owning_ranks[i - dict.local_range.first];
                      if (track_index_requests)
                        append_index_origin(i, owner_index, my_rank);
                    }
                  else if (targets.empty() || targets.back() != other_rank)
                    targets.push_back(other_rank);
                  index++;
                }
            }


            for (auto i : targets)
              {
                recv_indices[i]                    = {};
                indices_to_look_up_by_dict_rank[i] = {};
              }

            // 3) collect indices for each process
            {
              unsigned int index = 0;
              for (auto i : indices_to_look_up)
                {
                  unsigned int other_rank = dict.dof_to_dict_rank(i);
                  if (other_rank != my_rank)
                    {
                      recv_indices[other_rank].push_back(index);
                      indices_to_look_up_by_dict_rank[other_rank].push_back(i);
                    }
                  index++;
                }
            }

            Assert(targets.size() == recv_indices.size() &&
                     targets.size() == indices_to_look_up_by_dict_rank.size(),
                   ExcMessage("Size does not match!"));

            return targets;
          }

          /**
           * Implementation of
           * Utilities::MPI::ConsensusAlgorithms::Process::create_request().
           */
          virtual void
          create_request(const unsigned int other_rank,
                         std::vector<std::pair<types::global_dof_index,
                                               types::global_dof_index>>
                           &send_buffer) override
          {
            // create index set and compress data to be sent
            auto &   indices_i = indices_to_look_up_by_dict_rank[other_rank];
            IndexSet is(dict.size);
            is.add_indices(indices_i.begin(), indices_i.end());
            is.compress();

            for (auto interval = is.begin_intervals();
                 interval != is.end_intervals();
                 ++interval)
              send_buffer.emplace_back(*interval->begin(),
                                       interval->last() + 1);
          }

          /**
           * Implementation of
           * Utilities::MPI::ConsensusAlgorithms::Process::prepare_buffer_for_answer().
           */
          virtual void
          prepare_buffer_for_answer(
            const unsigned int         other_rank,
            std::vector<unsigned int> &recv_buffer) override
          {
            recv_buffer.resize(recv_indices[other_rank].size());
          }

          /**
           * Implementation of
           * Utilities::MPI::ConsensusAlgorithms::Process::read_answer().
           */
          virtual void
          read_answer(const unsigned int               other_rank,
                      const std::vector<unsigned int> &recv_buffer) override
          {
            Assert(recv_indices[other_rank].size() == recv_buffer.size(),
                   ExcMessage("Sizes do not match!"));

            for (unsigned int j = 0; j < recv_indices[other_rank].size(); j++)
              owning_ranks[recv_indices[other_rank][j]] = recv_buffer[j];
          }

          /**
           * Resolve the origin of the requests by sending the information
           * accumulated in terms of the dictionary owners during the run of
           * the consensus algorithm back to the owner in the original
           * IndexSet. This requires some point-to-point communication.
           *
           * @return Map of processors and associated ranges of indices that
           *         are requested from the current rank
           */
          std::map<unsigned int, IndexSet>
          get_requesters()
          {
            Assert(track_index_requests,
                   ExcMessage("Must enable index range tracking in "
                              "constructor of ConsensusAlgorithmProcess"));

            std::map<unsigned int, dealii::IndexSet> requested_indices;

#ifdef DEAL_II_WITH_MPI

            static CollectiveMutex      mutex;
            CollectiveMutex::ScopedLock lock(mutex, comm);

            const int mpi_tag = Utilities::MPI::internal::Tags::
              consensus_algorithm_payload_get_requesters;

            // reserve enough slots for the requests ahead; depending on
            // whether the owning rank is one of the requesters or not, we
            // might have one fewer request to execute, so we fill the
            // requests on demand.
            std::vector<MPI_Request> send_requests;
            send_requests.reserve(requesters.size());

            // We use an integer vector for the data exchange. Since we send
            // data associated with intervals belonging to different
            // requesters, we will need to send (a) the MPI rank of the
            // requester, (b) the number of intervals directed to this
            // requester, and (c) a list of intervals, i.e., two integers per
            // interval. The number of items sent in total can be deduced both
            // via the MPI status message at the receiver side as well as by
            // counting the buckets from different requesters.
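            //
            // As an illustration with invented numbers: two intervals [3,7)
            // and [10,12) going back to requesting rank 5, followed by one
            // interval [0,2) going back to rank 9, would be packed as
            //   {5, 2, 3, 7, 10, 12, 9, 1, 0, 2}.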
            std::vector<std::vector<unsigned int>> send_data(requesters.size());
            for (unsigned int i = 0; i < requesters.size(); ++i)
              {
                // special code for our own indices
                if (dict.actually_owning_rank_list[i] == my_rank)
                  {
                    for (const auto &j : requesters[i])
                      {
                        const types::global_dof_index index_offset =
                          dict.get_index_offset(my_rank);
                        IndexSet &my_index_set = requested_indices[j.first];
                        my_index_set.set_size(owned_indices.size());
                        for (const auto &interval : j.second)
                          my_index_set.add_range(index_offset + interval.first,
                                                 index_offset +
                                                   interval.second);
                      }
                  }
                else
                  {
                    for (const auto &j : requesters[i])
                      {
                        send_data[i].push_back(j.first);
                        send_data[i].push_back(j.second.size());
                        for (const auto &interval : j.second)
                          {
                            send_data[i].push_back(interval.first);
                            send_data[i].push_back(interval.second);
                          }
                      }
                    send_requests.push_back(MPI_Request());
                    const int ierr =
                      MPI_Isend(send_data[i].data(),
                                send_data[i].size(),
                                MPI_UNSIGNED,
                                dict.actually_owning_rank_list[i],
                                mpi_tag,
                                comm,
                                &send_requests.back());
                    AssertThrowMPI(ierr);
                  }
              }

            // receive the data
            for (unsigned int c = 0; c < dict.n_dict_procs_in_owned_indices;
                 ++c)
              {
                // wait for an incoming message
                MPI_Status   status;
                unsigned int ierr =
                  MPI_Probe(MPI_ANY_SOURCE, mpi_tag, comm, &status);
                AssertThrowMPI(ierr);

                // retrieve size of incoming message
                int number_amount;
                ierr = MPI_Get_count(&status, MPI_UNSIGNED, &number_amount);
                AssertThrowMPI(ierr);

                // receive message
                Assert(number_amount % 2 == 0, ExcInternalError());
                std::vector<std::pair<unsigned int, unsigned int>> buffer(
                  number_amount / 2);
                ierr = MPI_Recv(buffer.data(),
                                number_amount,
                                MPI_UNSIGNED,
                                status.MPI_SOURCE,
                                status.MPI_TAG,
                                comm,
                                &status);
                AssertThrowMPI(ierr);

                // unpack the message and translate the dictionary-local
                // indices coming via MPI to the global index range
                const types::global_dof_index index_offset =
                  dict.get_index_offset(status.MPI_SOURCE);
                unsigned int offset = 0;
                while (offset < buffer.size())
                  {
                    AssertIndexRange(offset + buffer[offset].second,
                                     buffer.size());

                    IndexSet my_index_set(owned_indices.size());
                    for (unsigned int i = offset + 1;
                         i < offset + buffer[offset].second + 1;
                         ++i)
                      my_index_set.add_range(index_offset + buffer[i].first,
                                             index_offset + buffer[i].second);

                    // the underlying index set is able to merge ranges coming
                    // from different ranks due to the partitioning in the
                    // dictionary
                    IndexSet &index_set =
                      requested_indices[buffer[offset].first];
                    if (index_set.size() == 0)
                      index_set.set_size(owned_indices.size());
                    index_set.add_indices(my_index_set);

                    offset += buffer[offset].second + 1;
                  }
                AssertDimension(offset, buffer.size());
              }

            if (send_requests.size() > 0)
              {
                const auto ierr = MPI_Waitall(send_requests.size(),
                                              send_requests.data(),
                                              MPI_STATUSES_IGNORE);
                AssertThrowMPI(ierr);
              }


#  ifdef DEBUG
            for (const auto &it : requested_indices)
              {
                IndexSet copy_set = it.second;
                copy_set.subtract_set(owned_indices);
                Assert(copy_set.n_elements() == 0,
                       ExcInternalError(
                         "The indices requested from the current "
                         "MPI rank should be locally owned here!"));
              }
#  endif

#endif // DEAL_II_WITH_MPI

            return requested_indices;
          }

        private:
          /**
           * Stores the index request in the `requesters` field. We first find
           * out the owner of the index that was requested (using the guess in
           * `owner_index`, as we typically might look up on the same rank
           * several times in a row, which avoids the binary search in
           * Dictionary::get_owning_rank_index()). Once we know the rank of
           * the owner, we fill the vector entry with the rank of the request.
           * Here, we utilize the fact that requests are processed
           * rank-by-rank, so we can simply look at the end of the vector to
           * see whether there is already some data stored or not. Finally, we
           * build ranges, again using the fact that the index list is sorted
           * and we therefore only need to append at the end.
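           *
           * For example (invented numbers): appending the dictionary-local
           * indices 12, 13, and 14 for the same requesting rank extends a
           * single half-open range to (12, 15) rather than storing three
           * separate entries.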
           */
          void
          append_index_origin(const types::global_dof_index index,
                              unsigned int &                owner_index,
                              const unsigned int            rank_of_request)
          {
            // remember who requested which index. We want to use an
            // std::vector with simple addressing, via a good guess from the
            // preceding index, rather than std::map, because this is an inner
            // loop and it avoids the map lookup in every iteration
            const unsigned int rank_of_owner =
              dict.actually_owning_ranks[index - dict.local_range.first];
            owner_index =
              dict.get_owning_rank_index(rank_of_owner, owner_index);
            if (requesters[owner_index].empty() ||
                requesters[owner_index].back().first != rank_of_request)
              requesters[owner_index].emplace_back(
                rank_of_request,
                std::vector<std::pair<unsigned int, unsigned int>>());
            if (requesters[owner_index].back().second.empty() ||
                requesters[owner_index].back().second.back().second !=
                  index - dict.local_range.first)
              requesters[owner_index].back().second.emplace_back(
                index - dict.local_range.first,
                index - dict.local_range.first + 1);
            else
              ++requesters[owner_index].back().second.back().second;
          }
        };

      } // namespace ComputeIndexOwner
    }   // namespace internal
  }     // namespace MPI
} // namespace Utilities

DEAL_II_NAMESPACE_CLOSE

#endif