// ---------------------------------------------------------------------
//
// Copyright (C) 2020 by the deal.II authors
//
// This file is part of the deal.II library.
//
// The deal.II library is free software; you can use it, redistribute
// it, and/or modify it under the terms of the GNU Lesser General
// Public License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// The full text of the license can be found in the file LICENSE.md at
// the top level directory of deal.II.
//
// ---------------------------------------------------------------------

#ifndef dealii_mpi_noncontiguous_partitioner_templates_h
#define dealii_mpi_noncontiguous_partitioner_templates_h

#include <deal.II/base/config.h>

#include <deal.II/base/mpi.h>
#include <deal.II/base/mpi.templates.h>
#include <deal.II/base/mpi_compute_index_owner_internal.h>
#include <deal.II/base/mpi_noncontiguous_partitioner.h>

#include <deal.II/lac/communication_pattern_base.h>
#include <deal.II/lac/vector_space_vector.h>


DEAL_II_NAMESPACE_OPEN

namespace Utilities
{
  namespace MPI
  {
    NoncontiguousPartitioner::NoncontiguousPartitioner(
      const IndexSet &indexset_has,
      const IndexSet &indexset_want,
      const MPI_Comm &communicator)
    {
      this->reinit(indexset_has, indexset_want, communicator);
    }



    NoncontiguousPartitioner::NoncontiguousPartitioner(
      const std::vector<types::global_dof_index> &indices_has,
      const std::vector<types::global_dof_index> &indices_want,
      const MPI_Comm &                            communicator)
    {
      this->reinit(indices_has, indices_want, communicator);
    }
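
    // Illustrative usage sketch (comment only, not part of this file's
    // documented interface): given two vectors of global indices, one might
    // write something like
    //
    //   NoncontiguousPartitioner partitioner(indices_has, indices_want, comm);
    //   partitioner.export_to_ghosted_array(
    //     ArrayView<const double>(src.data(), src.size()),
    //     ArrayView<double>(dst.data(), dst.size()));
    //
    // where src provides one value per entry of indices_has and dst is sized
    // according to indices_want.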



    std::pair<unsigned int, unsigned int>
    NoncontiguousPartitioner::n_targets()
    {
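      // The first entry is the number of ranks this process sends data to,
      // the second one the number of ranks it receives data from.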
      return {send_ranks.size(), recv_ranks.size()};
    }



    types::global_dof_index
    NoncontiguousPartitioner::memory_consumption()
    {
      return MemoryConsumption::memory_consumption(send_ranks) +
             MemoryConsumption::memory_consumption(send_ptr) +
             MemoryConsumption::memory_consumption(send_indices) +
             MemoryConsumption::memory_consumption(recv_ranks) +
             MemoryConsumption::memory_consumption(recv_ptr) +
             MemoryConsumption::memory_consumption(recv_indices) +
             MemoryConsumption::memory_consumption(buffers) +
             MemoryConsumption::memory_consumption(requests);
    }



    const MPI_Comm &
    NoncontiguousPartitioner::get_mpi_communicator() const
    {
      return communicator;
    }



    void
    NoncontiguousPartitioner::reinit(const IndexSet &indexset_has,
                                     const IndexSet &indexset_want,
                                     const MPI_Comm &communicator)
    {
      this->communicator = communicator;

      // clean up
      send_ranks.clear();
      send_ptr.clear();
      send_indices.clear();
      recv_ranks.clear();
      recv_ptr.clear();
      recv_indices.clear();
      buffers.clear();
      requests.clear();

      // set up the communication pattern
      std::vector<unsigned int> owning_ranks_of_ghosts(
        indexset_want.n_elements());

      // set up dictionary
      Utilities::MPI::internal::ComputeIndexOwner::ConsensusAlgorithmsPayload
        process(indexset_has,
                indexset_want,
                communicator,
                owning_ranks_of_ghosts,
                true);

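      // Running the consensus algorithm below fills owning_ranks_of_ghosts
      // with, for each locally wanted index, the rank that owns it according
      // to indexset_has; since the last constructor argument of the payload
      // is true, it also keeps track of which ranks have requested indices
      // owned by this process (queried further down via get_requesters()).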
      Utilities::MPI::ConsensusAlgorithms::Selector<
        std::pair<types::global_dof_index, types::global_dof_index>,
        unsigned int>
        consensus_algorithm(process, communicator);
      consensus_algorithm.run();

      // set up the map of processes from which this rank will receive values
      {
        std::map<unsigned int, std::vector<types::global_dof_index>> recv_map;

        for (const auto &owner : owning_ranks_of_ghosts)
          recv_map[owner] = std::vector<types::global_dof_index>();

        for (types::global_dof_index i = 0; i < owning_ranks_of_ghosts.size();
             i++)
          recv_map[owning_ranks_of_ghosts[i]].push_back(i);

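        // Store the result in CRS-like form: the entries of recv_indices
        // between recv_ptr[j] and recv_ptr[j + 1] are the (local) positions
        // that will be filled with data received from recv_ranks[j].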
        recv_ptr.push_back(recv_indices.size() /*=0*/);
        for (const auto &target_with_indexset : recv_map)
          {
            recv_ranks.push_back(target_with_indexset.first);

            for (const auto &cell_index : target_with_indexset.second)
              recv_indices.push_back(cell_index);

            recv_ptr.push_back(recv_indices.size());
          }
      }

      {
        const auto targets_with_indexset = process.get_requesters();

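        // send_ptr stores offsets into the single temporary buffer that is
        // shared with the receive part, which is why its first entry is
        // recv_ptr.back() rather than zero (compare
        // export_to_ghosted_array_start(), where recv_ptr and send_ptr index
        // into the same buffer).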
        send_ptr.push_back(recv_ptr.back());
        for (const auto &target_with_indexset : targets_with_indexset)
          {
            send_ranks.push_back(target_with_indexset.first);

            for (const auto &cell_index : target_with_indexset.second)
              send_indices.push_back(indexset_has.index_within_set(cell_index));

            send_ptr.push_back(send_indices.size() + recv_ptr.back());
          }
      }
    }



    void
    NoncontiguousPartitioner::reinit(
      const std::vector<types::global_dof_index> &indices_has,
      const std::vector<types::global_dof_index> &indices_want,
      const MPI_Comm &                            communicator)
    {
      // step 0) remove padding entries (numbers::invalid_dof_index) from the
      //         input vectors
      std::vector<types::global_dof_index> indices_has_clean;
      indices_has_clean.reserve(indices_has.size());

      for (const auto &i : indices_has)
        if (i != numbers::invalid_dof_index)
          indices_has_clean.push_back(i);

      std::vector<types::global_dof_index> indices_want_clean;
      indices_want_clean.reserve(indices_want.size());

      for (const auto &i : indices_want)
        if (i != numbers::invalid_dof_index)
          indices_want_clean.push_back(i);

      // step 1) determine the "number of degrees of freedom" needed for the
      //         IndexSets
      const types::global_dof_index local_n_dofs_has =
        indices_has_clean.empty() ?
          0 :
          (*std::max_element(indices_has_clean.begin(),
                             indices_has_clean.end()) +
           1);

      const types::global_dof_index local_n_dofs_want =
        indices_want_clean.empty() ?
          0 :
          (*std::max_element(indices_want_clean.begin(),
                             indices_want_clean.end()) +
           1);

      const types::global_dof_index n_dofs =
        Utilities::MPI::max(std::max(local_n_dofs_has, local_n_dofs_want),
                            communicator);

      // step 2) convert the vectors to (sorted!) IndexSets
      IndexSet index_set_has(n_dofs);
      index_set_has.add_indices(indices_has_clean.begin(),
                                indices_has_clean.end());

      IndexSet index_set_want(n_dofs);
      index_set_want.add_indices(indices_want_clean.begin(),
                                 indices_want_clean.end());

      // step 3) set up the internal data structures with the IndexSets
      this->reinit(index_set_has, index_set_want, communicator);

      // step 4) fix the internal data structures so that they are ordered as
      //         in the original vectors
      {
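        // temp_map_send maps the position of an index within the (sorted)
        // index_set_has to its position in the original, possibly padded,
        // indices_has vector, so that send_indices can be rewritten in terms
        // of the caller's ordering.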
        std::vector<types::global_dof_index> temp_map_send(
          index_set_has.n_elements());

        for (types::global_dof_index i = 0; i < indices_has.size(); i++)
          if (indices_has[i] != numbers::invalid_dof_index)
            temp_map_send[index_set_has.index_within_set(indices_has[i])] = i;

        for (auto &i : send_indices)
          i = temp_map_send[i];
      }

      {
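        // Same idea as above for the receive side: map positions within the
        // (sorted) index_set_want back to positions in the original
        // indices_want vector.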
        std::vector<types::global_dof_index> temp_map_recv(
          index_set_want.n_elements());

        for (types::global_dof_index i = 0; i < indices_want.size(); i++)
          if (indices_want[i] != numbers::invalid_dof_index)
            temp_map_recv[index_set_want.index_within_set(indices_want[i])] = i;

        for (auto &i : recv_indices)
          i = temp_map_recv[i];
      }
    }


    template <typename Number>
    void
    NoncontiguousPartitioner::export_to_ghosted_array(
      const ArrayView<const Number> &src,
      const ArrayView<Number> &      dst) const
    {
      // allocate internal memory if needed
      if (requests.size() != send_ranks.size() + recv_ranks.size())
        requests.resize(send_ranks.size() + recv_ranks.size());

      if (this->buffers.size() != send_ptr.back() * sizeof(Number))
        this->buffers.resize(send_ptr.back() * sizeof(Number), 0);
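      // buffers stores raw bytes (hence the factor of sizeof(Number)) and is
      // reinterpreted as an array of Number below; it serves as temporary
      // storage for both the outgoing and the incoming messages.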

      // perform actual exchange
      this->template export_to_ghosted_array<Number>(
        0,
        src,
        ArrayView<Number>(reinterpret_cast<Number *>(this->buffers.data()),
                          send_ptr.back()),
        dst,
        this->requests);
    }


    template <typename Number>
    void
    NoncontiguousPartitioner::export_to_ghosted_array(
      const unsigned int             communication_channel,
      const ArrayView<const Number> &locally_owned_array,
      const ArrayView<Number> &      temporary_storage,
      const ArrayView<Number> &      ghost_array,
      std::vector<MPI_Request> &     requests) const
    {
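      // This overload simply chains the split start()/finish() pair; it is
      // convenient when no overlap of communication and computation is
      // needed.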
      this->template export_to_ghosted_array_start<Number>(
        communication_channel,
        locally_owned_array,
        temporary_storage,
        requests);
      this->template export_to_ghosted_array_finish<Number>(temporary_storage,
                                                            ghost_array,
                                                            requests);
    }



    template <typename Number>
    void
    NoncontiguousPartitioner::export_to_ghosted_array_start(
      const unsigned int             communication_channel,
      const ArrayView<const Number> &src,
      const ArrayView<Number> &      buffers,
      std::vector<MPI_Request> &     requests) const
    {
#ifndef DEAL_II_WITH_MPI
      (void)communication_channel;
      (void)src;
      (void)buffers;
      (void)requests;
      Assert(false, ExcNeedsMPI());
#else
      AssertIndexRange(communication_channel, 10);

      const auto tag =
        communication_channel +
        internal::Tags::noncontiguous_partitioner_update_ghost_values;
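      // Each communication channel maps to its own MPI tag (the base tag plus
      // the channel number, with at most ten channels as asserted above), so
      // that concurrent exchanges on different channels do not mix up their
      // messages.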

      // post recv
      for (types::global_dof_index i = 0; i < recv_ranks.size(); i++)
        {
          const auto ierr =
            MPI_Irecv(buffers.data() + recv_ptr[i],
                      recv_ptr[i + 1] - recv_ptr[i],
                      Utilities::MPI::internal::mpi_type_id(buffers.data()),
                      recv_ranks[i],
                      tag,
                      communicator,
                      &requests[i + send_ranks.size()]);
          AssertThrowMPI(ierr);
        }
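
      // Note on the layout of requests: the receives posted above occupy the
      // entries starting at send_ranks.size(), while the sends posted below
      // use the first send_ranks.size() entries.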

      auto src_iterator = src.begin();

      // post send
      for (types::global_dof_index i = 0, k = 0; i < send_ranks.size(); i++)
        {
          // collect data to be sent
          for (types::global_dof_index j = send_ptr[i]; j < send_ptr[i + 1];
               j++)
            buffers[j] = src_iterator[send_indices[k++]];

          // send data
          const auto ierr =
            MPI_Isend(buffers.data() + send_ptr[i],
                      send_ptr[i + 1] - send_ptr[i],
                      Utilities::MPI::internal::mpi_type_id(buffers.data()),
                      send_ranks[i],
                      tag,
                      communicator,
                      &requests[i]);
          AssertThrowMPI(ierr);
        }
#endif
    }



    template <typename Number>
    void
    NoncontiguousPartitioner::export_to_ghosted_array_finish(
      const ArrayView<const Number> &buffers,
      const ArrayView<Number> &      dst,
      std::vector<MPI_Request> &     requests) const
    {
#ifndef DEAL_II_WITH_MPI
      (void)buffers;
      (void)dst;
      (void)requests;
      Assert(false, ExcNeedsMPI());
#else
      auto dst_iterator = dst.begin();

      // receive all data packages and copy data from buffers
      for (types::global_dof_index proc = 0; proc < recv_ranks.size(); proc++)
        {
          int        i;
          MPI_Status status;
          const auto ierr = MPI_Waitany(recv_ranks.size(),
                                        requests.data() + send_ranks.size(),
                                        &i,
                                        &status);
          AssertThrowMPI(ierr);

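          // MPI_Waitany hands back the index of the completed receive
          // relative to the receive block of requests, so i directly selects
          // the matching ranges of recv_ptr and recv_indices below.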
          for (types::global_dof_index j = recv_ptr[i], c = 0;
               j < recv_ptr[i + 1];
               j++)
            dst_iterator[recv_indices[j]] = buffers[recv_ptr[i] + c++];
        }

      // wait until all data packages have been sent
      const auto ierr =
        MPI_Waitall(send_ranks.size(), requests.data(), MPI_STATUSES_IGNORE);
      AssertThrowMPI(ierr);
#endif
    }

  } // namespace MPI
} // namespace Utilities

DEAL_II_NAMESPACE_CLOSE

#endif