// -*- tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
// vi: set et ts=8 sw=2 sts=2:
#ifndef DUNE_PDELAB_BACKEND_ISTL_PARALLELHELPER_HH
#define DUNE_PDELAB_BACKEND_ISTL_PARALLELHELPER_HH

#include <limits>
#include <numeric>

#include <dune/common/parallel/mpihelper.hh>
#include <dune/common/stdstreams.hh>
#include <dune/common/typetraits.hh>

#if HAVE_UG && PDELAB_SEQUENTIAL_UG
// We need the UGGrid declaration for the assertion
#include <dune/grid/uggrid.hh>
#endif

#include <dune/istl/owneroverlapcopy.hh>
#include <dune/istl/solvercategory.hh>
#include <dune/istl/operators.hh>
#include <dune/istl/solvers.hh>
#include <dune/istl/preconditioners.hh>
#include <dune/istl/scalarproducts.hh>
#include <dune/istl/paamg/amg.hh>
#include <dune/istl/paamg/pinfo.hh>
#include <dune/istl/io.hh>
#include <dune/istl/superlu.hh>

#include <dune/pdelab/constraints/common/constraints.hh>
#include <dune/pdelab/gridfunctionspace/genericdatahandle.hh>
#include <dune/pdelab/backend/interface.hh>
#include <dune/pdelab/backend/istl/vector.hh>
#include <dune/pdelab/backend/istl/utility.hh>
#include <dune/pdelab/gridfunctionspace/tags.hh>

namespace Dune {
  namespace PDELab {
    namespace ISTL {

      //! \addtogroup Backend
      //! \ingroup PDELab
      //! \{


      //========================================================
      // A parallel helper class providing a nonoverlapping
      // decomposition of all degrees of freedom
      //========================================================
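      //
      // A minimal usage sketch (illustrative only; GFS is assumed to be a PDELab
      // GridFunctionSpace type and gfs a matching instance):
      //
      //   ParallelHelper<GFS> helper(gfs);
      //   int my_rank = helper.rank();
      //   const std::vector<int>& neighbors = helper.getNeighborRanks();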

      template<typename GFS>
      class ParallelHelper
      {

        //! Type for storing rank values.
        typedef int RankIndex;

        //! Type used to store owner rank values of all DOFs.
        using RankVector = Dune::PDELab::Backend::Vector<GFS,RankIndex>;
        //! Type used to store ghost flag of all DOFs.
        using GhostVector = Dune::PDELab::Backend::Vector<GFS,bool>;

        //! ContainerIndex of the underlying GridFunctionSpace.
        typedef typename GFS::Ordering::Traits::ContainerIndex ContainerIndex;

      public:

        ParallelHelper (const GFS& gfs, int verbose = 1)
          : _gfs(gfs)
          , _rank(gfs.gridView().comm().rank())
          , _rank_partition(gfs,_rank)
          , _ghosts(gfs,false)
          , _verbose(verbose)
        {

          // Let's try to be clever and reduce the communication overhead by picking the smallest
          // possible communication interface depending on the overlap structure of the GFS.
          // FIXME: Switch to simple comparison as soon as dune-grid:1b3e83ec0 is reliably available!
          if (gfs.entitySet().partitions().value == Partitions::interiorBorder.value)
            {
              // The GFS only spans the interior and border partitions, so we can skip sending or
              // receiving anything else.
              _interiorBorder_all_interface = InteriorBorder_InteriorBorder_Interface;
              _all_all_interface = InteriorBorder_InteriorBorder_Interface;
            }
          else
            {
              // In general, we have to transmit more.
              _interiorBorder_all_interface = InteriorBorder_All_Interface;
              _all_all_interface = All_All_Interface;
            }

          if (_gfs.gridView().comm().size()>1)
            {

              // find out about ghosts
              //GFSDataHandle<GFS,GhostVector,GhostGatherScatter>
              GhostDataHandle<GFS,GhostVector>
                gdh(_gfs,_ghosts,false);
              _gfs.gridView().communicate(gdh,_interiorBorder_all_interface,Dune::ForwardCommunication);

              // create disjoint DOF partitioning
              //            GFSDataHandle<GFS,RankVector,DisjointPartitioningGatherScatter<RankIndex> >
              //  ibdh(_gfs,_rank_partition,DisjointPartitioningGatherScatter<RankIndex>(_rank));
              DisjointPartitioningDataHandle<GFS,RankVector> pdh(_gfs,_rank_partition);
              _gfs.gridView().communicate(pdh,_interiorBorder_all_interface,Dune::ForwardCommunication);

            }

          // Generate list of neighbors' ranks
          std::set<RankIndex> rank_set;
          for (RankIndex rank : _rank_partition)
            if (rank != _rank)
              rank_set.insert(rank);

          for (RankIndex rank : rank_set)
            _neighbor_ranks.push_back(rank);
        }

        //! Returns a sorted list of the ranks of all neighboring processes
        const std::vector<RankIndex>& getNeighborRanks() const
        {
          return _neighbor_ranks;
        }

        //! Mask out all DOFs not owned by the current process with 0.
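        //!
        //! A minimal sketch (illustrative only; x is assumed to be a PDELab backend
        //! vector built on the same GridFunctionSpace as this helper):
        //! \code
        //! ParallelHelper<GFS> helper(gfs);
        //! helper.maskForeignDOFs(x); // entries owned by other ranks are set to 0
        //! \endcode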
        template<typename X>
        void maskForeignDOFs(X& x) const
        {
          using Backend::native;
          // dispatch to implementation.
          maskForeignDOFs(ISTL::container_tag(native(x)),native(x),native(_rank_partition));
        }

      private:

        // Implementation for block vector; recursively masks blocks.
        template<typename X, typename Mask>
        void maskForeignDOFs(ISTL::tags::block_vector, X& x, const Mask& mask) const
        {
          typename Mask::const_iterator mask_it = mask.begin();
          for (typename X::iterator it = x.begin(),
                 end_it = x.end();
               it != end_it;
               ++it, ++mask_it)
            maskForeignDOFs(ISTL::container_tag(*it),*it,*mask_it);
        }

        // Implementation for field vector, iterates over entries and masks them individually.
        template<typename X, typename Mask>
        void maskForeignDOFs(ISTL::tags::field_vector, X& x, const Mask& mask) const
        {
          typename Mask::const_iterator mask_it = mask.begin();
          for (typename X::iterator it = x.begin(),
                 end_it = x.end();
               it != end_it;
               ++it, ++mask_it)
            *it = (*mask_it == _rank ? *it : typename X::field_type(0));
        }

      public:

        //! Tests whether the given index is owned by this process.
        bool owned(const ContainerIndex& i) const
        {
          return _rank_partition[i] == _rank;
        }

        //! Tests whether the given index belongs to a ghost DOF.
        bool isGhost(const ContainerIndex& i) const
        {
          return _ghosts[i];
        }

        //! Calculates the (rank-local) dot product of x and y on the disjoint partition defined by the helper.
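        //!
        //! Each DOF contributes on exactly one rank, so the global scalar product is
        //! obtained by summing the local results over all ranks. A minimal sketch
        //! (illustrative only; x and y are assumed to be matching PDELab vectors):
        //! \code
        //! auto local_dot  = helper.disjointDot(x,y);
        //! auto global_dot = gfs.gridView().comm().sum(local_dot);
        //! \endcode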
        template<typename X, typename Y>
        typename PromotionTraits<
          typename X::field_type,
          typename Y::field_type
          >::PromotedType
        disjointDot(const X& x, const Y& y) const
        {
          using Backend::native;
          return disjointDot(ISTL::container_tag(native(x)),
                             native(x),
                             native(y),
                             native(_rank_partition)
                             );
        }

      private:

        // Implementation for BlockVector, collects the result of recursively
        // invoking the algorithm on the vector blocks.
        template<typename X, typename Y, typename Mask>
        typename PromotionTraits<
          typename X::field_type,
          typename Y::field_type
          >::PromotedType
        disjointDot(ISTL::tags::block_vector, const X& x, const Y& y, const Mask& mask) const
        {
          typedef typename PromotionTraits<
            typename X::field_type,
            typename Y::field_type
            >::PromotedType result_type;

          result_type r(0);

          typename Y::const_iterator y_it = y.begin();
          typename Mask::const_iterator mask_it = mask.begin();
          for (typename X::const_iterator x_it = x.begin(),
                 end_it = x.end();
               x_it != end_it;
               ++x_it, ++y_it, ++mask_it)
            r += disjointDot(ISTL::container_tag(*x_it),*x_it,*y_it,*mask_it);

          return r;
        }

        // Implementation for FieldVector, iterates over the entries and calls Dune::dot() for DOFs
        // associated with the current rank.
        template<typename X, typename Y, typename Mask>
        typename PromotionTraits<
          typename X::field_type,
          typename Y::field_type
          >::PromotedType
        disjointDot(ISTL::tags::field_vector, const X& x, const Y& y, const Mask& mask) const
        {
          typedef typename PromotionTraits<
            typename X::field_type,
            typename Y::field_type
            >::PromotedType result_type;

          result_type r(0);

          typename Y::const_iterator y_it = y.begin();
          typename Mask::const_iterator mask_it = mask.begin();
          for (typename X::const_iterator x_it = x.begin(),
                 end_it = x.end();
               x_it != end_it;
               ++x_it, ++y_it, ++mask_it)
            r += (*mask_it == _rank ? Dune::dot(*x_it,*y_it) : result_type(0));

          return r;
        }

      public:

        //! Returns the MPI rank of this process.
        RankIndex rank() const
        {
          return _rank;
        }

#if HAVE_MPI

        //! Makes the matrix consistent and creates the parallel information for AMG.
        /**
         * This function accomplishes two things:
         *
         * 1. Makes the matrix consistent w.r.t. the disjoint partitioning of the DOF space,
         *    i.e. aggregates matrix entries for border entries from neighboring ranks.
         *
         * 2. Sets up the parallel communication information for AMG.
         *
         * \warning  This function silently assumes that the matrix only has a single level
         *           of blocking and will not work correctly otherwise. Also note that AMG
         *           will only work correctly for P1 discretisations.
         *
         * \param m  The PDELab matrix container.
         * \param c  The parallel information object providing index set, interfaces and
         *           communicators.
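         *
         * A minimal usage sketch, roughly following the nonoverlapping AMG backends
         * (illustrative only; Comm is typically the OwnerOverlapCopyCommunication
         * selected via CommSelector below):
         * \code
         * ParallelHelper<GFS> phelper(gfs);
         * Comm oocc(gfs.gridView().comm(), Dune::SolverCategory::nonoverlapping);
         * phelper.createIndexSetAndProjectForAMG(m, oocc);
         * // oocc now carries the parallel index set and remote indices needed by AMG
         * \endcode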
         */
        template<typename MatrixType, typename Comm>
        void createIndexSetAndProjectForAMG(MatrixType& m, Comm& c);

      private:

        // Checks whether a matrix block is owned by the current process. Used for the AMG
        // construction and thus assumes a single level of blocking and blocks with ownership
        // restricted to a single DOF.
        bool owned_for_amg(std::size_t i) const
        {
          return Backend::native(_rank_partition)[i][0] == _rank;
        }

#endif // HAVE_MPI

      private:

        const GFS& _gfs;
        const RankIndex _rank;
        RankVector _rank_partition; // vector to identify unique decomposition
        std::vector<RankIndex> _neighbor_ranks; // list of neighbors' ranks
        GhostVector _ghosts; // vector to identify ghost dofs
        int _verbose; // verbosity

        //! The actual communication interface used when algorithm requires InteriorBorder_All_Interface.
        InterfaceType _interiorBorder_all_interface;

        //! The actual communication interface used when algorithm requires All_All_Interface.
        InterfaceType _all_all_interface;
      };

#if HAVE_MPI

      template<typename GFS>
      template<typename M, typename C>
      void ParallelHelper<GFS>::createIndexSetAndProjectForAMG(M& m, C& c)
      {

        using Backend::native;

        const bool is_bcrs_matrix =
          std::is_same<
            typename ISTL::tags::container<
              Backend::Native<M>
              >::type::base_tag,
          ISTL::tags::bcrs_matrix
          >::value;

        const bool block_type_is_field_matrix =
          std::is_same<
            typename ISTL::tags::container<
              typename Backend::Native<M>::block_type
              >::type::base_tag,
          ISTL::tags::field_matrix
          >::value;

        // We assume M to be a BCRSMatrix in the following, so better check for that
        static_assert(is_bcrs_matrix && block_type_is_field_matrix, "matrix structure not compatible with AMG");

        // ********************************************************************************
        // In the following, the code will always assume that all DOFs stored in a single
        // block of the BCRSMatrix are attached to the same entity and can be handled
        // identically. For that reason, the code often restricts itself to inspecting the
        // first entry of the blocks in the diverse BlockVectors.
        // ********************************************************************************

        typedef typename GFS::Traits::GridViewType GV;
        typedef typename RankVector::size_type size_type;
        const GV& gv = _gfs.gridView();

        // Do we need to communicate at all?
        const bool need_communication = _gfs.gridView().comm().size() > 1;

        // First find out which dofs we share with other processors
        using BoolVector = Backend::Vector<GFS,bool>;
        BoolVector sharedDOF(_gfs, false);

        if (need_communication)
          {
            SharedDOFDataHandle<GFS,BoolVector> data_handle(_gfs,sharedDOF,false);
            _gfs.gridView().communicate(data_handle,_all_all_interface,Dune::ForwardCommunication);
          }

        // Count shared dofs that we own
        typedef typename C::ParallelIndexSet::GlobalIndex GlobalIndex;
        GlobalIndex count = 0;

        for (size_type i = 0; i < sharedDOF.N(); ++i)
          if (owned_for_amg(i) && native(sharedDOF)[i][0])
            ++count;

        dverb << gv.comm().rank() << ": shared block count is " << count.touint() << std::endl;

        // Communicate per-rank count of owned and shared DOFs to all processes.
        std::vector<GlobalIndex> counts(_gfs.gridView().comm().size());
        _gfs.gridView().comm().allgather(&count, 1, &(counts[0]));

        // Compute start index start_p = \sum_{i=0}^{i<p} counts_i
        GlobalIndex start = std::accumulate(counts.begin(),counts.begin() + _rank,GlobalIndex(0));

        using GIVector = Dune::PDELab::Backend::Vector<GFS,GlobalIndex>;
        GIVector scalarIndices(_gfs, std::numeric_limits<GlobalIndex>::max());

        for (size_type i = 0; i < sharedDOF.N(); ++i)
          if (owned_for_amg(i) && native(sharedDOF)[i][0])
            {
              native(scalarIndices)[i][0] = start;
              ++start;
            }

        // Publish global indices for the shared DOFs to other processors.
        if (need_communication)
          {
            MinDataHandle<GFS,GIVector> data_handle(_gfs,scalarIndices);
            _gfs.gridView().communicate(data_handle,_interiorBorder_all_interface,Dune::ForwardCommunication);
          }

        // Set up the index set
        c.indexSet().beginResize();
        for (size_type i=0; i<scalarIndices.N(); ++i)
          {
            Dune::OwnerOverlapCopyAttributeSet::AttributeSet attr;
            if(native(scalarIndices)[i][0] != std::numeric_limits<GlobalIndex>::max())
              {
                // global index exists in index set
                if (owned_for_amg(i))
                  {
                    // This dof is managed by us.
                    attr = Dune::OwnerOverlapCopyAttributeSet::owner;
                  }
                else
                  {
                    attr = Dune::OwnerOverlapCopyAttributeSet::copy;
                  }
                c.indexSet().add(native(scalarIndices)[i][0], typename C::ParallelIndexSet::LocalIndex(i,attr));
              }
          }
        c.indexSet().endResize();

        // Compute neighbors using communication
        std::set<int> neighbors;

        if (need_communication)
          {
            GFSNeighborDataHandle<GFS,int> data_handle(_gfs,_rank,neighbors);
            _gfs.gridView().communicate(data_handle,_all_all_interface,Dune::ForwardCommunication);
          }

        c.remoteIndices().setNeighbours(neighbors);
        c.remoteIndices().template rebuild<false>();
      }

#endif // HAVE_MPI

      template<int s, bool isFakeMPIHelper>
      struct CommSelector
      {
        typedef Dune::Amg::SequentialInformation type;
      };


#if HAVE_MPI

      // Need MPI for OwnerOverlapCopyCommunication
      template<int s>
      struct CommSelector<s,false>
      {
        typedef OwnerOverlapCopyCommunication<bigunsignedint<s>,int> type;
      };

#endif // HAVE_MPI
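
      // CommSelector picks the parallel information type for AMG at compile time:
      // with real MPI it yields an OwnerOverlapCopyCommunication, while the fake
      // (sequential) MPIHelper falls back to Amg::SequentialInformation.
      // A minimal sketch of the typical selection (illustrative only):
      //
      //   typedef typename CommSelector<96, Dune::MPIHelper::isFake>::type Comm;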

      template<typename T>
      void assertParallelUG(T comm)
      {}

#if HAVE_UG && PDELAB_SEQUENTIAL_UG
      template<int dim>
      void assertParallelUG(Dune::CollectiveCommunication<Dune::UGGrid<dim> > comm)
      {
        static_assert(Dune::AlwaysFalse<Dune::UGGrid<dim> >::value, "Using sequential UG in parallel environment");
      }
#endif
      //! \} group Backend

    } // namespace ISTL
  } // namespace PDELab
} // namespace Dune

#endif // DUNE_PDELAB_BACKEND_ISTL_PARALLELHELPER_HH