1 // -*- tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- 2 // vi: set et ts=8 sw=2 sts=2: 3 #ifndef DUNE_PDELAB_BACKEND_ISTL_PARALLELHELPER_HH 4 #define DUNE_PDELAB_BACKEND_ISTL_PARALLELHELPER_HH 5 6 #include <limits> 7 8 #include <dune/common/parallel/mpihelper.hh> 9 #include <dune/common/stdstreams.hh> 10 #include <dune/common/typetraits.hh> 11 12 #if HAVE_UG && PDELAB_SEQUENTIAL_UG 13 // We need the UGGrid declaration for the assertion 14 #include <dune/grid/uggrid.hh> 15 #endif 16 17 #include <dune/istl/owneroverlapcopy.hh> 18 #include <dune/istl/solvercategory.hh> 19 #include <dune/istl/operators.hh> 20 #include <dune/istl/solvers.hh> 21 #include <dune/istl/preconditioners.hh> 22 #include <dune/istl/scalarproducts.hh> 23 #include <dune/istl/paamg/amg.hh> 24 #include <dune/istl/paamg/pinfo.hh> 25 #include <dune/istl/io.hh> 26 #include <dune/istl/superlu.hh> 27 28 #include <dune/pdelab/constraints/common/constraints.hh> 29 #include <dune/pdelab/gridfunctionspace/genericdatahandle.hh> 30 #include <dune/pdelab/backend/interface.hh> 31 #include <dune/pdelab/backend/istl/vector.hh> 32 #include <dune/pdelab/backend/istl/utility.hh> 33 #include <dune/pdelab/gridfunctionspace/tags.hh> 34 35 namespace Dune { 36 namespace PDELab { 37 namespace ISTL { 38 39 //! \addtogroup Backend 40 //! \ingroup PDELab 41 //! \{ 42 43 44 //======================================================== 45 // A parallel helper class providing a nonoverlapping 46 // decomposition of all degrees of freedom 47 //======================================================== 48 49 template<typename GFS> 50 class ParallelHelper 51 { 52 53 //! Type for storing rank values. 54 typedef int RankIndex; 55 56 //! Type used to store owner rank values of all DOFs. 57 using RankVector = Dune::PDELab::Backend::Vector<GFS,RankIndex>; 58 //! Type used to store ghost flag of all DOFs. 59 using GhostVector = Dune::PDELab::Backend::Vector<GFS,bool>; 60 61 //! ContainerIndex of the underlying GridFunctionSpace. 62 typedef typename GFS::Ordering::Traits::ContainerIndex ContainerIndex; 63 64 public: 65 ParallelHelper(const GFS & gfs,int verbose=1)66 ParallelHelper (const GFS& gfs, int verbose = 1) 67 : _gfs(gfs) 68 , _rank(gfs.gridView().comm().rank()) 69 , _rank_partition(gfs,_rank) 70 , _ghosts(gfs,false) 71 , _verbose(verbose) 72 { 73 74 // Let's try to be clever and reduce the communication overhead by picking the smallest 75 // possible communication interface depending on the overlap structure of the GFS. 76 // FIXME: Switch to simple comparison as soon as dune-grid:1b3e83ec0 is reliably available! 77 if (gfs.entitySet().partitions().value == Partitions::interiorBorder.value) 78 { 79 // The GFS only spans the interior and border partitions, so we can skip sending or 80 // receiving anything else. 81 _interiorBorder_all_interface = InteriorBorder_InteriorBorder_Interface; 82 _all_all_interface = InteriorBorder_InteriorBorder_Interface; 83 } 84 else 85 { 86 // In general, we have to transmit more. 87 _interiorBorder_all_interface = InteriorBorder_All_Interface; 88 _all_all_interface = All_All_Interface; 89 } 90 91 if (_gfs.gridView().comm().size()>1) 92 { 93 94 // find out about ghosts 95 //GFSDataHandle<GFS,GhostVector,GhostGatherScatter> 96 GhostDataHandle<GFS,GhostVector> 97 gdh(_gfs,_ghosts,false); 98 _gfs.gridView().communicate(gdh,_interiorBorder_all_interface,Dune::ForwardCommunication); 99 100 // create disjoint DOF partitioning 101 // GFSDataHandle<GFS,RankVector,DisjointPartitioningGatherScatter<RankIndex> > 102 // ibdh(_gfs,_rank_partition,DisjointPartitioningGatherScatter<RankIndex>(_rank)); 103 DisjointPartitioningDataHandle<GFS,RankVector> pdh(_gfs,_rank_partition); 104 _gfs.gridView().communicate(pdh,_interiorBorder_all_interface,Dune::ForwardCommunication); 105 106 } 107 108 // Generate list of neighbors' ranks 109 std::set<RankIndex> rank_set; 110 for (RankIndex rank : _rank_partition) 111 if (rank != _rank) 112 rank_set.insert(rank); 113 114 for (RankIndex rank : rank_set) 115 _neighbor_ranks.push_back(rank); 116 } 117 118 //! Returns a sorted list of the ranks of all neighboring processes getNeighborRanks() const119 const std::vector<RankIndex>& getNeighborRanks() const 120 { 121 return _neighbor_ranks; 122 } 123 124 //! Mask out all DOFs not owned by the current process with 0. 125 template<typename X> maskForeignDOFs(X & x) const126 void maskForeignDOFs(X& x) const 127 { 128 using Backend::native; 129 // dispatch to implementation. 130 maskForeignDOFs(ISTL::container_tag(native(x)),native(x),native(_rank_partition)); 131 } 132 133 private: 134 135 // Implementation for block vector; recursively masks blocks. 136 template<typename X, typename Mask> maskForeignDOFs(ISTL::tags::block_vector,X & x,const Mask & mask) const137 void maskForeignDOFs(ISTL::tags::block_vector, X& x, const Mask& mask) const 138 { 139 typename Mask::const_iterator mask_it = mask.begin(); 140 for (typename X::iterator it = x.begin(), 141 end_it = x.end(); 142 it != end_it; 143 ++it, ++mask_it) 144 maskForeignDOFs(ISTL::container_tag(*it),*it,*mask_it); 145 } 146 147 // Implementation for field vector, iterates over entries and masks them individually. 148 template<typename X, typename Mask> maskForeignDOFs(ISTL::tags::field_vector,X & x,const Mask & mask) const149 void maskForeignDOFs(ISTL::tags::field_vector, X& x, const Mask& mask) const 150 { 151 typename Mask::const_iterator mask_it = mask.begin(); 152 for (typename X::iterator it = x.begin(), 153 end_it = x.end(); 154 it != end_it; 155 ++it, ++mask_it) 156 *it = (*mask_it == _rank ? *it : typename X::field_type(0)); 157 } 158 159 public: 160 161 //! Tests whether the given index is owned by this process. owned(const ContainerIndex & i) const162 bool owned(const ContainerIndex& i) const 163 { 164 return _rank_partition[i] == _rank; 165 } 166 167 //! Tests whether the given index belongs to a ghost DOF. isGhost(const ContainerIndex & i) const168 bool isGhost(const ContainerIndex& i) const 169 { 170 return _ghosts[i]; 171 } 172 173 //! Calculates the (rank-local) dot product of x and y on the disjoint partition defined by the helper. 174 template<typename X, typename Y> 175 typename PromotionTraits< 176 typename X::field_type, 177 typename Y::field_type 178 >::PromotedType disjointDot(const X & x,const Y & y) const179 disjointDot(const X& x, const Y& y) const 180 { 181 using Backend::native; 182 return disjointDot(ISTL::container_tag(native(x)), 183 native(x), 184 native(y), 185 native(_rank_partition) 186 ); 187 } 188 189 private: 190 191 // Implementation for BlockVector, collects the result of recursively 192 // invoking the algorithm on the vector blocks. 193 template<typename X, typename Y, typename Mask> 194 typename PromotionTraits< 195 typename X::field_type, 196 typename Y::field_type 197 >::PromotedType disjointDot(ISTL::tags::block_vector,const X & x,const Y & y,const Mask & mask) const198 disjointDot(ISTL::tags::block_vector, const X& x, const Y& y, const Mask& mask) const 199 { 200 typedef typename PromotionTraits< 201 typename X::field_type, 202 typename Y::field_type 203 >::PromotedType result_type; 204 205 result_type r(0); 206 207 typename Y::const_iterator y_it = y.begin(); 208 typename Mask::const_iterator mask_it = mask.begin(); 209 for (typename X::const_iterator x_it = x.begin(), 210 end_it = x.end(); 211 x_it != end_it; 212 ++x_it, ++y_it, ++mask_it) 213 r += disjointDot(ISTL::container_tag(*x_it),*x_it,*y_it,*mask_it); 214 215 return r; 216 } 217 218 // Implementation for FieldVector, iterates over the entries and calls Dune::dot() for DOFs 219 // associated with the current rank. 220 template<typename X, typename Y, typename Mask> 221 typename PromotionTraits< 222 typename X::field_type, 223 typename Y::field_type 224 >::PromotedType disjointDot(ISTL::tags::field_vector,const X & x,const Y & y,const Mask & mask) const225 disjointDot(ISTL::tags::field_vector, const X& x, const Y& y, const Mask& mask) const 226 { 227 typedef typename PromotionTraits< 228 typename X::field_type, 229 typename Y::field_type 230 >::PromotedType result_type; 231 232 result_type r(0); 233 234 typename Y::const_iterator y_it = y.begin(); 235 typename Mask::const_iterator mask_it = mask.begin(); 236 for (typename X::const_iterator x_it = x.begin(), 237 end_it = x.end(); 238 x_it != end_it; 239 ++x_it, ++y_it, ++mask_it) 240 r += (*mask_it == _rank ? Dune::dot(*x_it,*y_it) : result_type(0)); 241 242 return r; 243 } 244 245 public: 246 247 //! Returns the MPI rank of this process. rank() const248 RankIndex rank() const 249 { 250 return _rank; 251 } 252 253 #if HAVE_MPI 254 255 //! Makes the matrix consistent and creates the parallel information for AMG. 256 /** 257 * This function accomplishes two things: 258 * 259 * 1. Makes the matrix consistent w.r.t. to the disjoint partitioning of the DOF space, 260 * i.e. aggregates matrix entries for border entries from neighboring ranks. 261 * 262 * 2. Sets up the parallel communication information for AMG. 263 * 264 * \warning This function silenty assumes that the matrix only has a single level 265 * of blocking and will not work correctly otherwise. Also note that AMG 266 * will only work correctly for P1 discretisations. 267 * 268 * \param m The PDELab matrix container. 269 * \param c The parallel information object providing index set, interfaces and 270 * communicators. 271 */ 272 template<typename MatrixType, typename Comm> 273 void createIndexSetAndProjectForAMG(MatrixType& m, Comm& c); 274 275 private: 276 277 // Checks whether a matrix block is owned by the current process. Used for the AMG 278 // construction and thus assumes a single level of blocking and blocks with ownership 279 // restricted to a single DOF. owned_for_amg(std::size_t i) const280 bool owned_for_amg(std::size_t i) const 281 { 282 return Backend::native(_rank_partition)[i][0] == _rank; 283 } 284 285 #endif // HAVE_MPI 286 287 private: 288 289 const GFS& _gfs; 290 const RankIndex _rank; 291 RankVector _rank_partition; // vector to identify unique decomposition 292 std::vector<RankIndex> _neighbor_ranks; // list of neighbors' ranks 293 GhostVector _ghosts; //vector to identify ghost dofs 294 int _verbose; //verbosity 295 296 //! The actual communication interface used when algorithm requires InteriorBorder_All_Interface. 297 InterfaceType _interiorBorder_all_interface; 298 299 //! The actual communication interface used when algorithm requires All_All_Interface. 300 InterfaceType _all_all_interface; 301 }; 302 303 #if HAVE_MPI 304 305 template<typename GFS> 306 template<typename M, typename C> createIndexSetAndProjectForAMG(M & m,C & c)307 void ParallelHelper<GFS>::createIndexSetAndProjectForAMG(M& m, C& c) 308 { 309 310 using Backend::native; 311 312 const bool is_bcrs_matrix = 313 std::is_same< 314 typename ISTL::tags::container< 315 Backend::Native<M> 316 >::type::base_tag, 317 ISTL::tags::bcrs_matrix 318 >::value; 319 320 const bool block_type_is_field_matrix = 321 std::is_same< 322 typename ISTL::tags::container< 323 typename Backend::Native<M>::block_type 324 >::type::base_tag, 325 ISTL::tags::field_matrix 326 >::value; 327 328 // We assume M to be a BCRSMatrix in the following, so better check for that 329 static_assert(is_bcrs_matrix && block_type_is_field_matrix, "matrix structure not compatible with AMG"); 330 331 // ******************************************************************************** 332 // In the following, the code will always assume that all DOFs stored in a single 333 // block of the BCRSMatrix are attached to the same entity and can be handled 334 // identically. For that reason, the code often restricts itself to inspecting the 335 // first entry of the blocks in the diverse BlockVectors. 336 // ******************************************************************************** 337 338 typedef typename GFS::Traits::GridViewType GV; 339 typedef typename RankVector::size_type size_type; 340 const GV& gv = _gfs.gridView(); 341 342 // Do we need to communicate at all? 343 const bool need_communication = _gfs.gridView().comm().size() > 1; 344 345 // First find out which dofs we share with other processors 346 using BoolVector = Backend::Vector<GFS,bool>; 347 BoolVector sharedDOF(_gfs, false); 348 349 if (need_communication) 350 { 351 SharedDOFDataHandle<GFS,BoolVector> data_handle(_gfs,sharedDOF,false); 352 _gfs.gridView().communicate(data_handle,_all_all_interface,Dune::ForwardCommunication); 353 } 354 355 // Count shared dofs that we own 356 typedef typename C::ParallelIndexSet::GlobalIndex GlobalIndex; 357 GlobalIndex count = 0; 358 359 for (size_type i = 0; i < sharedDOF.N(); ++i) 360 if (owned_for_amg(i) && native(sharedDOF)[i][0]) 361 ++count; 362 363 dverb << gv.comm().rank() << ": shared block count is " << count.touint() << std::endl; 364 365 // Communicate per-rank count of owned and shared DOFs to all processes. 366 std::vector<GlobalIndex> counts(_gfs.gridView().comm().size()); 367 _gfs.gridView().comm().allgather(&count, 1, &(counts[0])); 368 369 // Compute start index start_p = \sum_{i=0}^{i<p} counts_i 370 GlobalIndex start = std::accumulate(counts.begin(),counts.begin() + _rank,GlobalIndex(0)); 371 372 using GIVector = Dune::PDELab::Backend::Vector<GFS,GlobalIndex>; 373 GIVector scalarIndices(_gfs, std::numeric_limits<GlobalIndex>::max()); 374 375 for (size_type i = 0; i < sharedDOF.N(); ++i) 376 if (owned_for_amg(i) && native(sharedDOF)[i][0]) 377 { 378 native(scalarIndices)[i][0] = start; 379 ++start; 380 } 381 382 // Publish global indices for the shared DOFS to other processors. 383 if (need_communication) 384 { 385 MinDataHandle<GFS,GIVector> data_handle(_gfs,scalarIndices); 386 _gfs.gridView().communicate(data_handle,_interiorBorder_all_interface,Dune::ForwardCommunication); 387 } 388 389 // Setup the index set 390 c.indexSet().beginResize(); 391 for (size_type i=0; i<scalarIndices.N(); ++i) 392 { 393 Dune::OwnerOverlapCopyAttributeSet::AttributeSet attr; 394 if(native(scalarIndices)[i][0] != std::numeric_limits<GlobalIndex>::max()) 395 { 396 // global index exist in index set 397 if (owned_for_amg(i)) 398 { 399 // This dof is managed by us. 400 attr = Dune::OwnerOverlapCopyAttributeSet::owner; 401 } 402 else 403 { 404 attr = Dune::OwnerOverlapCopyAttributeSet::copy; 405 } 406 c.indexSet().add(native(scalarIndices)[i][0], typename C::ParallelIndexSet::LocalIndex(i,attr)); 407 } 408 } 409 c.indexSet().endResize(); 410 411 // Compute neighbors using communication 412 std::set<int> neighbors; 413 414 if (need_communication) 415 { 416 GFSNeighborDataHandle<GFS,int> data_handle(_gfs,_rank,neighbors); 417 _gfs.gridView().communicate(data_handle,_all_all_interface,Dune::ForwardCommunication); 418 } 419 420 c.remoteIndices().setNeighbours(neighbors); 421 c.remoteIndices().template rebuild<false>(); 422 } 423 424 #endif // HAVE_MPI 425 426 template<int s, bool isFakeMPIHelper> 427 struct CommSelector 428 { 429 typedef Dune::Amg::SequentialInformation type; 430 }; 431 432 433 #if HAVE_MPI 434 435 // Need MPI for OwnerOverlapCopyCommunication 436 template<int s> 437 struct CommSelector<s,false> 438 { 439 typedef OwnerOverlapCopyCommunication<bigunsignedint<s>,int> type; 440 }; 441 442 #endif // HAVE_MPI 443 444 template<typename T> assertParallelUG(T comm)445 void assertParallelUG(T comm) 446 {} 447 448 #if HAVE_UG && PDELAB_SEQUENTIAL_UG 449 template<int dim> assertParallelUG(Dune::CollectiveCommunication<Dune::UGGrid<dim>> comm)450 void assertParallelUG(Dune::CollectiveCommunication<Dune::UGGrid<dim> > comm) 451 { 452 static_assert(Dune::AlwaysFalse<Dune::UGGrid<dim> >::value, "Using sequential UG in parallel environment"); 453 }; 454 #endif 455 //! \} group Backend 456 457 } // namespace ISTL 458 } // namespace PDELab 459 } // namespace Dune 460 461 #endif // DUNE_PDELAB_BACKEND_ISTL_PARALLELHELPER_HH 462