1 // Copyright (c) 2010-2021, Lawrence Livermore National Security, LLC. Produced 2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files 3 // LICENSE and NOTICE for details. LLNL-CODE-806117. 4 // 5 // This file is part of the MFEM library. For more information and source code 6 // availability visit https://mfem.org. 7 // 8 // MFEM is free software; you can redistribute it and/or modify it under the 9 // terms of the BSD-3 license. We welcome feedback and contributions, see file 10 // CONTRIBUTING.md for details. 11 12 #ifndef MFEM_PFESPACE 13 #define MFEM_PFESPACE 14 15 #include "../config/config.hpp" 16 17 #ifdef MFEM_USE_MPI 18 19 #include "../linalg/hypre.hpp" 20 #include "../mesh/pmesh.hpp" 21 #include "../mesh/nurbs.hpp" 22 #include "fespace.hpp" 23 24 namespace mfem 25 { 26 27 /// Abstract parallel finite element space. 28 class ParFiniteElementSpace : public FiniteElementSpace 29 { 30 private: 31 /// MPI data. 32 MPI_Comm MyComm; 33 int NRanks, MyRank; 34 35 /// Parallel mesh; #mesh points to this object as well. Not owned. 36 ParMesh *pmesh; 37 /** Parallel non-conforming mesh extension object; same as pmesh->pncmesh. 38 Not owned. */ 39 ParNCMesh *pncmesh; 40 41 /// GroupCommunicator on the local VDofs. Owned. 42 GroupCommunicator *gcomm; 43 44 /// Number of true dofs in this processor (local true dofs). 45 mutable int ltdof_size; 46 47 /// Number of vertex/edge/face/total ghost DOFs (nonconforming case). 48 int ngvdofs, ngedofs, ngfdofs, ngdofs; 49 50 /// The group of each local dof. 51 Array<int> ldof_group; 52 53 /// For a local dof: the local true dof number in the master of its group. 54 mutable Array<int> ldof_ltdof; 55 56 /// Offsets for the dofs in each processor in global numbering. 57 mutable Array<HYPRE_BigInt> dof_offsets; 58 59 /// Offsets for the true dofs in each processor in global numbering. 60 mutable Array<HYPRE_BigInt> tdof_offsets; 61 62 /// Offsets for the true dofs in neighbor processor in global numbering. 63 mutable Array<HYPRE_BigInt> tdof_nb_offsets; 64 65 /// Previous 'dof_offsets' (before Update()), column partition of T. 66 Array<HYPRE_BigInt> old_dof_offsets; 67 68 /// The sign of the basis functions at the scalar local dofs. 69 Array<int> ldof_sign; 70 71 /// The matrix P (interpolation from true dof to dof). Owned. 72 mutable HypreParMatrix *P; 73 /// Optimized action-only prolongation operator for conforming meshes. Owned. 74 mutable Operator *Pconf; 75 76 /** Used to indicate that the space is nonconforming (even if the underlying 77 mesh has NULL @a ncmesh). This occurs in low-order preconditioning on 78 nonconforming meshes. */ 79 bool nonconf_P; 80 81 /// The (block-diagonal) matrix R (restriction of dof to true dof). Owned. 82 mutable SparseMatrix *R; 83 /// Optimized action-only restriction operator for conforming meshes. Owned. 84 mutable Operator *Rconf; 85 /** Transpose of R or Rconf. For conforming mesh, this is a matrix-free 86 (Device)ConformingProlongationOperator, for a non-conforming mesh 87 this is a TransposeOperator wrapping R. */ 88 mutable Operator *R_transpose; 89 pNURBSext() const90 ParNURBSExtension *pNURBSext() const 91 { return dynamic_cast<ParNURBSExtension *>(NURBSext); } 92 93 static ParNURBSExtension *MakeLocalNURBSext( 94 const NURBSExtension *globNURBSext, const NURBSExtension *parNURBSext); 95 GetGroupTopo() const96 GroupTopology &GetGroupTopo() const 97 { return (NURBSext) ? pNURBSext()->gtopo : pmesh->gtopo; } 98 99 // Auxiliary method used in constructors 100 void ParInit(ParMesh *pm); 101 102 void Construct(); 103 void Destroy(); 104 105 // ldof_type = 0 : DOFs communicator, otherwise VDOFs communicator 106 void GetGroupComm(GroupCommunicator &gcomm, int ldof_type, 107 Array<int> *ldof_sign = NULL); 108 109 /// Construct dof_offsets and tdof_offsets using global communication. 110 void GenerateGlobalOffsets() const; 111 112 /// Construct ldof_group and ldof_ltdof. 113 void ConstructTrueDofs(); 114 void ConstructTrueNURBSDofs(); 115 116 void ApplyLDofSigns(Array<int> &dofs) const; 117 void ApplyLDofSigns(Table &el_dof) const; 118 119 typedef NCMesh::MeshId MeshId; 120 typedef ParNCMesh::GroupId GroupId; 121 122 void GetGhostVertexDofs(const MeshId &id, Array<int> &dofs) const; 123 void GetGhostEdgeDofs(const MeshId &edge_id, Array<int> &dofs) const; 124 void GetGhostFaceDofs(const MeshId &face_id, Array<int> &dofs) const; 125 126 void GetGhostDofs(int entity, const MeshId &id, Array<int> &dofs) const; 127 /// Return the dofs associated with the interior of the given mesh entity. 128 void GetBareDofs(int entity, int index, Array<int> &dofs) const; 129 130 int PackDof(int entity, int index, int edof) const; 131 void UnpackDof(int dof, int &entity, int &index, int &edof) const; 132 133 #ifdef MFEM_PMATRIX_STATS 134 mutable int n_msgs_sent, n_msgs_recv; 135 mutable int n_rows_sent, n_rows_recv, n_rows_fwd; 136 #endif 137 138 void ScheduleSendRow(const struct PMatrixRow &row, int dof, GroupId group_id, 139 std::map<int, class NeighborRowMessage> &send_msg) const; 140 141 void ForwardRow(const struct PMatrixRow &row, int dof, 142 GroupId group_sent_id, GroupId group_id, 143 std::map<int, class NeighborRowMessage> &send_msg) const; 144 145 #ifdef MFEM_DEBUG_PMATRIX 146 void DebugDumpDOFs(std::ostream &os, 147 const SparseMatrix &deps, 148 const Array<GroupId> &dof_group, 149 const Array<GroupId> &dof_owner, 150 const Array<bool> &finalized) const; 151 #endif 152 153 /// Helper: create a HypreParMatrix from a list of PMatrixRows. 154 HypreParMatrix* 155 MakeVDimHypreMatrix(const std::vector<struct PMatrixRow> &rows, 156 int local_rows, int local_cols, 157 Array<HYPRE_BigInt> &row_starts, 158 Array<HYPRE_BigInt> &col_starts) const; 159 160 /// Build the P and R matrices. 161 void Build_Dof_TrueDof_Matrix() const; 162 163 /** Used when the ParMesh is non-conforming, i.e. pmesh->pncmesh != NULL. 164 Constructs the matrices P and R, the DOF and true DOF offset arrays, 165 and the DOF -> true DOF map ('dof_tdof'). Returns the number of 166 vector true DOFs. All pointer arguments are optional and can be NULL. */ 167 int BuildParallelConformingInterpolation(HypreParMatrix **P, SparseMatrix **R, 168 Array<HYPRE_BigInt> &dof_offs, 169 Array<HYPRE_BigInt> &tdof_offs, 170 Array<int> *dof_tdof, 171 bool partial = false) const; 172 173 /** Calculate a GridFunction migration matrix after mesh load balancing. 174 The result is a parallel permutation matrix that can be used to update 175 all grid functions defined on this space. */ 176 HypreParMatrix* RebalanceMatrix(int old_ndofs, 177 const Table* old_elem_dof); 178 179 /** Calculate a GridFunction restriction matrix after mesh derefinement. 180 The matrix is constructed so that the new grid function interpolates 181 the original function, i.e., the original function is evaluated at the 182 nodes of the coarse function. */ 183 HypreParMatrix* ParallelDerefinementMatrix(int old_ndofs, 184 const Table *old_elem_dof); 185 186 /// Updates the internal mesh pointer. @warning @a new_mesh must be 187 /// <b>topologically identical</b> to the existing mesh. Used if the address 188 /// of the Mesh object has changed, e.g. in @a Mesh::Swap. 189 virtual void UpdateMeshPointer(Mesh *new_mesh); 190 191 /// Copies the prolongation and restriction matrices from @a fes. 192 /// 193 /// Used for low order preconditioning on non-conforming meshes. If the DOFs 194 /// require a permutation, it will be supplied by non-NULL @a perm. NULL @a 195 /// perm indicates that no permutation is required. 196 virtual void CopyProlongationAndRestriction(const FiniteElementSpace &fes, 197 const Array<int> *perm); 198 199 public: 200 // Face-neighbor data 201 // Number of face-neighbor dofs 202 int num_face_nbr_dofs; 203 // Face-neighbor-element to face-neighbor dof 204 Table face_nbr_element_dof; 205 // Face-neighbor to ldof in the face-neighbor numbering 206 Table face_nbr_ldof; 207 // The global ldof indices of the face-neighbor dofs 208 Array<HYPRE_BigInt> face_nbr_glob_dof_map; 209 // Local face-neighbor data: face-neighbor to ldof 210 Table send_face_nbr_ldof; 211 212 /** @brief Copy constructor: deep copy all data from @a orig except the 213 ParMesh, the FiniteElementCollection, and some derived data. */ 214 /** If the @a pmesh or @a fec pointers are NULL (default), then the new 215 ParFiniteElementSpace will reuse the respective pointers from @a orig. If 216 any of these pointers is not NULL, the given pointer will be used instead 217 of the one used by @a orig. 218 219 @note The objects pointed to by the @a pmesh and @a fec parameters must 220 be either the same objects as the ones used by @a orig, or copies of 221 them. Otherwise, the behavior is undefined. 222 223 @note Derived data objects, such as the parallel prolongation and 224 restriction operators, the update operator, and any of the face-neighbor 225 data, will not be copied, even if they are created in the @a orig object. 226 */ 227 ParFiniteElementSpace(const ParFiniteElementSpace &orig, 228 ParMesh *pmesh = NULL, 229 const FiniteElementCollection *fec = NULL); 230 231 /** @brief Convert/copy the *local* (Par)FiniteElementSpace @a orig to 232 ParFiniteElementSpace: deep copy all data from @a orig except the Mesh, 233 the FiniteElementCollection, and some derived data. */ 234 ParFiniteElementSpace(const FiniteElementSpace &orig, ParMesh &pmesh, 235 const FiniteElementCollection *fec = NULL); 236 237 /** @brief Construct the *local* ParFiniteElementSpace corresponding to the 238 global FE space, @a global_fes. */ 239 /** The parameter @a pm is the *local* ParMesh obtained by decomposing the 240 global Mesh used by @a global_fes. The array @a partitioning represents 241 the parallel decomposition - it maps global element ids to MPI ranks. 242 If the FiniteElementCollection, @a f, is NULL (default), the FE 243 collection used by @a global_fes will be reused. If @a f is not NULL, it 244 must be the same as, or a copy of, the FE collection used by 245 @a global_fes. */ 246 ParFiniteElementSpace(ParMesh *pm, const FiniteElementSpace *global_fes, 247 const int *partitioning, 248 const FiniteElementCollection *f = NULL); 249 250 ParFiniteElementSpace(ParMesh *pm, const FiniteElementCollection *f, 251 int dim = 1, int ordering = Ordering::byNODES); 252 253 /// Construct a NURBS FE space based on the given NURBSExtension, @a ext. 254 /** The parameter @a ext will be deleted by this constructor, replaced by a 255 ParNURBSExtension owned by the ParFiniteElementSpace. 256 @note If the pointer @a ext is NULL, this constructor is equivalent to 257 the standard constructor with the same arguments minus the 258 NURBSExtension, @a ext. */ 259 ParFiniteElementSpace(ParMesh *pm, NURBSExtension *ext, 260 const FiniteElementCollection *f, 261 int dim = 1, int ordering = Ordering::byNODES); 262 GetComm() const263 MPI_Comm GetComm() const { return MyComm; } GetNRanks() const264 int GetNRanks() const { return NRanks; } GetMyRank() const265 int GetMyRank() const { return MyRank; } 266 GetParMesh() const267 inline ParMesh *GetParMesh() const { return pmesh; } 268 GetDofSign(int i)269 int GetDofSign(int i) 270 { return NURBSext || Nonconforming() ? 1 : ldof_sign[VDofToDof(i)]; } GetDofOffsets() const271 HYPRE_BigInt *GetDofOffsets() const { return dof_offsets; } GetTrueDofOffsets() const272 HYPRE_BigInt *GetTrueDofOffsets() const { return tdof_offsets; } GlobalVSize() const273 HYPRE_BigInt GlobalVSize() const 274 { return Dof_TrueDof_Matrix()->GetGlobalNumRows(); } GlobalTrueVSize() const275 HYPRE_BigInt GlobalTrueVSize() const 276 { return Dof_TrueDof_Matrix()->GetGlobalNumCols(); } 277 278 /// Return the number of local vector true dofs. GetTrueVSize() const279 virtual int GetTrueVSize() const { return ltdof_size; } 280 281 /// Returns indexes of degrees of freedom in array dofs for i'th element. 282 virtual void GetElementDofs(int i, Array<int> &dofs) const; 283 284 /// Returns indexes of degrees of freedom for i'th boundary element. 285 virtual void GetBdrElementDofs(int i, Array<int> &dofs) const; 286 287 /** Returns the indexes of the degrees of freedom for i'th face 288 including the dofs for the edges and the vertices of the face. */ 289 virtual int GetFaceDofs(int i, Array<int> &dofs, int variant = 0) const; 290 291 /** Returns pointer to the FiniteElement in the FiniteElementCollection 292 associated with i'th element in the mesh object. If @a i is greater than 293 or equal to the number of local mesh elements, @a i will be interpreted 294 as a shifted index of a face neighbor element. */ 295 virtual const FiniteElement *GetFE(int i) const; 296 297 /** Returns an Operator that converts L-vectors to E-vectors on each face. 298 The parallel version is different from the serial one because of the 299 presence of shared faces. Shared faces are treated as interior faces, 300 the returned operator handles the communication needed to get the 301 shared face values from other MPI ranks */ 302 virtual const FaceRestriction *GetFaceRestriction( 303 ElementDofOrdering e_ordering, FaceType type, 304 L2FaceValues mul = L2FaceValues::DoubleValued) const; 305 306 void GetSharedEdgeDofs(int group, int ei, Array<int> &dofs) const; 307 void GetSharedTriangleDofs(int group, int fi, Array<int> &dofs) const; 308 void GetSharedQuadrilateralDofs(int group, int fi, Array<int> &dofs) const; 309 310 /// The true dof-to-dof interpolation matrix Dof_TrueDof_Matrix() const311 HypreParMatrix *Dof_TrueDof_Matrix() const 312 { if (!P) { Build_Dof_TrueDof_Matrix(); } return P; } 313 314 /** @brief For a non-conforming mesh, construct and return the interpolation 315 matrix from the partially conforming true dofs to the local dofs. */ 316 /** @note The returned pointer must be deleted by the caller. */ 317 HypreParMatrix *GetPartialConformingInterpolation(); 318 319 /** Create and return a new HypreParVector on the true dofs, which is 320 owned by (i.e. it must be destroyed by) the calling function. */ NewTrueDofVector()321 HypreParVector *NewTrueDofVector() 322 { return (new HypreParVector(MyComm,GlobalTrueVSize(),GetTrueDofOffsets()));} 323 324 /// Scale a vector of true dofs 325 void DivideByGroupSize(double *vec); 326 327 /// Return a reference to the internal GroupCommunicator (on VDofs) GroupComm()328 GroupCommunicator &GroupComm() { return *gcomm; } 329 330 /// Return a const reference to the internal GroupCommunicator (on VDofs) GroupComm() const331 const GroupCommunicator &GroupComm() const { return *gcomm; } 332 333 /// Return a new GroupCommunicator on scalar dofs, i.e. for VDim = 1. 334 /** @note The returned pointer must be deleted by the caller. */ 335 GroupCommunicator *ScalarGroupComm(); 336 337 /** @brief Given an integer array on the local degrees of freedom, perform 338 a bitwise OR between the shared dofs. */ 339 /** For non-conforming mesh, synchronization is performed on the cut (aka 340 "partially conforming") space. */ 341 void Synchronize(Array<int> &ldof_marker) const; 342 343 /// Determine the boundary degrees of freedom 344 virtual void GetEssentialVDofs(const Array<int> &bdr_attr_is_ess, 345 Array<int> &ess_dofs, 346 int component = -1) const; 347 348 /** Get a list of essential true dofs, ess_tdof_list, corresponding to the 349 boundary attributes marked in the array bdr_attr_is_ess. */ 350 virtual void GetEssentialTrueDofs(const Array<int> &bdr_attr_is_ess, 351 Array<int> &ess_tdof_list, 352 int component = -1); 353 354 /** If the given ldof is owned by the current processor, return its local 355 tdof number, otherwise return -1 */ 356 int GetLocalTDofNumber(int ldof) const; 357 /// Returns the global tdof number of the given local degree of freedom 358 HYPRE_BigInt GetGlobalTDofNumber(int ldof) const; 359 /** Returns the global tdof number of the given local degree of freedom in 360 the scalar version of the current finite element space. The input should 361 be a scalar local dof. */ 362 HYPRE_BigInt GetGlobalScalarTDofNumber(int sldof); 363 364 HYPRE_BigInt GetMyDofOffset() const; 365 HYPRE_BigInt GetMyTDofOffset() const; 366 367 virtual const Operator *GetProlongationMatrix() const; 368 /** @brief Return logical transpose of restriction matrix, but in 369 non-assembled optimized matrix-free form. 370 371 The implementation is like GetProlongationMatrix, but it sets local 372 DOFs to the true DOF values if owned locally, otherwise zero. */ 373 virtual const Operator *GetRestrictionTransposeOperator() const; 374 /** Get an Operator that performs the action of GetRestrictionMatrix(), 375 but potentially with a non-assembled optimized matrix-free 376 implementation. */ 377 virtual const Operator *GetRestrictionOperator() const; 378 /// Get the R matrix which restricts a local dof vector to true dof vector. GetRestrictionMatrix() const379 virtual const SparseMatrix *GetRestrictionMatrix() const 380 { Dof_TrueDof_Matrix(); return R; } 381 382 // Face-neighbor functions 383 void ExchangeFaceNbrData(); GetFaceNbrVSize() const384 int GetFaceNbrVSize() const { return num_face_nbr_dofs; } 385 void GetFaceNbrElementVDofs(int i, Array<int> &vdofs) const; 386 void GetFaceNbrFaceVDofs(int i, Array<int> &vdofs) const; 387 const FiniteElement *GetFaceNbrFE(int i) const; 388 const FiniteElement *GetFaceNbrFaceFE(int i) const; GetFaceNbrGlobalDofMap()389 const HYPRE_BigInt *GetFaceNbrGlobalDofMap() { return face_nbr_glob_dof_map; } GetFaceNbrElementTransformation(int i) const390 ElementTransformation *GetFaceNbrElementTransformation(int i) const 391 { return pmesh->GetFaceNbrElementTransformation(i); } 392 393 void Lose_Dof_TrueDof_Matrix(); LoseDofOffsets()394 void LoseDofOffsets() { dof_offsets.LoseData(); } LoseTrueDofOffsets()395 void LoseTrueDofOffsets() { tdof_offsets.LoseData(); } 396 Conforming() const397 bool Conforming() const { return pmesh->pncmesh == NULL && !nonconf_P; } Nonconforming() const398 bool Nonconforming() const { return pmesh->pncmesh != NULL || nonconf_P; } 399 400 // Transfer parallel true-dof data from coarse_fes, defined on a coarse mesh, 401 // to this FE space, defined on a refined mesh. See full documentation in the 402 // base class, FiniteElementSpace::GetTrueTransferOperator. 403 virtual void GetTrueTransferOperator(const FiniteElementSpace &coarse_fes, 404 OperatorHandle &T) const; 405 406 /** Reflect changes in the mesh. Calculate one of the refinement/derefinement 407 /rebalance matrices, unless want_transform is false. */ 408 virtual void Update(bool want_transform = true); 409 410 /// Free ParGridFunction transformation matrix (if any), to save memory. UpdatesFinished()411 virtual void UpdatesFinished() 412 { 413 FiniteElementSpace::UpdatesFinished(); 414 old_dof_offsets.DeleteAll(); 415 } 416 ~ParFiniteElementSpace()417 virtual ~ParFiniteElementSpace() { Destroy(); } 418 419 void PrintPartitionStats(); 420 421 /// Obsolete, kept for backward compatibility TrueVSize() const422 int TrueVSize() const { return ltdof_size; } 423 }; 424 425 426 /// Auxiliary class used by ParFiniteElementSpace. 427 class ConformingProlongationOperator : public Operator 428 { 429 protected: 430 Array<int> external_ldofs; 431 const GroupCommunicator &gc; 432 bool local; 433 434 public: 435 ConformingProlongationOperator(int lsize, const GroupCommunicator &gc_, 436 bool local_=false); 437 438 ConformingProlongationOperator(const ParFiniteElementSpace &pfes, 439 bool local_=false); 440 441 const GroupCommunicator &GetGroupCommunicator() const; 442 443 virtual void Mult(const Vector &x, Vector &y) const; 444 445 virtual void MultTranspose(const Vector &x, Vector &y) const; 446 }; 447 448 /// Auxiliary device class used by ParFiniteElementSpace. 449 class DeviceConformingProlongationOperator: public 450 ConformingProlongationOperator 451 { 452 protected: 453 bool mpi_gpu_aware; 454 Array<int> shr_ltdof, ext_ldof; 455 mutable Vector shr_buf, ext_buf; 456 Memory<int> shr_buf_offsets, ext_buf_offsets; 457 Array<int> ltdof_ldof, unq_ltdof; 458 Array<int> unq_shr_i, unq_shr_j; 459 MPI_Request *requests; 460 461 // Kernel: copy ltdofs from 'src' to 'shr_buf' - prepare for send. 462 // shr_buf[i] = src[shr_ltdof[i]] 463 void BcastBeginCopy(const Vector &src) const; 464 465 // Kernel: copy ltdofs from 'src' to ldofs in 'dst'. 466 // dst[ltdof_ldof[i]] = src[i] 467 void BcastLocalCopy(const Vector &src, Vector &dst) const; 468 469 // Kernel: copy ext. dofs from 'ext_buf' to 'dst' - after recv. 470 // dst[ext_ldof[i]] = ext_buf[i] 471 void BcastEndCopy(Vector &dst) const; 472 473 // Kernel: copy ext. dofs from 'src' to 'ext_buf' - prepare for send. 474 // ext_buf[i] = src[ext_ldof[i]] 475 void ReduceBeginCopy(const Vector &src) const; 476 477 // Kernel: copy owned ldofs from 'src' to ltdofs in 'dst'. 478 // dst[i] = src[ltdof_ldof[i]] 479 void ReduceLocalCopy(const Vector &src, Vector &dst) const; 480 481 // Kernel: assemble dofs from 'shr_buf' into to 'dst' - after recv. 482 // dst[shr_ltdof[i]] += shr_buf[i] 483 void ReduceEndAssemble(Vector &dst) const; 484 485 public: 486 DeviceConformingProlongationOperator( 487 const GroupCommunicator &gc_, const SparseMatrix *R, bool local_=false); 488 489 DeviceConformingProlongationOperator(const ParFiniteElementSpace &pfes, 490 bool local_=false); 491 492 virtual ~DeviceConformingProlongationOperator(); 493 494 virtual void Mult(const Vector &x, Vector &y) const; 495 496 virtual void MultTranspose(const Vector &x, Vector &y) const; 497 }; 498 499 } 500 501 #endif // MFEM_USE_MPI 502 503 #endif 504