1 // Copyright (c) 2010-2021, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #ifndef MFEM_PFESPACE
13 #define MFEM_PFESPACE
14 
15 #include "../config/config.hpp"
16 
17 #ifdef MFEM_USE_MPI
18 
19 #include "../linalg/hypre.hpp"
20 #include "../mesh/pmesh.hpp"
21 #include "../mesh/nurbs.hpp"
22 #include "fespace.hpp"
23 
24 namespace mfem
25 {
26 
27 /// Abstract parallel finite element space.
28 class ParFiniteElementSpace : public FiniteElementSpace
29 {
30 private:
31    /// MPI data.
32    MPI_Comm MyComm;
33    int NRanks, MyRank;
34 
35    /// Parallel mesh; #mesh points to this object as well. Not owned.
36    ParMesh *pmesh;
37    /** Parallel non-conforming mesh extension object; same as pmesh->pncmesh.
38        Not owned. */
39    ParNCMesh *pncmesh;
40 
41    /// GroupCommunicator on the local VDofs. Owned.
42    GroupCommunicator *gcomm;
43 
44    /// Number of true dofs in this processor (local true dofs).
45    mutable int ltdof_size;
46 
47    /// Number of vertex/edge/face/total ghost DOFs (nonconforming case).
48    int ngvdofs, ngedofs, ngfdofs, ngdofs;
49 
50    /// The group of each local dof.
51    Array<int> ldof_group;
52 
53    /// For a local dof: the local true dof number in the master of its group.
54    mutable Array<int> ldof_ltdof;
55 
56    /// Offsets for the dofs in each processor in global numbering.
57    mutable Array<HYPRE_BigInt> dof_offsets;
58 
59    /// Offsets for the true dofs in each processor in global numbering.
60    mutable Array<HYPRE_BigInt> tdof_offsets;
61 
62    /// Offsets for the true dofs in neighbor processor in global numbering.
63    mutable Array<HYPRE_BigInt> tdof_nb_offsets;
64 
65    /// Previous 'dof_offsets' (before Update()), column partition of T.
66    Array<HYPRE_BigInt> old_dof_offsets;
67 
68    /// The sign of the basis functions at the scalar local dofs.
69    Array<int> ldof_sign;
70 
71    /// The matrix P (interpolation from true dof to dof). Owned.
72    mutable HypreParMatrix *P;
73    /// Optimized action-only prolongation operator for conforming meshes. Owned.
74    mutable Operator *Pconf;
75 
76    /** Used to indicate that the space is nonconforming (even if the underlying
77        mesh has NULL @a ncmesh). This occurs in low-order preconditioning on
78        nonconforming meshes. */
79    bool nonconf_P;
80 
81    /// The (block-diagonal) matrix R (restriction of dof to true dof). Owned.
82    mutable SparseMatrix *R;
83    /// Optimized action-only restriction operator for conforming meshes. Owned.
84    mutable Operator *Rconf;
85    /** Transpose of R or Rconf. For conforming mesh, this is a matrix-free
86        (Device)ConformingProlongationOperator, for a non-conforming mesh
87        this is a TransposeOperator wrapping R. */
88    mutable Operator *R_transpose;
89 
pNURBSext() const90    ParNURBSExtension *pNURBSext() const
91    { return dynamic_cast<ParNURBSExtension *>(NURBSext); }
92 
93    static ParNURBSExtension *MakeLocalNURBSext(
94       const NURBSExtension *globNURBSext, const NURBSExtension *parNURBSext);
95 
GetGroupTopo() const96    GroupTopology &GetGroupTopo() const
97    { return (NURBSext) ? pNURBSext()->gtopo : pmesh->gtopo; }
98 
99    // Auxiliary method used in constructors
100    void ParInit(ParMesh *pm);
101 
102    void Construct();
103    void Destroy();
104 
105    // ldof_type = 0 : DOFs communicator, otherwise VDOFs communicator
106    void GetGroupComm(GroupCommunicator &gcomm, int ldof_type,
107                      Array<int> *ldof_sign = NULL);
108 
109    /// Construct dof_offsets and tdof_offsets using global communication.
110    void GenerateGlobalOffsets() const;
111 
112    /// Construct ldof_group and ldof_ltdof.
113    void ConstructTrueDofs();
114    void ConstructTrueNURBSDofs();
115 
116    void ApplyLDofSigns(Array<int> &dofs) const;
117    void ApplyLDofSigns(Table &el_dof) const;
118 
119    typedef NCMesh::MeshId MeshId;
120    typedef ParNCMesh::GroupId GroupId;
121 
122    void GetGhostVertexDofs(const MeshId &id, Array<int> &dofs) const;
123    void GetGhostEdgeDofs(const MeshId &edge_id, Array<int> &dofs) const;
124    void GetGhostFaceDofs(const MeshId &face_id, Array<int> &dofs) const;
125 
126    void GetGhostDofs(int entity, const MeshId &id, Array<int> &dofs) const;
127    /// Return the dofs associated with the interior of the given mesh entity.
128    void GetBareDofs(int entity, int index, Array<int> &dofs) const;
129 
130    int  PackDof(int entity, int index, int edof) const;
131    void UnpackDof(int dof, int &entity, int &index, int &edof) const;
132 
133 #ifdef MFEM_PMATRIX_STATS
134    mutable int n_msgs_sent, n_msgs_recv;
135    mutable int n_rows_sent, n_rows_recv, n_rows_fwd;
136 #endif
137 
138    void ScheduleSendRow(const struct PMatrixRow &row, int dof, GroupId group_id,
139                         std::map<int, class NeighborRowMessage> &send_msg) const;
140 
141    void ForwardRow(const struct PMatrixRow &row, int dof,
142                    GroupId group_sent_id, GroupId group_id,
143                    std::map<int, class NeighborRowMessage> &send_msg) const;
144 
145 #ifdef MFEM_DEBUG_PMATRIX
146    void DebugDumpDOFs(std::ostream &os,
147                       const SparseMatrix &deps,
148                       const Array<GroupId> &dof_group,
149                       const Array<GroupId> &dof_owner,
150                       const Array<bool> &finalized) const;
151 #endif
152 
153    /// Helper: create a HypreParMatrix from a list of PMatrixRows.
154    HypreParMatrix*
155    MakeVDimHypreMatrix(const std::vector<struct PMatrixRow> &rows,
156                        int local_rows, int local_cols,
157                        Array<HYPRE_BigInt> &row_starts,
158                        Array<HYPRE_BigInt> &col_starts) const;
159 
160    /// Build the P and R matrices.
161    void Build_Dof_TrueDof_Matrix() const;
162 
163    /** Used when the ParMesh is non-conforming, i.e. pmesh->pncmesh != NULL.
164        Constructs the matrices P and R, the DOF and true DOF offset arrays,
165        and the DOF -> true DOF map ('dof_tdof'). Returns the number of
166        vector true DOFs. All pointer arguments are optional and can be NULL. */
167    int BuildParallelConformingInterpolation(HypreParMatrix **P, SparseMatrix **R,
168                                             Array<HYPRE_BigInt> &dof_offs,
169                                             Array<HYPRE_BigInt> &tdof_offs,
170                                             Array<int> *dof_tdof,
171                                             bool partial = false) const;
172 
173    /** Calculate a GridFunction migration matrix after mesh load balancing.
174        The result is a parallel permutation matrix that can be used to update
175        all grid functions defined on this space. */
176    HypreParMatrix* RebalanceMatrix(int old_ndofs,
177                                    const Table* old_elem_dof);
178 
179    /** Calculate a GridFunction restriction matrix after mesh derefinement.
180        The matrix is constructed so that the new grid function interpolates
181        the original function, i.e., the original function is evaluated at the
182        nodes of the coarse function. */
183    HypreParMatrix* ParallelDerefinementMatrix(int old_ndofs,
184                                               const Table *old_elem_dof);
185 
186    /// Updates the internal mesh pointer. @warning @a new_mesh must be
187    /// <b>topologically identical</b> to the existing mesh. Used if the address
188    /// of the Mesh object has changed, e.g. in @a Mesh::Swap.
189    virtual void UpdateMeshPointer(Mesh *new_mesh);
190 
191    /// Copies the prolongation and restriction matrices from @a fes.
192    ///
193    /// Used for low order preconditioning on non-conforming meshes. If the DOFs
194    /// require a permutation, it will be supplied by non-NULL @a perm. NULL @a
195    /// perm indicates that no permutation is required.
196    virtual void CopyProlongationAndRestriction(const FiniteElementSpace &fes,
197                                                const Array<int> *perm);
198 
199 public:
200    // Face-neighbor data
201    // Number of face-neighbor dofs
202    int num_face_nbr_dofs;
203    // Face-neighbor-element to face-neighbor dof
204    Table face_nbr_element_dof;
205    // Face-neighbor to ldof in the face-neighbor numbering
206    Table face_nbr_ldof;
207    // The global ldof indices of the face-neighbor dofs
208    Array<HYPRE_BigInt> face_nbr_glob_dof_map;
209    // Local face-neighbor data: face-neighbor to ldof
210    Table send_face_nbr_ldof;
211 
212    /** @brief Copy constructor: deep copy all data from @a orig except the
213        ParMesh, the FiniteElementCollection, and some derived data. */
214    /** If the @a pmesh or @a fec pointers are NULL (default), then the new
215        ParFiniteElementSpace will reuse the respective pointers from @a orig. If
216        any of these pointers is not NULL, the given pointer will be used instead
217        of the one used by @a orig.
218 
219        @note The objects pointed to by the @a pmesh and @a fec parameters must
220        be either the same objects as the ones used by @a orig, or copies of
221        them. Otherwise, the behavior is undefined.
222 
223        @note Derived data objects, such as the parallel prolongation and
224        restriction operators, the update operator, and any of the face-neighbor
225        data, will not be copied, even if they are created in the @a orig object.
226    */
227    ParFiniteElementSpace(const ParFiniteElementSpace &orig,
228                          ParMesh *pmesh = NULL,
229                          const FiniteElementCollection *fec = NULL);
230 
231    /** @brief Convert/copy the *local* (Par)FiniteElementSpace @a orig to
232        ParFiniteElementSpace: deep copy all data from @a orig except the Mesh,
233        the FiniteElementCollection, and some derived data. */
234    ParFiniteElementSpace(const FiniteElementSpace &orig, ParMesh &pmesh,
235                          const FiniteElementCollection *fec = NULL);
236 
237    /** @brief Construct the *local* ParFiniteElementSpace corresponding to the
238        global FE space, @a global_fes. */
239    /** The parameter @a pm is the *local* ParMesh obtained by decomposing the
240        global Mesh used by @a global_fes. The array @a partitioning represents
241        the parallel decomposition - it maps global element ids to MPI ranks.
242        If the FiniteElementCollection, @a f, is NULL (default), the FE
243        collection used by @a global_fes will be reused. If @a f is not NULL, it
244        must be the same as, or a copy of, the FE collection used by
245        @a global_fes. */
246    ParFiniteElementSpace(ParMesh *pm, const FiniteElementSpace *global_fes,
247                          const int *partitioning,
248                          const FiniteElementCollection *f = NULL);
249 
250    ParFiniteElementSpace(ParMesh *pm, const FiniteElementCollection *f,
251                          int dim = 1, int ordering = Ordering::byNODES);
252 
253    /// Construct a NURBS FE space based on the given NURBSExtension, @a ext.
254    /** The parameter @a ext will be deleted by this constructor, replaced by a
255        ParNURBSExtension owned by the ParFiniteElementSpace.
256        @note If the pointer @a ext is NULL, this constructor is equivalent to
257        the standard constructor with the same arguments minus the
258        NURBSExtension, @a ext. */
259    ParFiniteElementSpace(ParMesh *pm, NURBSExtension *ext,
260                          const FiniteElementCollection *f,
261                          int dim = 1, int ordering = Ordering::byNODES);
262 
GetComm() const263    MPI_Comm GetComm() const { return MyComm; }
GetNRanks() const264    int GetNRanks() const { return NRanks; }
GetMyRank() const265    int GetMyRank() const { return MyRank; }
266 
GetParMesh() const267    inline ParMesh *GetParMesh() const { return pmesh; }
268 
GetDofSign(int i)269    int GetDofSign(int i)
270    { return NURBSext || Nonconforming() ? 1 : ldof_sign[VDofToDof(i)]; }
GetDofOffsets() const271    HYPRE_BigInt *GetDofOffsets()     const { return dof_offsets; }
GetTrueDofOffsets() const272    HYPRE_BigInt *GetTrueDofOffsets() const { return tdof_offsets; }
GlobalVSize() const273    HYPRE_BigInt GlobalVSize() const
274    { return Dof_TrueDof_Matrix()->GetGlobalNumRows(); }
GlobalTrueVSize() const275    HYPRE_BigInt GlobalTrueVSize() const
276    { return Dof_TrueDof_Matrix()->GetGlobalNumCols(); }
277 
278    /// Return the number of local vector true dofs.
GetTrueVSize() const279    virtual int GetTrueVSize() const { return ltdof_size; }
280 
281    /// Returns indexes of degrees of freedom in array dofs for i'th element.
282    virtual void GetElementDofs(int i, Array<int> &dofs) const;
283 
284    /// Returns indexes of degrees of freedom for i'th boundary element.
285    virtual void GetBdrElementDofs(int i, Array<int> &dofs) const;
286 
287    /** Returns the indexes of the degrees of freedom for i'th face
288        including the dofs for the edges and the vertices of the face. */
289    virtual int GetFaceDofs(int i, Array<int> &dofs, int variant = 0) const;
290 
291    /** Returns pointer to the FiniteElement in the FiniteElementCollection
292        associated with i'th element in the mesh object. If @a i is greater than
293        or equal to the number of local mesh elements, @a i will be interpreted
294        as a shifted index of a face neighbor element. */
295    virtual const FiniteElement *GetFE(int i) const;
296 
297    /** Returns an Operator that converts L-vectors to E-vectors on each face.
298        The parallel version is different from the serial one because of the
299        presence of shared faces. Shared faces are treated as interior faces,
300        the returned operator handles the communication needed to get the
301        shared face values from other MPI ranks */
302    virtual const FaceRestriction *GetFaceRestriction(
303       ElementDofOrdering e_ordering, FaceType type,
304       L2FaceValues mul = L2FaceValues::DoubleValued) const;
305 
306    void GetSharedEdgeDofs(int group, int ei, Array<int> &dofs) const;
307    void GetSharedTriangleDofs(int group, int fi, Array<int> &dofs) const;
308    void GetSharedQuadrilateralDofs(int group, int fi, Array<int> &dofs) const;
309 
310    /// The true dof-to-dof interpolation matrix
Dof_TrueDof_Matrix() const311    HypreParMatrix *Dof_TrueDof_Matrix() const
312    { if (!P) { Build_Dof_TrueDof_Matrix(); } return P; }
313 
314    /** @brief For a non-conforming mesh, construct and return the interpolation
315        matrix from the partially conforming true dofs to the local dofs. */
316    /** @note The returned pointer must be deleted by the caller. */
317    HypreParMatrix *GetPartialConformingInterpolation();
318 
319    /** Create and return a new HypreParVector on the true dofs, which is
320        owned by (i.e. it must be destroyed by) the calling function. */
NewTrueDofVector()321    HypreParVector *NewTrueDofVector()
322    { return (new HypreParVector(MyComm,GlobalTrueVSize(),GetTrueDofOffsets()));}
323 
324    /// Scale a vector of true dofs
325    void DivideByGroupSize(double *vec);
326 
327    /// Return a reference to the internal GroupCommunicator (on VDofs)
GroupComm()328    GroupCommunicator &GroupComm() { return *gcomm; }
329 
330    /// Return a const reference to the internal GroupCommunicator (on VDofs)
GroupComm() const331    const GroupCommunicator &GroupComm() const { return *gcomm; }
332 
333    /// Return a new GroupCommunicator on scalar dofs, i.e. for VDim = 1.
334    /** @note The returned pointer must be deleted by the caller. */
335    GroupCommunicator *ScalarGroupComm();
336 
337    /** @brief Given an integer array on the local degrees of freedom, perform
338        a bitwise OR between the shared dofs. */
339    /** For non-conforming mesh, synchronization is performed on the cut (aka
340        "partially conforming") space. */
341    void Synchronize(Array<int> &ldof_marker) const;
342 
343    /// Determine the boundary degrees of freedom
344    virtual void GetEssentialVDofs(const Array<int> &bdr_attr_is_ess,
345                                   Array<int> &ess_dofs,
346                                   int component = -1) const;
347 
348    /** Get a list of essential true dofs, ess_tdof_list, corresponding to the
349        boundary attributes marked in the array bdr_attr_is_ess. */
350    virtual void GetEssentialTrueDofs(const Array<int> &bdr_attr_is_ess,
351                                      Array<int> &ess_tdof_list,
352                                      int component = -1);
353 
354    /** If the given ldof is owned by the current processor, return its local
355        tdof number, otherwise return -1 */
356    int GetLocalTDofNumber(int ldof) const;
357    /// Returns the global tdof number of the given local degree of freedom
358    HYPRE_BigInt GetGlobalTDofNumber(int ldof) const;
359    /** Returns the global tdof number of the given local degree of freedom in
360        the scalar version of the current finite element space. The input should
361        be a scalar local dof. */
362    HYPRE_BigInt GetGlobalScalarTDofNumber(int sldof);
363 
364    HYPRE_BigInt GetMyDofOffset() const;
365    HYPRE_BigInt GetMyTDofOffset() const;
366 
367    virtual const Operator *GetProlongationMatrix() const;
368    /** @brief Return logical transpose of restriction matrix, but in
369        non-assembled optimized matrix-free form.
370 
371        The implementation is like GetProlongationMatrix, but it sets local
372        DOFs to the true DOF values if owned locally, otherwise zero. */
373    virtual const Operator *GetRestrictionTransposeOperator() const;
374    /** Get an Operator that performs the action of GetRestrictionMatrix(),
375        but potentially with a non-assembled optimized matrix-free
376        implementation. */
377    virtual const Operator *GetRestrictionOperator() const;
378    /// Get the R matrix which restricts a local dof vector to true dof vector.
GetRestrictionMatrix() const379    virtual const SparseMatrix *GetRestrictionMatrix() const
380    { Dof_TrueDof_Matrix(); return R; }
381 
382    // Face-neighbor functions
383    void ExchangeFaceNbrData();
GetFaceNbrVSize() const384    int GetFaceNbrVSize() const { return num_face_nbr_dofs; }
385    void GetFaceNbrElementVDofs(int i, Array<int> &vdofs) const;
386    void GetFaceNbrFaceVDofs(int i, Array<int> &vdofs) const;
387    const FiniteElement *GetFaceNbrFE(int i) const;
388    const FiniteElement *GetFaceNbrFaceFE(int i) const;
GetFaceNbrGlobalDofMap()389    const HYPRE_BigInt *GetFaceNbrGlobalDofMap() { return face_nbr_glob_dof_map; }
GetFaceNbrElementTransformation(int i) const390    ElementTransformation *GetFaceNbrElementTransformation(int i) const
391    { return pmesh->GetFaceNbrElementTransformation(i); }
392 
393    void Lose_Dof_TrueDof_Matrix();
LoseDofOffsets()394    void LoseDofOffsets() { dof_offsets.LoseData(); }
LoseTrueDofOffsets()395    void LoseTrueDofOffsets() { tdof_offsets.LoseData(); }
396 
Conforming() const397    bool Conforming() const { return pmesh->pncmesh == NULL && !nonconf_P; }
Nonconforming() const398    bool Nonconforming() const { return pmesh->pncmesh != NULL || nonconf_P; }
399 
400    // Transfer parallel true-dof data from coarse_fes, defined on a coarse mesh,
401    // to this FE space, defined on a refined mesh. See full documentation in the
402    // base class, FiniteElementSpace::GetTrueTransferOperator.
403    virtual void GetTrueTransferOperator(const FiniteElementSpace &coarse_fes,
404                                         OperatorHandle &T) const;
405 
406    /** Reflect changes in the mesh. Calculate one of the refinement/derefinement
407        /rebalance matrices, unless want_transform is false. */
408    virtual void Update(bool want_transform = true);
409 
410    /// Free ParGridFunction transformation matrix (if any), to save memory.
UpdatesFinished()411    virtual void UpdatesFinished()
412    {
413       FiniteElementSpace::UpdatesFinished();
414       old_dof_offsets.DeleteAll();
415    }
416 
~ParFiniteElementSpace()417    virtual ~ParFiniteElementSpace() { Destroy(); }
418 
419    void PrintPartitionStats();
420 
421    /// Obsolete, kept for backward compatibility
TrueVSize() const422    int TrueVSize() const { return ltdof_size; }
423 };
424 
425 
426 /// Auxiliary class used by ParFiniteElementSpace.
427 class ConformingProlongationOperator : public Operator
428 {
429 protected:
430    Array<int> external_ldofs;
431    const GroupCommunicator &gc;
432    bool local;
433 
434 public:
435    ConformingProlongationOperator(int lsize, const GroupCommunicator &gc_,
436                                   bool local_=false);
437 
438    ConformingProlongationOperator(const ParFiniteElementSpace &pfes,
439                                   bool local_=false);
440 
441    const GroupCommunicator &GetGroupCommunicator() const;
442 
443    virtual void Mult(const Vector &x, Vector &y) const;
444 
445    virtual void MultTranspose(const Vector &x, Vector &y) const;
446 };
447 
448 /// Auxiliary device class used by ParFiniteElementSpace.
449 class DeviceConformingProlongationOperator: public
450    ConformingProlongationOperator
451 {
452 protected:
453    bool mpi_gpu_aware;
454    Array<int> shr_ltdof, ext_ldof;
455    mutable Vector shr_buf, ext_buf;
456    Memory<int> shr_buf_offsets, ext_buf_offsets;
457    Array<int> ltdof_ldof, unq_ltdof;
458    Array<int> unq_shr_i, unq_shr_j;
459    MPI_Request *requests;
460 
461    // Kernel: copy ltdofs from 'src' to 'shr_buf' - prepare for send.
462    //         shr_buf[i] = src[shr_ltdof[i]]
463    void BcastBeginCopy(const Vector &src) const;
464 
465    // Kernel: copy ltdofs from 'src' to ldofs in 'dst'.
466    //         dst[ltdof_ldof[i]] = src[i]
467    void BcastLocalCopy(const Vector &src, Vector &dst) const;
468 
469    // Kernel: copy ext. dofs from 'ext_buf' to 'dst' - after recv.
470    //         dst[ext_ldof[i]] = ext_buf[i]
471    void BcastEndCopy(Vector &dst) const;
472 
473    // Kernel: copy ext. dofs from 'src' to 'ext_buf' - prepare for send.
474    //         ext_buf[i] = src[ext_ldof[i]]
475    void ReduceBeginCopy(const Vector &src) const;
476 
477    // Kernel: copy owned ldofs from 'src' to ltdofs in 'dst'.
478    //         dst[i] = src[ltdof_ldof[i]]
479    void ReduceLocalCopy(const Vector &src, Vector &dst) const;
480 
481    // Kernel: assemble dofs from 'shr_buf' into to 'dst' - after recv.
482    //         dst[shr_ltdof[i]] += shr_buf[i]
483    void ReduceEndAssemble(Vector &dst) const;
484 
485 public:
486    DeviceConformingProlongationOperator(
487       const GroupCommunicator &gc_, const SparseMatrix *R, bool local_=false);
488 
489    DeviceConformingProlongationOperator(const ParFiniteElementSpace &pfes,
490                                         bool local_=false);
491 
492    virtual ~DeviceConformingProlongationOperator();
493 
494    virtual void Mult(const Vector &x, Vector &y) const;
495 
496    virtual void MultTranspose(const Vector &x, Vector &y) const;
497 };
498 
499 }
500 
501 #endif // MFEM_USE_MPI
502 
503 #endif
504