1 // Copyright (c) 2010-2021, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #ifndef MFEM_PNCMESH
13 #define MFEM_PNCMESH
14 
15 #include "../config/config.hpp"
16 
17 #ifdef MFEM_USE_MPI
18 
19 #include <map>
20 #include <set>
21 
22 #include "ncmesh.hpp"
23 #include "../general/communication.hpp"
24 #include "../general/sort_pairs.hpp"
25 
26 namespace mfem
27 {
28 
29 class FiniteElementSpace;
30 
31 
32 /** \brief A parallel extension of the NCMesh class.
33  *
34  *  The basic idea (and assumption) is that all processors share the coarsest
35  *  layer ("root elements"). This has the advantage that refinements can easily
36  *  be exchanged between processors when rebalancing since individual elements
37  *  can be uniquely identified by the index of the root element and a path in
38  *  the refinement tree.
39  *
40  *  Each leaf element is owned by one of the processors (NCMesh::Element::rank).
41  *  The underlying NCMesh stores not only elements for the current ('MyRank')
42  *  processor, but also a minimal layer of adjacent "ghost" elements owned by
43  *  other processors. The ghost layer is synchronized after refinement.
44  *
45  *  The ghost layer contains all vertex-, edge- and face-neighbors of the
46  *  current processor's region. It is used to determine constraining relations
47  *  and ownership of DOFs on the processor boundary. Ghost elements are never
48  *  seen by the rest of MFEM as they are skipped when a Mesh is created from
49  *  the NCMesh.
50  *
51  *  The processor that owns a vertex, edge or a face (and in turn its DOFs) is
52  *  currently defined to be the one with the lowest rank in the group of
53  *  processors that share the entity.
54  *
55  *  Vertices, edges and faces that are not owned by this ('MyRank') processor
56  *  are ghosts, and are numbered after all real vertices/edges/faces, i.e.,
57  *  they have indices greater than NVertices, NEdges, NFaces, respectively.
58  *
59  *  A shared vertex/edge/face is identified in an interprocessor message by a
60  *  pair of numbers. The first number specifies an element in an ElementSet
61  *  (typically sent at the beginning of the message) that contains the v/e/f.
62  *  The second number is the local index of the v/e/f in that element.
63  */
64 class ParNCMesh : public NCMesh
65 {
66 public:
67    /// Construct by partitioning a serial NCMesh.
68    /** SFC partitioning is used by default. A user-specified partition can be
69        passed in 'part', where part[i] is the desired MPI rank for element i. */
70    ParNCMesh(MPI_Comm comm, const NCMesh& ncmesh, int* part = NULL);
71 
72    /** Load from a stream, parallel version. See the serial NCMesh::NCMesh
73        counterpart for a description of the parameters. */
74    ParNCMesh(MPI_Comm comm, std::istream &input,
75              int version, int &curved, int &is_nc);
76 
77    /// Deep copy of another instance.
78    ParNCMesh(const ParNCMesh &other);
79 
80    virtual ~ParNCMesh();
81 
82    /** An override of NCMesh::Refine, which is called eventually, after making
83        sure that refinements that occur on the processor boundary are sent to
84        the neighbor processors so they can keep their ghost layers up to date.*/
85    virtual void Refine(const Array<Refinement> &refinements);
86 
87    /// Parallel version of NCMesh::LimitNCLevel.
88    virtual void LimitNCLevel(int max_nc_level);
89 
90    /** Parallel version of NCMesh::CheckDerefinementNCLevel. */
91    virtual void CheckDerefinementNCLevel(const Table &deref_table,
92                                          Array<int> &level_ok, int max_nc_level);
93 
94    /** Parallel reimplementation of NCMesh::Derefine, keeps ghost layers
95        in sync. The interface is identical. */
96    virtual void Derefine(const Array<int> &derefs);
97 
98    /** Migrate leaf elements of the global refinement hierarchy (including ghost
99        elements) so that each processor owns the same number of leaves (+-1).
100        The default partitioning strategy is based on equal splitting of the
101        space-filling sequence of leaf elements (custom_partition == NULL).
102        Alternatively, a used-defined element-rank assignment array can be
103        passed. */
104    void Rebalance(const Array<int> *custom_partition = NULL);
105 
106 
107    // interface for ParFiniteElementSpace
108 
GetNElements() const109    int GetNElements() const { return NElements; }
110 
GetNGhostVertices() const111    int GetNGhostVertices() const { return NGhostVertices; }
GetNGhostEdges() const112    int GetNGhostEdges() const { return NGhostEdges; }
GetNGhostFaces() const113    int GetNGhostFaces() const { return NGhostFaces; }
GetNGhostElements() const114    int GetNGhostElements() const { return NGhostElements; }
115 
116    // Return a list of vertices/edges/faces shared by this processor and at
117    // least one other processor. These are subsets of NCMesh::<entity>_list. */
GetSharedVertices()118    const NCList& GetSharedVertices() { GetVertexList(); return shared_vertices; }
GetSharedEdges()119    const NCList& GetSharedEdges() { GetEdgeList(); return shared_edges; }
GetSharedFaces()120    const NCList& GetSharedFaces() { GetFaceList(); return shared_faces; }
121 
122    /// Helper to get shared vertices/edges/faces ('entity' == 0/1/2 resp.).
GetSharedList(int entity)123    const NCList& GetSharedList(int entity)
124    {
125       switch (entity)
126       {
127          case 0: return GetSharedVertices();
128          case 1: return GetSharedEdges();
129          default: return GetSharedFaces();
130       }
131    }
132 
133    /// Return (shared) face orientation relative to its owner element.
GetFaceOrientation(int index) const134    int GetFaceOrientation(int index) const
135    {
136       return (index < NFaces) ? face_orient[index] : 0;
137    }
138 
139    typedef short GroupId;
140    typedef std::vector<int> CommGroup;
141 
142    /// Return vertex/edge/face ('entity' == 0/1/2, resp.) owner.
GetEntityOwnerId(int entity,int index)143    GroupId GetEntityOwnerId(int entity, int index)
144    {
145       MFEM_ASSERT(entity >= 0 && entity < 3, "");
146       MFEM_ASSERT(index >= 0, "");
147       if (!entity_owner[entity].Size())
148       {
149          GetSharedList(entity);
150       }
151       return entity_owner[entity][index];
152    }
153 
154    /** Return the P matrix communication group ID for a vertex/edge/face.
155        The groups are calculated specifically to match the P matrix
156        construction algorithm and its communication pattern. */
GetEntityGroupId(int entity,int index)157    GroupId GetEntityGroupId(int entity, int index)
158    {
159       MFEM_ASSERT(entity >= 0 && entity < 3, "");
160       MFEM_ASSERT(index >= 0, "");
161       if (!entity_pmat_group[entity].Size())
162       {
163          CalculatePMatrixGroups();
164       }
165       return entity_pmat_group[entity][index];
166    }
167 
168    /// Return a list of ranks contained in the group of the given ID.
GetGroup(GroupId id) const169    const CommGroup& GetGroup(GroupId id) const
170    {
171       MFEM_ASSERT(id >= 0, "");
172       return groups[id];
173    }
174 
175    /// Return true if group 'id' contains the given rank.
176    bool GroupContains(GroupId id, int rank) const;
177 
178    /// Return true if the specified vertex/edge/face is a ghost.
IsGhost(int entity,int index) const179    bool IsGhost(int entity, int index) const
180    {
181       if (index < 0) // special case prism edge-face constraint
182       {
183          MFEM_ASSERT(entity == 2, "");
184          entity = 1;
185          index = -1 - index;
186       }
187       switch (entity)
188       {
189          case 0: return index >= NVertices;
190          case 1: return index >= NEdges;
191          default: return index >= NFaces;
192       }
193    }
194 
195    /** Returns owner processor for element 'index'. This is normally MyRank but
196        for index >= NElements (i.e., for ghosts) it may be something else. */
ElementRank(int index) const197    int ElementRank(int index) const
198    {
199       return elements[leaf_elements[index]].rank;
200    }
201 
202 
203    // utility
204 
GetMyRank() const205    int GetMyRank() const { return MyRank; }
206 
207    /// Use the communication pattern from last Rebalance() to send element DOFs.
208    void SendRebalanceDofs(int old_ndofs, const Table &old_element_dofs,
209                           long old_global_offset, FiniteElementSpace* space);
210 
211    /// Receive element DOFs sent by SendRebalanceDofs().
212    void RecvRebalanceDofs(Array<int> &elements, Array<long> &dofs);
213 
214    /** Get previous indices (pre-Rebalance) of current elements. Index of -1
215        indicates that an element didn't exist in the mesh before. */
GetRebalanceOldIndex() const216    const Array<int>& GetRebalanceOldIndex() const { return old_index_or_rank; }
217 
218    /** Get previous (pre-Derefine) fine element ranks. This complements the
219        CoarseFineTransformations::embeddings array in parallel. */
GetDerefineOldRanks() const220    const Array<int>& GetDerefineOldRanks() const { return old_index_or_rank; }
221 
222    /** Exchange element data for derefinements that straddle processor
223        boundaries. 'elem_data' is enlarged and filled with ghost values. */
224    template<typename Type>
225    void SynchronizeDerefinementData(Array<Type> &elem_data,
226                                     const Table &deref_table);
227 
228    /** Extension of NCMesh::GetBoundaryClosure. Filters out ghost vertices and
229        ghost edges from 'bdr_vertices' and 'bdr_edges'. */
230    virtual void GetBoundaryClosure(const Array<int> &bdr_attr_is_ess,
231                                    Array<int> &bdr_vertices,
232                                    Array<int> &bdr_edges);
233 
234    /// Save memory by releasing all non-essential and cached data.
235    virtual void Trim();
236 
237    /// Return total number of bytes allocated.
238    long MemoryUsage(bool with_base = true) const;
239 
240    int PrintMemoryDetail(bool with_base = true) const;
241 
242    /** Extract a debugging Mesh containing all leaf elements, including ghosts.
243        The debug mesh will have element attributes set to element rank + 1. */
244    void GetDebugMesh(Mesh &debug_mesh) const;
245 
246 
247 protected: // interface for ParMesh
248 
249    friend class ParMesh;
250 
251    /** For compatibility with conforming code in ParMesh and ParFESpace.
252        Initializes shared structures in ParMesh: gtopo, shared_*, group_s*, s*_l*.
253        The ParMesh then acts as a parallel mesh cut along the NC interfaces. */
254    void GetConformingSharedStructures(class ParMesh &pmesh);
255 
256    /** Populate face neighbor members of ParMesh from the ghost layer, without
257        communication. */
258    void GetFaceNeighbors(class ParMesh &pmesh);
259 
260 
261 protected: // implementation
262 
263    MPI_Comm MyComm;
264    int NRanks;
265 
266    typedef std::vector<CommGroup> GroupList;
267    typedef std::map<CommGroup, GroupId> GroupMap;
268 
269    GroupList groups;  // comm group list; NOTE: groups[0] = { MyRank }
270    GroupMap group_id; // search index over groups
271 
272    // owner rank for each vertex, edge and face (encoded as singleton group)
273    Array<GroupId> entity_owner[3];
274    // P matrix comm pattern groups for each vertex/edge/face (0/1/2)
275    Array<GroupId> entity_pmat_group[3];
276 
277    // ParMesh-compatible (conforming) groups for each vertex/edge/face (0/1/2)
278    Array<GroupId> entity_conf_group[3];
279    // ParMesh compatibility helper arrays to order groups, also temporary
280    Array<int> entity_elem_local[3];
281 
282    // lists of vertices/edges/faces shared by us and at least one more processor
283    NCList shared_vertices, shared_edges, shared_faces;
284 
285    Array<char> face_orient; // see CalcFaceOrientations
286 
287    /** Type of each leaf element:
288          1 - our element (rank == MyRank),
289          3 - our element, and neighbor to the ghost layer,
290          2 - ghost layer element (existing element, but rank != MyRank),
291          0 - element beyond the ghost layer, may not be a real element.
292        Note: indexed by Element::index. See also UpdateLayers(). */
293    Array<char> element_type;
294 
295    Array<int> ghost_layer;    ///< list of elements whose 'element_type' == 2.
296    Array<int> boundary_layer; ///< list of type 3 elements
297 
298    virtual void Update();
299 
300    /// Return the processor number for a global element number.
Partition(long index,long total_elements) const301    int Partition(long index, long total_elements) const
302    { return index * NRanks / total_elements; }
303 
304    /// Helper to get the partitioning when the serial mesh gets split initially
InitialPartition(int index) const305    int InitialPartition(int index) const
306    { return Partition(index, leaf_elements.Size()); }
307 
308    /// Return the global index of the first element owned by processor 'rank'.
PartitionFirstIndex(int rank,long total_elements) const309    long PartitionFirstIndex(int rank, long total_elements) const
310    { return (rank * total_elements + NRanks-1) / NRanks; }
311 
312    virtual void BuildFaceList();
313    virtual void BuildEdgeList();
314    virtual void BuildVertexList();
315 
316    virtual void ElementSharesFace(int elem, int local, int face);
317    virtual void ElementSharesEdge(int elem, int local, int enode);
318    virtual void ElementSharesVertex(int elem, int local, int vnode);
319 
320    GroupId GetGroupId(const CommGroup &group);
321    GroupId GetSingletonGroup(int rank);
322 
323    Array<int> tmp_owner; // temporary
324    Array<char> tmp_shared_flag; // temporary
325    Array<Connection> entity_index_rank[3]; // temporary
326 
327    void InitOwners(int num, Array<GroupId> &entity_owner);
328    void MakeSharedList(const NCList &list, NCList &shared);
329 
330    void AddConnections(int entity, int index, const Array<int> &ranks);
331    void CalculatePMatrixGroups();
332    void CreateGroups(int nentities, Array<Connection> &index_rank,
333                      Array<GroupId> &entity_group);
334 
335    static int get_face_orientation(Face &face, Element &e1, Element &e2,
336                                    int local[2] = NULL /* optional output */);
337    void CalcFaceOrientations();
338 
339    void UpdateLayers();
340 
341    void MakeSharedTable(int ngroups, int ent, Array<int> &shared_local,
342                         Table &group_shared, Array<char> *entity_geom = NULL,
343                         char geom = 0);
344 
345    /** Uniquely encodes a set of leaf elements in the refinement hierarchy of
346        an NCMesh. Can be dumped to a stream, sent to another processor, loaded,
347        and decoded to identify the same set of elements (refinements) in a
348        different but compatible NCMesh. The encoding can optionally include
349        the refinement types needed to reach the leaves, so the element set can
350        be decoded (recreated) even if the receiver has an incomplete tree. */
351    class ElementSet
352    {
353    public:
ElementSet(NCMesh * ncmesh=NULL,bool include_ref_types=false)354       ElementSet(NCMesh *ncmesh = NULL, bool include_ref_types = false)
355          : ncmesh(ncmesh), include_ref_types(include_ref_types) {}
356       ElementSet(const ElementSet &other);
357 
358       void Encode(const Array<int> &elements);
359       void Dump(std::ostream &os) const;
360 
361       void Load(std::istream &is);
362       void Decode(Array<int> &elements) const;
363 
SetNCMesh(NCMesh * ncmesh)364       void SetNCMesh(NCMesh *ncmesh) { this->ncmesh = ncmesh; }
GetNCMesh() const365       const NCMesh* GetNCMesh() const { return ncmesh; }
366 
367    protected:
368       Array<unsigned char> data; ///< encoded refinement (sub-)trees
369       NCMesh* ncmesh;
370       bool include_ref_types;
371 
372       void EncodeTree(int elem);
373       void DecodeTree(int elem, int &pos, Array<int> &elements) const;
374 
375       void WriteInt(int value);
376       int  GetInt(int pos) const;
377       void FlagElements(const Array<int> &elements, char flag);
378 
379 #ifdef MFEM_DEBUG
380       mutable Array<int> ref_path;
381       std::string RefPath() const;
382 #endif
383    };
384 
385    /** Adjust some of the MeshIds before encoding for recipient 'rank', so that
386        they only reference elements that exist in the recipient's ref. tree. */
387    void AdjustMeshIds(Array<MeshId> ids[], int rank);
388 
389    void ChangeVertexMeshIdElement(NCMesh::MeshId &id, int elem);
390    void ChangeEdgeMeshIdElement(NCMesh::MeshId &id, int elem);
391    void ChangeRemainingMeshIds(Array<MeshId> &ids, int pos,
392                                const Array<Pair<int, int> > &find);
393 
394    // Write/read a processor-independent encoding of vertex/edge/face IDs.
395    void EncodeMeshIds(std::ostream &os, Array<MeshId> ids[]);
396    void DecodeMeshIds(std::istream &is, Array<MeshId> ids[]);
397 
398    // Write/read comm groups and a list of their IDs.
399    void EncodeGroups(std::ostream &os, const Array<GroupId> &ids);
400    void DecodeGroups(std::istream &is, Array<GroupId> &ids);
401 
402    bool CheckElementType(int elem, int type);
403 
404    Array<int> tmp_neighbors; // temporary, used by ElementNeighborProcessors
405 
406    /** Return a list of processors that own elements in the immediate
407        neighborhood of 'elem' (i.e., vertex, edge and face neighbors),
408        and are not 'MyRank'. */
409    void ElementNeighborProcessors(int elem, Array<int> &ranks);
410 
411    /** Get a list of ranks that own elements in the neighborhood of our region.
412        NOTE: MyRank is not included. */
413    void NeighborProcessors(Array<int> &neighbors);
414 
415    /** Traverse the (local) refinement tree and determine which subtrees are
416        no longer needed, i.e., their leaves are not owned by us nor are they our
417        ghosts. These subtrees are then derefined. */
418    void Prune();
419 
420    /// Internal. Recursive part of Prune().
421    bool PruneTree(int elem);
422 
423 
424    /** A base for internal messages used by Refine(), Derefine() and Rebalance().
425     *  Allows sending values associated with elements in a set.
426     *  If RefType == true, the element set is recreated on the receiving end.
427     */
428    template<class ValueType, bool RefTypes, int Tag>
429    class ElementValueMessage : public VarMessage<Tag>
430    {
431    public:
432       using VarMessage<Tag>::data;
433       std::vector<int> elements;
434       std::vector<ValueType> values;
435 
Size() const436       int Size() const { return elements.size(); }
Reserve(int size)437       void Reserve(int size) { elements.reserve(size); values.reserve(size); }
438 
Add(int elem,ValueType val)439       void Add(int elem, ValueType val)
440       { elements.push_back(elem); values.push_back(val); }
441 
442       /// Set pointer to ParNCMesh (needed to encode the message).
SetNCMesh(ParNCMesh * pncmesh)443       void SetNCMesh(ParNCMesh* pncmesh) { this->pncmesh = pncmesh; }
444 
ElementValueMessage()445       ElementValueMessage() : pncmesh(NULL) {}
446 
447    protected:
448       ParNCMesh* pncmesh;
449 
450       virtual void Encode(int);
451       virtual void Decode(int);
452    };
453 
454    /** Used by ParNCMesh::Refine() to inform neighbors about refinements at
455     *  the processor boundary. This keeps their ghost layers synchronized.
456     */
457    class NeighborRefinementMessage : public ElementValueMessage<char, false, 289>
458    {
459    public:
AddRefinement(int elem,char ref_type)460       void AddRefinement(int elem, char ref_type) { Add(elem, ref_type); }
461       typedef std::map<int, NeighborRefinementMessage> Map;
462    };
463 
464    /** Used by ParNCMesh::Derefine() to keep the ghost layers synchronized.
465     */
466    class NeighborDerefinementMessage : public ElementValueMessage<int, false, 290>
467    {
468    public:
AddDerefinement(int elem,int rank)469       void AddDerefinement(int elem, int rank) { Add(elem, rank); }
470       typedef std::map<int, NeighborDerefinementMessage> Map;
471    };
472 
473    /** Used in Step 2 of Rebalance() to synchronize new rank assignments in
474     *  the ghost layer.
475     */
476    class NeighborElementRankMessage : public ElementValueMessage<int, false, 156>
477    {
478    public:
AddElementRank(int elem,int rank)479       void AddElementRank(int elem, int rank) { Add(elem, rank); }
480       typedef std::map<int, NeighborElementRankMessage> Map;
481    };
482 
483    /** Used by Rebalance() to send elements and their ranks. Note that
484     *  RefTypes == true which means the refinement hierarchy will be recreated
485     *  on the receiving side.
486     */
487    class RebalanceMessage : public ElementValueMessage<int, true, 157>
488    {
489    public:
AddElementRank(int elem,int rank)490       void AddElementRank(int elem, int rank) { Add(elem, rank); }
491       typedef std::map<int, RebalanceMessage> Map;
492    };
493 
494    /** Allows migrating element data (DOFs) after Rebalance().
495     *  Used by SendRebalanceDofs and RecvRebalanceDofs.
496     */
497    class RebalanceDofMessage : public VarMessage<158>
498    {
499    public:
500       std::vector<int> elem_ids, dofs;
501       long dof_offset;
502 
503       void SetElements(const Array<int> &elems, NCMesh *ncmesh);
SetNCMesh(NCMesh * ncmesh)504       void SetNCMesh(NCMesh* ncmesh) { eset.SetNCMesh(ncmesh); }
505       long MemoryUsage() const;
506 
507       typedef std::map<int, RebalanceDofMessage> Map;
508 
509    protected:
510       ElementSet eset;
511 
512       virtual void Encode(int);
513       virtual void Decode(int);
514    };
515 
516    /** Assign new Element::rank to leaf elements and send them to their new
517        owners, keeping the ghost layer up to date. Used by Rebalance() and
518        Derefine(). 'target_elements' is the number of elements this rank
519        is supposed to own after the exchange. If this number is not known
520        a priori, the parameter can be set to -1, but more expensive communication
521        (synchronous sends and a barrier) will be used in that case. */
522    void RedistributeElements(Array<int> &new_ranks, int target_elements,
523                              bool record_comm);
524 
525    /** Recorded communication pattern from last Rebalance. Used by
526        Send/RecvRebalanceDofs to ship element DOFs. */
527    RebalanceDofMessage::Map send_rebalance_dofs;
528    RebalanceDofMessage::Map recv_rebalance_dofs;
529 
530    /** After Rebalance, this array holds the old element indices, or -1 if an
531        element didn't exist in the mesh previously. After Derefine, it holds
532        the ranks of the old (potentially non-existent) fine elements. */
533    Array<int> old_index_or_rank;
534 
535    /// Stores modified point matrices created by GetFaceNeighbors
536    Array<DenseMatrix*> aux_pm_store;
537    void ClearAuxPM();
538 
539    long GroupsMemoryUsage() const;
540 
541    friend class NeighborRowMessage;
542 };
543 
544 
545 
546 // comparison operator so that MeshId can be used as key in std::map
operator <(const NCMesh::MeshId & a,const NCMesh::MeshId & b)547 inline bool operator< (const NCMesh::MeshId &a, const NCMesh::MeshId &b)
548 {
549    return a.index < b.index;
550 }
551 
552 // equality of MeshId is based on 'index' (element/local are not unique)
operator ==(const NCMesh::MeshId & a,const NCMesh::MeshId & b)553 inline bool operator== (const NCMesh::MeshId &a, const NCMesh::MeshId &b)
554 {
555    return a.index == b.index;
556 }
557 
558 } // namespace mfem
559 
560 #endif // MFEM_USE_MPI
561 
562 #endif // MFEM_PNCMESH
563