1 // Copyright (c) 2010-2021, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #include "../config/config.hpp"
13 
14 #ifdef MFEM_USE_MPI
15 
16 #include "pfespace.hpp"
17 #include "prestriction.hpp"
18 #include "../general/forall.hpp"
19 #include "../general/sort_pairs.hpp"
20 #include "../mesh/mesh_headers.hpp"
21 #include "../general/binaryio.hpp"
22 
23 #include <limits>
24 #include <list>
25 
26 namespace mfem
27 {
28 
ParFiniteElementSpace(const ParFiniteElementSpace & orig,ParMesh * pmesh,const FiniteElementCollection * fec)29 ParFiniteElementSpace::ParFiniteElementSpace(
30    const ParFiniteElementSpace &orig, ParMesh *pmesh,
31    const FiniteElementCollection *fec)
32    : FiniteElementSpace(orig, pmesh, fec)
33 {
34    ParInit(pmesh ? pmesh : orig.pmesh);
35 }
36 
ParFiniteElementSpace(const FiniteElementSpace & orig,ParMesh & pmesh,const FiniteElementCollection * fec)37 ParFiniteElementSpace::ParFiniteElementSpace(
38    const FiniteElementSpace &orig, ParMesh &pmesh,
39    const FiniteElementCollection *fec)
40    : FiniteElementSpace(orig, &pmesh, fec)
41 {
42    ParInit(&pmesh);
43 }
44 
ParFiniteElementSpace(ParMesh * pm,const FiniteElementSpace * global_fes,const int * partitioning,const FiniteElementCollection * f)45 ParFiniteElementSpace::ParFiniteElementSpace(
46    ParMesh *pm, const FiniteElementSpace *global_fes, const int *partitioning,
47    const FiniteElementCollection *f)
48    : FiniteElementSpace(pm, MakeLocalNURBSext(global_fes->GetNURBSext(),
49                                               pm->NURBSext),
50                         f ? f : global_fes->FEColl(),
51                         global_fes->GetVDim(), global_fes->GetOrdering())
52 {
53    ParInit(pm);
54    // For NURBS spaces, the variable-order data is contained in the
55    // NURBSExtension of 'global_fes' and inside the ParNURBSExtension of 'pm'.
56 
57    // TODO: when general variable-order support is added, copy the local portion
58    // of the variable-order data from 'global_fes' to 'this'.
59 }
60 
ParFiniteElementSpace(ParMesh * pm,const FiniteElementCollection * f,int dim,int ordering)61 ParFiniteElementSpace::ParFiniteElementSpace(
62    ParMesh *pm, const FiniteElementCollection *f, int dim, int ordering)
63    : FiniteElementSpace(pm, f, dim, ordering)
64 {
65    ParInit(pm);
66 }
67 
ParFiniteElementSpace(ParMesh * pm,NURBSExtension * ext,const FiniteElementCollection * f,int dim,int ordering)68 ParFiniteElementSpace::ParFiniteElementSpace(
69    ParMesh *pm, NURBSExtension *ext, const FiniteElementCollection *f,
70    int dim, int ordering)
71    : FiniteElementSpace(pm, ext, f, dim, ordering)
72 {
73    ParInit(pm);
74 }
75 
76 // static method
MakeLocalNURBSext(const NURBSExtension * globNURBSext,const NURBSExtension * parNURBSext)77 ParNURBSExtension *ParFiniteElementSpace::MakeLocalNURBSext(
78    const NURBSExtension *globNURBSext, const NURBSExtension *parNURBSext)
79 {
80    if (globNURBSext == NULL) { return NULL; }
81    const ParNURBSExtension *pNURBSext =
82       dynamic_cast<const ParNURBSExtension*>(parNURBSext);
83    MFEM_ASSERT(pNURBSext, "need a ParNURBSExtension");
84    // make a copy of globNURBSext:
85    NURBSExtension *tmp_globNURBSext = new NURBSExtension(*globNURBSext);
86    // tmp_globNURBSext will be deleted by the following ParNURBSExtension ctor:
87    return new ParNURBSExtension(tmp_globNURBSext, pNURBSext);
88 }
89 
ParInit(ParMesh * pm)90 void ParFiniteElementSpace::ParInit(ParMesh *pm)
91 {
92    pmesh = pm;
93    pncmesh = NULL;
94 
95    MyComm = pmesh->GetComm();
96    NRanks = pmesh->GetNRanks();
97    MyRank = pmesh->GetMyRank();
98 
99    gcomm = NULL;
100 
101    P = NULL;
102    Pconf = NULL;
103    nonconf_P = false;
104    Rconf = NULL;
105    R_transpose = NULL;
106    R = NULL;
107 
108    num_face_nbr_dofs = -1;
109 
110    if (NURBSext && !pNURBSext())
111    {
112       // This is necessary in some cases: e.g. when the FiniteElementSpace
113       // constructor creates a serial NURBSExtension of higher order than the
114       // mesh NURBSExtension.
115       MFEM_ASSERT(own_ext, "internal error");
116 
117       ParNURBSExtension *pNe = new ParNURBSExtension(
118          NURBSext, dynamic_cast<ParNURBSExtension *>(pmesh->NURBSext));
119       // serial NURBSext is destroyed by the above constructor
120       NURBSext = pNe;
121       UpdateNURBS();
122    }
123 
124    Construct(); // parallel version of Construct().
125 
126    // Apply the ldof_signs to the elem_dof Table
127    if (Conforming() && !NURBSext)
128    {
129       ApplyLDofSigns(*elem_dof);
130    }
131 }
132 
Construct()133 void ParFiniteElementSpace::Construct()
134 {
135    MFEM_VERIFY(!IsVariableOrder(), "variable orders are not implemented"
136                " for ParFiniteElementSpace yet.");
137 
138    if (NURBSext)
139    {
140       ConstructTrueNURBSDofs();
141       GenerateGlobalOffsets();
142    }
143    else if (Conforming())
144    {
145       ConstructTrueDofs();
146       GenerateGlobalOffsets();
147    }
148    else // Nonconforming()
149    {
150       pncmesh = pmesh->pncmesh;
151 
152       // Initialize 'gcomm' for the cut (aka "partially conforming") space.
153       // In the process, the array 'ldof_ltdof' is also initialized (for the cut
154       // space) and used; however, it will be overwritten below with the real
155       // true dofs. Also, 'ldof_sign' and 'ldof_group' are constructed for the
156       // cut space.
157       ConstructTrueDofs();
158 
159       ngedofs = ngfdofs = 0;
160 
161       // calculate number of ghost DOFs
162       ngvdofs = pncmesh->GetNGhostVertices()
163                 * fec->DofForGeometry(Geometry::POINT);
164 
165       if (pmesh->Dimension() > 1)
166       {
167          ngedofs = pncmesh->GetNGhostEdges()
168                    * fec->DofForGeometry(Geometry::SEGMENT);
169       }
170 
171       if (pmesh->Dimension() > 2)
172       {
173          int stride = fec->DofForGeometry(Geometry::SQUARE);
174          ngfdofs = pncmesh->GetNGhostFaces() * stride;
175       }
176 
177       // total number of ghost DOFs. Ghost DOFs start at index 'ndofs', i.e.,
178       // after all regular DOFs
179       ngdofs = ngvdofs + ngedofs + ngfdofs;
180 
181       // get P and R matrices, initialize DOF offsets, etc. NOTE: in the NC
182       // case this needs to be done here to get the number of true DOFs
183       ltdof_size = BuildParallelConformingInterpolation(
184                       &P, &R, dof_offsets, tdof_offsets, &ldof_ltdof, false);
185 
186       // TODO future: split BuildParallelConformingInterpolation into two parts
187       // to overlap its communication with processing between this constructor
188       // and the point where the P matrix is actually needed.
189    }
190 }
191 
PrintPartitionStats()192 void ParFiniteElementSpace::PrintPartitionStats()
193 {
194    long ltdofs = ltdof_size;
195    long min_ltdofs, max_ltdofs, sum_ltdofs;
196 
197    MPI_Reduce(&ltdofs, &min_ltdofs, 1, MPI_LONG, MPI_MIN, 0, MyComm);
198    MPI_Reduce(&ltdofs, &max_ltdofs, 1, MPI_LONG, MPI_MAX, 0, MyComm);
199    MPI_Reduce(&ltdofs, &sum_ltdofs, 1, MPI_LONG, MPI_SUM, 0, MyComm);
200 
201    if (MyRank == 0)
202    {
203       double avg = double(sum_ltdofs) / NRanks;
204       mfem::out << "True DOF partitioning: min " << min_ltdofs
205                 << ", avg " << std::fixed << std::setprecision(1) << avg
206                 << ", max " << max_ltdofs
207                 << ", (max-avg)/avg " << 100.0*(max_ltdofs - avg)/avg
208                 << "%" << std::endl;
209    }
210 
211    if (NRanks <= 32)
212    {
213       if (MyRank == 0)
214       {
215          mfem::out << "True DOFs by rank: " << ltdofs;
216          for (int i = 1; i < NRanks; i++)
217          {
218             MPI_Status status;
219             MPI_Recv(&ltdofs, 1, MPI_LONG, i, 123, MyComm, &status);
220             mfem::out << " " << ltdofs;
221          }
222          mfem::out << "\n";
223       }
224       else
225       {
226          MPI_Send(&ltdofs, 1, MPI_LONG, 0, 123, MyComm);
227       }
228    }
229 }
230 
GetGroupComm(GroupCommunicator & gc,int ldof_type,Array<int> * ldof_sign)231 void ParFiniteElementSpace::GetGroupComm(
232    GroupCommunicator &gc, int ldof_type, Array<int> *ldof_sign)
233 {
234    int gr;
235    int ng = pmesh->GetNGroups();
236    int nvd, ned, ntd = 0, nqd = 0;
237    Array<int> dofs;
238 
239    int group_ldof_counter;
240    Table &group_ldof = gc.GroupLDofTable();
241 
242    nvd = fec->DofForGeometry(Geometry::POINT);
243    ned = fec->DofForGeometry(Geometry::SEGMENT);
244 
245    if (mesh->Dimension() >= 3)
246    {
247       if (mesh->HasGeometry(Geometry::TRIANGLE))
248       {
249          ntd = fec->DofForGeometry(Geometry::TRIANGLE);
250       }
251       if (mesh->HasGeometry(Geometry::SQUARE))
252       {
253          nqd = fec->DofForGeometry(Geometry::SQUARE);
254       }
255    }
256 
257    if (ldof_sign)
258    {
259       ldof_sign->SetSize(GetNDofs());
260       *ldof_sign = 1;
261    }
262 
263    // count the number of ldofs in all groups (excluding the local group 0)
264    group_ldof_counter = 0;
265    for (gr = 1; gr < ng; gr++)
266    {
267       group_ldof_counter += nvd * pmesh->GroupNVertices(gr);
268       group_ldof_counter += ned * pmesh->GroupNEdges(gr);
269       group_ldof_counter += ntd * pmesh->GroupNTriangles(gr);
270       group_ldof_counter += nqd * pmesh->GroupNQuadrilaterals(gr);
271    }
272    if (ldof_type)
273    {
274       group_ldof_counter *= vdim;
275    }
276    // allocate the I and J arrays in group_ldof
277    group_ldof.SetDims(ng, group_ldof_counter);
278 
279    // build the full group_ldof table
280    group_ldof_counter = 0;
281    group_ldof.GetI()[0] = group_ldof.GetI()[1] = 0;
282    for (gr = 1; gr < ng; gr++)
283    {
284       int j, k, l, m, o, nv, ne, nt, nq;
285       const int *ind;
286 
287       nv = pmesh->GroupNVertices(gr);
288       ne = pmesh->GroupNEdges(gr);
289       nt = pmesh->GroupNTriangles(gr);
290       nq = pmesh->GroupNQuadrilaterals(gr);
291 
292       // vertices
293       if (nvd > 0)
294       {
295          for (j = 0; j < nv; j++)
296          {
297             k = pmesh->GroupVertex(gr, j);
298 
299             dofs.SetSize(nvd);
300             m = nvd * k;
301             for (l = 0; l < nvd; l++, m++)
302             {
303                dofs[l] = m;
304             }
305 
306             if (ldof_type)
307             {
308                DofsToVDofs(dofs);
309             }
310 
311             for (l = 0; l < dofs.Size(); l++)
312             {
313                group_ldof.GetJ()[group_ldof_counter++] = dofs[l];
314             }
315          }
316       }
317 
318       // edges
319       if (ned > 0)
320       {
321          for (j = 0; j < ne; j++)
322          {
323             pmesh->GroupEdge(gr, j, k, o);
324 
325             dofs.SetSize(ned);
326             m = nvdofs+k*ned;
327             ind = fec->DofOrderForOrientation(Geometry::SEGMENT, o);
328             for (l = 0; l < ned; l++)
329             {
330                if (ind[l] < 0)
331                {
332                   dofs[l] = m + (-1-ind[l]);
333                   if (ldof_sign)
334                   {
335                      (*ldof_sign)[dofs[l]] = -1;
336                   }
337                }
338                else
339                {
340                   dofs[l] = m + ind[l];
341                }
342             }
343 
344             if (ldof_type)
345             {
346                DofsToVDofs(dofs);
347             }
348 
349             for (l = 0; l < dofs.Size(); l++)
350             {
351                group_ldof.GetJ()[group_ldof_counter++] = dofs[l];
352             }
353          }
354       }
355 
356       // triangles
357       if (ntd > 0)
358       {
359          for (j = 0; j < nt; j++)
360          {
361             pmesh->GroupTriangle(gr, j, k, o);
362 
363             dofs.SetSize(ntd);
364             m = nvdofs + nedofs + FirstFaceDof(k);
365             ind = fec->DofOrderForOrientation(Geometry::TRIANGLE, o);
366             for (l = 0; l < ntd; l++)
367             {
368                if (ind[l] < 0)
369                {
370                   dofs[l] = m + (-1-ind[l]);
371                   if (ldof_sign)
372                   {
373                      (*ldof_sign)[dofs[l]] = -1;
374                   }
375                }
376                else
377                {
378                   dofs[l] = m + ind[l];
379                }
380             }
381 
382             if (ldof_type)
383             {
384                DofsToVDofs(dofs);
385             }
386 
387             for (l = 0; l < dofs.Size(); l++)
388             {
389                group_ldof.GetJ()[group_ldof_counter++] = dofs[l];
390             }
391          }
392       }
393 
394       // quadrilaterals
395       if (nqd > 0)
396       {
397          for (j = 0; j < nq; j++)
398          {
399             pmesh->GroupQuadrilateral(gr, j, k, o);
400 
401             dofs.SetSize(nqd);
402             m = nvdofs + nedofs + FirstFaceDof(k);
403             ind = fec->DofOrderForOrientation(Geometry::SQUARE, o);
404             for (l = 0; l < nqd; l++)
405             {
406                if (ind[l] < 0)
407                {
408                   dofs[l] = m + (-1-ind[l]);
409                   if (ldof_sign)
410                   {
411                      (*ldof_sign)[dofs[l]] = -1;
412                   }
413                }
414                else
415                {
416                   dofs[l] = m + ind[l];
417                }
418             }
419 
420             if (ldof_type)
421             {
422                DofsToVDofs(dofs);
423             }
424 
425             for (l = 0; l < dofs.Size(); l++)
426             {
427                group_ldof.GetJ()[group_ldof_counter++] = dofs[l];
428             }
429          }
430       }
431 
432       group_ldof.GetI()[gr+1] = group_ldof_counter;
433    }
434 
435    gc.Finalize();
436 }
437 
ApplyLDofSigns(Array<int> & dofs) const438 void ParFiniteElementSpace::ApplyLDofSigns(Array<int> &dofs) const
439 {
440    MFEM_ASSERT(Conforming(), "wrong code path");
441 
442    for (int i = 0; i < dofs.Size(); i++)
443    {
444       if (dofs[i] < 0)
445       {
446          if (ldof_sign[-1-dofs[i]] < 0)
447          {
448             dofs[i] = -1-dofs[i];
449          }
450       }
451       else
452       {
453          if (ldof_sign[dofs[i]] < 0)
454          {
455             dofs[i] = -1-dofs[i];
456          }
457       }
458    }
459 }
460 
ApplyLDofSigns(Table & el_dof) const461 void ParFiniteElementSpace::ApplyLDofSigns(Table &el_dof) const
462 {
463    Array<int> all_dofs(el_dof.GetJ(), el_dof.Size_of_connections());
464    ApplyLDofSigns(all_dofs);
465 }
466 
GetElementDofs(int i,Array<int> & dofs) const467 void ParFiniteElementSpace::GetElementDofs(int i, Array<int> &dofs) const
468 {
469    if (elem_dof)
470    {
471       elem_dof->GetRow(i, dofs);
472       return;
473    }
474    FiniteElementSpace::GetElementDofs(i, dofs);
475    if (Conforming())
476    {
477       ApplyLDofSigns(dofs);
478    }
479 }
480 
GetBdrElementDofs(int i,Array<int> & dofs) const481 void ParFiniteElementSpace::GetBdrElementDofs(int i, Array<int> &dofs) const
482 {
483    if (bdr_elem_dof)
484    {
485       bdr_elem_dof->GetRow(i, dofs);
486       return;
487    }
488    FiniteElementSpace::GetBdrElementDofs(i, dofs);
489    if (Conforming())
490    {
491       ApplyLDofSigns(dofs);
492    }
493 }
494 
GetFaceDofs(int i,Array<int> & dofs,int variant) const495 int ParFiniteElementSpace::GetFaceDofs(int i, Array<int> &dofs,
496                                        int variant) const
497 {
498    if (face_dof && variant == 0)
499    {
500       face_dof->GetRow(i, dofs);
501       return fec->GetOrder();
502    }
503    int p = FiniteElementSpace::GetFaceDofs(i, dofs, variant);
504    if (Conforming())
505    {
506       ApplyLDofSigns(dofs);
507    }
508    return p;
509 }
510 
GetFE(int i) const511 const FiniteElement *ParFiniteElementSpace::GetFE(int i) const
512 {
513    int ne = mesh->GetNE();
514    if (i >= ne) { return GetFaceNbrFE(i - ne); }
515    else { return FiniteElementSpace::GetFE(i); }
516 }
517 
GetFaceRestriction(ElementDofOrdering e_ordering,FaceType type,L2FaceValues mul) const518 const FaceRestriction *ParFiniteElementSpace::GetFaceRestriction(
519    ElementDofOrdering e_ordering, FaceType type, L2FaceValues mul) const
520 {
521    const bool is_dg_space = IsDGSpace();
522    const L2FaceValues m = (is_dg_space && mul==L2FaceValues::DoubleValued) ?
523                           L2FaceValues::DoubleValued : L2FaceValues::SingleValued;
524    auto key = std::make_tuple(is_dg_space, e_ordering, type, m);
525    auto itr = L2F.find(key);
526    if (itr != L2F.end())
527    {
528       return itr->second;
529    }
530    else
531    {
532       FaceRestriction *res;
533       if (is_dg_space)
534       {
535          res = new ParL2FaceRestriction(*this, e_ordering, type, m);
536       }
537       else
538       {
539          res = new H1FaceRestriction(*this, e_ordering, type);
540       }
541       L2F[key] = res;
542       return res;
543    }
544 }
545 
GetSharedEdgeDofs(int group,int ei,Array<int> & dofs) const546 void ParFiniteElementSpace::GetSharedEdgeDofs(
547    int group, int ei, Array<int> &dofs) const
548 {
549    int l_edge, ori;
550    MFEM_ASSERT(0 <= ei && ei < pmesh->GroupNEdges(group), "invalid edge index");
551    pmesh->GroupEdge(group, ei, l_edge, ori);
552    if (ori > 0) // ori = +1 or -1
553    {
554       GetEdgeDofs(l_edge, dofs);
555    }
556    else
557    {
558       Array<int> rdofs;
559       fec->SubDofOrder(Geometry::SEGMENT, 1, 1, dofs);
560       GetEdgeDofs(l_edge, rdofs);
561       for (int i = 0; i < dofs.Size(); i++)
562       {
563          const int di = dofs[i];
564          dofs[i] = (di >= 0) ? rdofs[di] : -1-rdofs[-1-di];
565       }
566    }
567 }
568 
GetSharedTriangleDofs(int group,int fi,Array<int> & dofs) const569 void ParFiniteElementSpace::GetSharedTriangleDofs(
570    int group, int fi, Array<int> &dofs) const
571 {
572    int l_face, ori;
573    MFEM_ASSERT(0 <= fi && fi < pmesh->GroupNTriangles(group),
574                "invalid triangular face index");
575    pmesh->GroupTriangle(group, fi, l_face, ori);
576    if (ori == 0)
577    {
578       GetFaceDofs(l_face, dofs);
579    }
580    else
581    {
582       Array<int> rdofs;
583       fec->SubDofOrder(Geometry::TRIANGLE, 2, ori, dofs);
584       GetFaceDofs(l_face, rdofs);
585       for (int i = 0; i < dofs.Size(); i++)
586       {
587          const int di = dofs[i];
588          dofs[i] = (di >= 0) ? rdofs[di] : -1-rdofs[-1-di];
589       }
590    }
591 }
592 
GetSharedQuadrilateralDofs(int group,int fi,Array<int> & dofs) const593 void ParFiniteElementSpace::GetSharedQuadrilateralDofs(
594    int group, int fi, Array<int> &dofs) const
595 {
596    int l_face, ori;
597    MFEM_ASSERT(0 <= fi && fi < pmesh->GroupNQuadrilaterals(group),
598                "invalid quadrilateral face index");
599    pmesh->GroupQuadrilateral(group, fi, l_face, ori);
600    if (ori == 0)
601    {
602       GetFaceDofs(l_face, dofs);
603    }
604    else
605    {
606       Array<int> rdofs;
607       fec->SubDofOrder(Geometry::SQUARE, 2, ori, dofs);
608       GetFaceDofs(l_face, rdofs);
609       for (int i = 0; i < dofs.Size(); i++)
610       {
611          const int di = dofs[i];
612          dofs[i] = (di >= 0) ? rdofs[di] : -1-rdofs[-1-di];
613       }
614    }
615 }
616 
GenerateGlobalOffsets() const617 void ParFiniteElementSpace::GenerateGlobalOffsets() const
618 {
619    MFEM_ASSERT(Conforming(), "wrong code path");
620 
621    HYPRE_BigInt ldof[2];
622    Array<HYPRE_BigInt> *offsets[2] = { &dof_offsets, &tdof_offsets };
623 
624    ldof[0] = GetVSize();
625    ldof[1] = TrueVSize();
626 
627    pmesh->GenerateOffsets(2, ldof, offsets);
628 
629    if (HYPRE_AssumedPartitionCheck())
630    {
631       // communicate the neighbor offsets in tdof_nb_offsets
632       GroupTopology &gt = GetGroupTopo();
633       int nsize = gt.GetNumNeighbors()-1;
634       MPI_Request *requests = new MPI_Request[2*nsize];
635       MPI_Status  *statuses = new MPI_Status[2*nsize];
636       tdof_nb_offsets.SetSize(nsize+1);
637       tdof_nb_offsets[0] = tdof_offsets[0];
638 
639       // send and receive neighbors' local tdof offsets
640       int request_counter = 0;
641       for (int i = 1; i <= nsize; i++)
642       {
643          MPI_Irecv(&tdof_nb_offsets[i], 1, HYPRE_MPI_BIG_INT,
644                    gt.GetNeighborRank(i), 5365, MyComm,
645                    &requests[request_counter++]);
646       }
647       for (int i = 1; i <= nsize; i++)
648       {
649          MPI_Isend(&tdof_nb_offsets[0], 1, HYPRE_MPI_BIG_INT,
650                    gt.GetNeighborRank(i), 5365, MyComm,
651                    &requests[request_counter++]);
652       }
653       MPI_Waitall(request_counter, requests, statuses);
654 
655       delete [] statuses;
656       delete [] requests;
657    }
658 }
659 
Build_Dof_TrueDof_Matrix() const660 void ParFiniteElementSpace::Build_Dof_TrueDof_Matrix() const // matrix P
661 {
662    MFEM_ASSERT(Conforming(), "wrong code path");
663 
664    if (P) { return; }
665 
666    int ldof  = GetVSize();
667    int ltdof = TrueVSize();
668 
669    HYPRE_Int *i_diag = Memory<HYPRE_Int>(ldof+1);
670    HYPRE_Int *j_diag = Memory<HYPRE_Int>(ltdof);
671    int diag_counter;
672 
673    HYPRE_Int *i_offd = Memory<HYPRE_Int>(ldof+1);
674    HYPRE_Int *j_offd = Memory<HYPRE_Int>(ldof-ltdof);
675    int offd_counter;
676 
677    HYPRE_BigInt *cmap = Memory<HYPRE_BigInt>(ldof-ltdof);
678 
679    HYPRE_BigInt *col_starts = GetTrueDofOffsets();
680    HYPRE_BigInt *row_starts = GetDofOffsets();
681 
682    Array<Pair<HYPRE_BigInt, int> > cmap_j_offd(ldof-ltdof);
683 
684    i_diag[0] = i_offd[0] = 0;
685    diag_counter = offd_counter = 0;
686    for (int i = 0; i < ldof; i++)
687    {
688       int ltdof = GetLocalTDofNumber(i);
689       if (ltdof >= 0)
690       {
691          j_diag[diag_counter++] = ltdof;
692       }
693       else
694       {
695          cmap_j_offd[offd_counter].one = GetGlobalTDofNumber(i);
696          cmap_j_offd[offd_counter].two = offd_counter;
697          offd_counter++;
698       }
699       i_diag[i+1] = diag_counter;
700       i_offd[i+1] = offd_counter;
701    }
702 
703    SortPairs<HYPRE_BigInt, int>(cmap_j_offd, offd_counter);
704 
705    for (int i = 0; i < offd_counter; i++)
706    {
707       cmap[i] = cmap_j_offd[i].one;
708       j_offd[cmap_j_offd[i].two] = i;
709    }
710 
711    P = new HypreParMatrix(MyComm, MyRank, NRanks, row_starts, col_starts,
712                           i_diag, j_diag, i_offd, j_offd, cmap, offd_counter);
713 
714    SparseMatrix Pdiag;
715    P->GetDiag(Pdiag);
716    R = Transpose(Pdiag);
717 }
718 
GetPartialConformingInterpolation()719 HypreParMatrix *ParFiniteElementSpace::GetPartialConformingInterpolation()
720 {
721    HypreParMatrix *P_pc;
722    Array<HYPRE_BigInt> P_pc_row_starts, P_pc_col_starts;
723    BuildParallelConformingInterpolation(&P_pc, NULL, P_pc_row_starts,
724                                         P_pc_col_starts, NULL, true);
725    P_pc->CopyRowStarts();
726    P_pc->CopyColStarts();
727    return P_pc;
728 }
729 
DivideByGroupSize(double * vec)730 void ParFiniteElementSpace::DivideByGroupSize(double *vec)
731 {
732    GroupTopology &gt = GetGroupTopo();
733    for (int i = 0; i < ldof_group.Size(); i++)
734    {
735       if (gt.IAmMaster(ldof_group[i])) // we are the master
736       {
737          if (ldof_ltdof[i] >= 0) // see note below
738          {
739             vec[ldof_ltdof[i]] /= gt.GetGroupSize(ldof_group[i]);
740          }
741          // NOTE: in NC meshes, ldof_ltdof generated for the gtopo
742          // groups by ConstructTrueDofs gets overwritten by
743          // BuildParallelConformingInterpolation. Some DOFs that are
744          // seen as true by the conforming code are actually slaves and
745          // end up with a -1 in ldof_ltdof.
746       }
747    }
748 }
749 
ScalarGroupComm()750 GroupCommunicator *ParFiniteElementSpace::ScalarGroupComm()
751 {
752    GroupCommunicator *gc = new GroupCommunicator(GetGroupTopo());
753    if (NURBSext)
754    {
755       gc->Create(pNURBSext()->ldof_group);
756    }
757    else
758    {
759       GetGroupComm(*gc, 0);
760    }
761    return gc;
762 }
763 
Synchronize(Array<int> & ldof_marker) const764 void ParFiniteElementSpace::Synchronize(Array<int> &ldof_marker) const
765 {
766    // For non-conforming mesh, synchronization is performed on the cut (aka
767    // "partially conforming") space.
768 
769    MFEM_VERIFY(ldof_marker.Size() == GetVSize(), "invalid in/out array");
770 
771    // implement allreduce(|) as reduce(|) + broadcast
772    gcomm->Reduce<int>(ldof_marker, GroupCommunicator::BitOR);
773    gcomm->Bcast(ldof_marker);
774 }
775 
GetEssentialVDofs(const Array<int> & bdr_attr_is_ess,Array<int> & ess_dofs,int component) const776 void ParFiniteElementSpace::GetEssentialVDofs(const Array<int> &bdr_attr_is_ess,
777                                               Array<int> &ess_dofs,
778                                               int component) const
779 {
780    FiniteElementSpace::GetEssentialVDofs(bdr_attr_is_ess, ess_dofs, component);
781 
782    if (Conforming())
783    {
784       // Make sure that processors without boundary elements mark
785       // their boundary dofs (if they have any).
786       Synchronize(ess_dofs);
787    }
788 }
789 
GetEssentialTrueDofs(const Array<int> & bdr_attr_is_ess,Array<int> & ess_tdof_list,int component)790 void ParFiniteElementSpace::GetEssentialTrueDofs(const Array<int>
791                                                  &bdr_attr_is_ess,
792                                                  Array<int> &ess_tdof_list,
793                                                  int component)
794 {
795    Array<int> ess_dofs, true_ess_dofs;
796 
797    GetEssentialVDofs(bdr_attr_is_ess, ess_dofs, component);
798    GetRestrictionMatrix()->BooleanMult(ess_dofs, true_ess_dofs);
799 
800 #ifdef MFEM_DEBUG
801    // Verify that in boolean arithmetic: P^T ess_dofs = R ess_dofs.
802    Array<int> true_ess_dofs2(true_ess_dofs.Size());
803    HypreParMatrix *Pt = Dof_TrueDof_Matrix()->Transpose();
804    const int *ess_dofs_data = ess_dofs.HostRead();
805    Pt->BooleanMult(1, ess_dofs_data, 0, true_ess_dofs2);
806    delete Pt;
807    int counter = 0;
808    const int *ted = true_ess_dofs.HostRead();
809    for (int i = 0; i < true_ess_dofs.Size(); i++)
810    {
811       if (bool(ted[i]) != bool(true_ess_dofs2[i])) { counter++; }
812    }
813    MFEM_VERIFY(counter == 0, "internal MFEM error: counter = " << counter);
814 #endif
815 
816    MarkerToList(true_ess_dofs, ess_tdof_list);
817 }
818 
GetLocalTDofNumber(int ldof) const819 int ParFiniteElementSpace::GetLocalTDofNumber(int ldof) const
820 {
821    if (Nonconforming())
822    {
823       Dof_TrueDof_Matrix(); // make sure P has been built
824 
825       return ldof_ltdof[ldof]; // NOTE: contains -1 for slaves/DOFs we don't own
826    }
827    else
828    {
829       if (GetGroupTopo().IAmMaster(ldof_group[ldof]))
830       {
831          return ldof_ltdof[ldof];
832       }
833       else
834       {
835          return -1;
836       }
837    }
838 }
839 
GetGlobalTDofNumber(int ldof) const840 HYPRE_BigInt ParFiniteElementSpace::GetGlobalTDofNumber(int ldof) const
841 {
842    if (Nonconforming())
843    {
844       MFEM_VERIFY(ldof_ltdof[ldof] >= 0, "ldof " << ldof << " not a true DOF.");
845 
846       return GetMyTDofOffset() + ldof_ltdof[ldof];
847    }
848    else
849    {
850       if (HYPRE_AssumedPartitionCheck())
851       {
852          return ldof_ltdof[ldof] +
853                 tdof_nb_offsets[GetGroupTopo().GetGroupMaster(ldof_group[ldof])];
854       }
855       else
856       {
857          return ldof_ltdof[ldof] +
858                 tdof_offsets[GetGroupTopo().GetGroupMasterRank(ldof_group[ldof])];
859       }
860    }
861 }
862 
GetGlobalScalarTDofNumber(int sldof)863 HYPRE_BigInt ParFiniteElementSpace::GetGlobalScalarTDofNumber(int sldof)
864 {
865    if (Nonconforming())
866    {
867       MFEM_ABORT("Not implemented for NC mesh.");
868    }
869 
870    if (HYPRE_AssumedPartitionCheck())
871    {
872       if (ordering == Ordering::byNODES)
873       {
874          return ldof_ltdof[sldof] +
875                 tdof_nb_offsets[GetGroupTopo().GetGroupMaster(
876                                    ldof_group[sldof])] / vdim;
877       }
878       else
879       {
880          return (ldof_ltdof[sldof*vdim] +
881                  tdof_nb_offsets[GetGroupTopo().GetGroupMaster(
882                                     ldof_group[sldof*vdim])]) / vdim;
883       }
884    }
885 
886    if (ordering == Ordering::byNODES)
887    {
888       return ldof_ltdof[sldof] +
889              tdof_offsets[GetGroupTopo().GetGroupMasterRank(
890                              ldof_group[sldof])] / vdim;
891    }
892    else
893    {
894       return (ldof_ltdof[sldof*vdim] +
895               tdof_offsets[GetGroupTopo().GetGroupMasterRank(
896                               ldof_group[sldof*vdim])]) / vdim;
897    }
898 }
899 
GetMyDofOffset() const900 HYPRE_BigInt ParFiniteElementSpace::GetMyDofOffset() const
901 {
902    return HYPRE_AssumedPartitionCheck() ? dof_offsets[0] : dof_offsets[MyRank];
903 }
904 
GetMyTDofOffset() const905 HYPRE_BigInt ParFiniteElementSpace::GetMyTDofOffset() const
906 {
907    return HYPRE_AssumedPartitionCheck()? tdof_offsets[0] : tdof_offsets[MyRank];
908 }
909 
GetProlongationMatrix() const910 const Operator *ParFiniteElementSpace::GetProlongationMatrix() const
911 {
912    if (Conforming())
913    {
914       if (Pconf) { return Pconf; }
915 
916       if (NRanks == 1)
917       {
918          Pconf = new IdentityOperator(GetTrueVSize());
919       }
920       else
921       {
922          if (!Device::Allows(Backend::DEVICE_MASK))
923          {
924             Pconf = new ConformingProlongationOperator(*this);
925          }
926          else
927          {
928             Pconf = new DeviceConformingProlongationOperator(*this);
929          }
930       }
931       return Pconf;
932    }
933    else
934    {
935       return Dof_TrueDof_Matrix();
936    }
937 }
938 
GetRestrictionOperator() const939 const Operator *ParFiniteElementSpace::GetRestrictionOperator() const
940 {
941    if (Conforming())
942    {
943       if (Rconf) { return Rconf; }
944 
945       if (NRanks == 1)
946       {
947          R_transpose = new IdentityOperator(GetTrueVSize());
948       }
949       else
950       {
951          if (!Device::Allows(Backend::DEVICE_MASK))
952          {
953             R_transpose = new ConformingProlongationOperator(*this, true);
954          }
955          else
956          {
957             R_transpose =
958                new DeviceConformingProlongationOperator(*this, true);
959          }
960       }
961       Rconf = new TransposeOperator(R_transpose);
962       return Rconf;
963    }
964    else
965    {
966       Dof_TrueDof_Matrix();
967       R_transpose = new TransposeOperator(R);
968       return R;
969    }
970 }
971 
GetRestrictionTransposeOperator() const972 const Operator *ParFiniteElementSpace::GetRestrictionTransposeOperator() const
973 {
974    GetRestrictionOperator();
975    return R_transpose;
976 }
977 
ExchangeFaceNbrData()978 void ParFiniteElementSpace::ExchangeFaceNbrData()
979 {
980    if (num_face_nbr_dofs >= 0) { return; }
981 
982    pmesh->ExchangeFaceNbrData();
983 
984    int num_face_nbrs = pmesh->GetNFaceNeighbors();
985 
986    if (num_face_nbrs == 0)
987    {
988       num_face_nbr_dofs = 0;
989       return;
990    }
991 
992    MPI_Request *requests = new MPI_Request[2*num_face_nbrs];
993    MPI_Request *send_requests = requests;
994    MPI_Request *recv_requests = requests + num_face_nbrs;
995    MPI_Status  *statuses = new MPI_Status[num_face_nbrs];
996 
997    Array<int> ldofs;
998    Array<int> ldof_marker(GetVSize());
999    ldof_marker = -1;
1000 
1001    Table send_nbr_elem_dof;
1002 
1003    send_nbr_elem_dof.MakeI(pmesh->send_face_nbr_elements.Size_of_connections());
1004    send_face_nbr_ldof.MakeI(num_face_nbrs);
1005    face_nbr_ldof.MakeI(num_face_nbrs);
1006    int *send_el_off = pmesh->send_face_nbr_elements.GetI();
1007    int *recv_el_off = pmesh->face_nbr_elements_offset;
1008    for (int fn = 0; fn < num_face_nbrs; fn++)
1009    {
1010       int *my_elems = pmesh->send_face_nbr_elements.GetRow(fn);
1011       int  num_my_elems = pmesh->send_face_nbr_elements.RowSize(fn);
1012 
1013       for (int i = 0; i < num_my_elems; i++)
1014       {
1015          GetElementVDofs(my_elems[i], ldofs);
1016          for (int j = 0; j < ldofs.Size(); j++)
1017          {
1018             int ldof = (ldofs[j] >= 0 ? ldofs[j] : -1-ldofs[j]);
1019 
1020             if (ldof_marker[ldof] != fn)
1021             {
1022                ldof_marker[ldof] = fn;
1023                send_face_nbr_ldof.AddAColumnInRow(fn);
1024             }
1025          }
1026          send_nbr_elem_dof.AddColumnsInRow(send_el_off[fn] + i, ldofs.Size());
1027       }
1028 
1029       int nbr_rank = pmesh->GetFaceNbrRank(fn);
1030       int tag = 0;
1031       MPI_Isend(&send_face_nbr_ldof.GetI()[fn], 1, MPI_INT, nbr_rank, tag,
1032                 MyComm, &send_requests[fn]);
1033 
1034       MPI_Irecv(&face_nbr_ldof.GetI()[fn], 1, MPI_INT, nbr_rank, tag,
1035                 MyComm, &recv_requests[fn]);
1036    }
1037 
1038    MPI_Waitall(num_face_nbrs, recv_requests, statuses);
1039    face_nbr_ldof.MakeJ();
1040 
1041    num_face_nbr_dofs = face_nbr_ldof.Size_of_connections();
1042 
1043    MPI_Waitall(num_face_nbrs, send_requests, statuses);
1044    send_face_nbr_ldof.MakeJ();
1045 
1046    // send/receive the I arrays of send_nbr_elem_dof/face_nbr_element_dof,
1047    // respectively (they contain the number of dofs for each face-neighbor
1048    // element)
1049    face_nbr_element_dof.MakeI(recv_el_off[num_face_nbrs]);
1050 
1051    int *send_I = send_nbr_elem_dof.GetI();
1052    int *recv_I = face_nbr_element_dof.GetI();
1053    for (int fn = 0; fn < num_face_nbrs; fn++)
1054    {
1055       int nbr_rank = pmesh->GetFaceNbrRank(fn);
1056       int tag = 0;
1057       MPI_Isend(send_I + send_el_off[fn], send_el_off[fn+1] - send_el_off[fn],
1058                 MPI_INT, nbr_rank, tag, MyComm, &send_requests[fn]);
1059 
1060       MPI_Irecv(recv_I + recv_el_off[fn], recv_el_off[fn+1] - recv_el_off[fn],
1061                 MPI_INT, nbr_rank, tag, MyComm, &recv_requests[fn]);
1062    }
1063 
1064    MPI_Waitall(num_face_nbrs, send_requests, statuses);
1065    send_nbr_elem_dof.MakeJ();
1066 
1067    ldof_marker = -1;
1068 
1069    for (int fn = 0; fn < num_face_nbrs; fn++)
1070    {
1071       int *my_elems = pmesh->send_face_nbr_elements.GetRow(fn);
1072       int  num_my_elems = pmesh->send_face_nbr_elements.RowSize(fn);
1073 
1074       for (int i = 0; i < num_my_elems; i++)
1075       {
1076          GetElementVDofs(my_elems[i], ldofs);
1077          for (int j = 0; j < ldofs.Size(); j++)
1078          {
1079             int ldof = (ldofs[j] >= 0 ? ldofs[j] : -1-ldofs[j]);
1080 
1081             if (ldof_marker[ldof] != fn)
1082             {
1083                ldof_marker[ldof] = fn;
1084                send_face_nbr_ldof.AddConnection(fn, ldofs[j]);
1085             }
1086          }
1087          send_nbr_elem_dof.AddConnections(
1088             send_el_off[fn] + i, ldofs, ldofs.Size());
1089       }
1090    }
1091    send_face_nbr_ldof.ShiftUpI();
1092    send_nbr_elem_dof.ShiftUpI();
1093 
1094    // convert the ldof indices in send_nbr_elem_dof
1095    int *send_J = send_nbr_elem_dof.GetJ();
1096    for (int fn = 0, j = 0; fn < num_face_nbrs; fn++)
1097    {
1098       int  num_ldofs = send_face_nbr_ldof.RowSize(fn);
1099       int *ldofs     = send_face_nbr_ldof.GetRow(fn);
1100       int  j_end     = send_I[send_el_off[fn+1]];
1101 
1102       for (int i = 0; i < num_ldofs; i++)
1103       {
1104          int ldof = (ldofs[i] >= 0 ? ldofs[i] : -1-ldofs[i]);
1105          ldof_marker[ldof] = i;
1106       }
1107 
1108       for ( ; j < j_end; j++)
1109       {
1110          int ldof = (send_J[j] >= 0 ? send_J[j] : -1-send_J[j]);
1111          send_J[j] = (send_J[j] >= 0 ? ldof_marker[ldof] : -1-ldof_marker[ldof]);
1112       }
1113    }
1114 
1115    MPI_Waitall(num_face_nbrs, recv_requests, statuses);
1116    face_nbr_element_dof.MakeJ();
1117 
1118    // send/receive the J arrays of send_nbr_elem_dof/face_nbr_element_dof,
1119    // respectively (they contain the element dofs in enumeration local for
1120    // the face-neighbor pair)
1121    int *recv_J = face_nbr_element_dof.GetJ();
1122    for (int fn = 0; fn < num_face_nbrs; fn++)
1123    {
1124       int nbr_rank = pmesh->GetFaceNbrRank(fn);
1125       int tag = 0;
1126 
1127       MPI_Isend(send_J + send_I[send_el_off[fn]],
1128                 send_I[send_el_off[fn+1]] - send_I[send_el_off[fn]],
1129                 MPI_INT, nbr_rank, tag, MyComm, &send_requests[fn]);
1130 
1131       MPI_Irecv(recv_J + recv_I[recv_el_off[fn]],
1132                 recv_I[recv_el_off[fn+1]] - recv_I[recv_el_off[fn]],
1133                 MPI_INT, nbr_rank, tag, MyComm, &recv_requests[fn]);
1134    }
1135 
1136    MPI_Waitall(num_face_nbrs, recv_requests, statuses);
1137 
1138    // shift the J array of face_nbr_element_dof
1139    for (int fn = 0, j = 0; fn < num_face_nbrs; fn++)
1140    {
1141       int shift = face_nbr_ldof.GetI()[fn];
1142       int j_end = recv_I[recv_el_off[fn+1]];
1143 
1144       for ( ; j < j_end; j++)
1145       {
1146          if (recv_J[j] >= 0)
1147          {
1148             recv_J[j] += shift;
1149          }
1150          else
1151          {
1152             recv_J[j] -= shift;
1153          }
1154       }
1155    }
1156 
1157    MPI_Waitall(num_face_nbrs, send_requests, statuses);
1158 
1159    // send/receive the J arrays of send_face_nbr_ldof/face_nbr_ldof,
1160    // respectively
1161    for (int fn = 0; fn < num_face_nbrs; fn++)
1162    {
1163       int nbr_rank = pmesh->GetFaceNbrRank(fn);
1164       int tag = 0;
1165 
1166       MPI_Isend(send_face_nbr_ldof.GetRow(fn),
1167                 send_face_nbr_ldof.RowSize(fn),
1168                 MPI_INT, nbr_rank, tag, MyComm, &send_requests[fn]);
1169 
1170       MPI_Irecv(face_nbr_ldof.GetRow(fn),
1171                 face_nbr_ldof.RowSize(fn),
1172                 MPI_INT, nbr_rank, tag, MyComm, &recv_requests[fn]);
1173    }
1174 
1175    MPI_Waitall(num_face_nbrs, recv_requests, statuses);
1176    MPI_Waitall(num_face_nbrs, send_requests, statuses);
1177 
1178    // send my_dof_offset (i.e. my_ldof_offset) to face neighbors and receive
1179    // their offset in dof_face_nbr_offsets, used to define face_nbr_glob_dof_map
1180    face_nbr_glob_dof_map.SetSize(num_face_nbr_dofs);
1181    Array<HYPRE_BigInt> dof_face_nbr_offsets(num_face_nbrs);
1182    HYPRE_BigInt my_dof_offset = GetMyDofOffset();
1183    for (int fn = 0; fn < num_face_nbrs; fn++)
1184    {
1185       int nbr_rank = pmesh->GetFaceNbrRank(fn);
1186       int tag = 0;
1187 
1188       MPI_Isend(&my_dof_offset, 1, HYPRE_MPI_BIG_INT, nbr_rank, tag,
1189                 MyComm, &send_requests[fn]);
1190 
1191       MPI_Irecv(&dof_face_nbr_offsets[fn], 1, HYPRE_MPI_BIG_INT, nbr_rank, tag,
1192                 MyComm, &recv_requests[fn]);
1193    }
1194 
1195    MPI_Waitall(num_face_nbrs, recv_requests, statuses);
1196 
1197    // set the array face_nbr_glob_dof_map which holds the global ldof indices of
1198    // the face-neighbor dofs
1199    for (int fn = 0, j = 0; fn < num_face_nbrs; fn++)
1200    {
1201       for (int j_end = face_nbr_ldof.GetI()[fn+1]; j < j_end; j++)
1202       {
1203          int ldof = face_nbr_ldof.GetJ()[j];
1204          if (ldof < 0)
1205          {
1206             ldof = -1-ldof;
1207          }
1208 
1209          face_nbr_glob_dof_map[j] = dof_face_nbr_offsets[fn] + ldof;
1210       }
1211    }
1212 
1213    MPI_Waitall(num_face_nbrs, send_requests, statuses);
1214 
1215    delete [] statuses;
1216    delete [] requests;
1217 }
1218 
GetFaceNbrElementVDofs(int i,Array<int> & vdofs) const1219 void ParFiniteElementSpace::GetFaceNbrElementVDofs(
1220    int i, Array<int> &vdofs) const
1221 {
1222    face_nbr_element_dof.GetRow(i, vdofs);
1223 }
1224 
GetFaceNbrFaceVDofs(int i,Array<int> & vdofs) const1225 void ParFiniteElementSpace::GetFaceNbrFaceVDofs(int i, Array<int> &vdofs) const
1226 {
1227    // Works for NC mesh where 'i' is an index returned by
1228    // ParMesh::GetSharedFace() such that i >= Mesh::GetNumFaces(), i.e. 'i' is
1229    // the index of a ghost face.
1230    MFEM_ASSERT(Nonconforming() && i >= pmesh->GetNumFaces(), "");
1231    int el1, el2, inf1, inf2;
1232    pmesh->GetFaceElements(i, &el1, &el2);
1233    el2 = -1 - el2;
1234    pmesh->GetFaceInfos(i, &inf1, &inf2);
1235    MFEM_ASSERT(0 <= el2 && el2 < face_nbr_element_dof.Size(), "");
1236    const int nd = face_nbr_element_dof.RowSize(el2);
1237    const int *vol_vdofs = face_nbr_element_dof.GetRow(el2);
1238    const Element *face_nbr_el = pmesh->face_nbr_elements[el2];
1239    Geometry::Type geom = face_nbr_el->GetGeometryType();
1240    const int face_dim = Geometry::Dimension[geom]-1;
1241 
1242    fec->SubDofOrder(geom, face_dim, inf2, vdofs);
1243    // Convert local dofs to local vdofs.
1244    Ordering::DofsToVDofs<Ordering::byNODES>(nd/vdim, vdim, vdofs);
1245    // Convert local vdofs to global vdofs.
1246    for (int j = 0; j < vdofs.Size(); j++)
1247    {
1248       const int ldof = vdofs[j];
1249       vdofs[j] = (ldof >= 0) ? vol_vdofs[ldof] : -1-vol_vdofs[-1-ldof];
1250    }
1251 }
1252 
GetFaceNbrFE(int i) const1253 const FiniteElement *ParFiniteElementSpace::GetFaceNbrFE(int i) const
1254 {
1255    const FiniteElement *FE =
1256       fec->FiniteElementForGeometry(
1257          pmesh->face_nbr_elements[i]->GetGeometryType());
1258 
1259    if (NURBSext)
1260    {
1261       mfem_error("ParFiniteElementSpace::GetFaceNbrFE"
1262                  " does not support NURBS!");
1263    }
1264    return FE;
1265 }
1266 
GetFaceNbrFaceFE(int i) const1267 const FiniteElement *ParFiniteElementSpace::GetFaceNbrFaceFE(int i) const
1268 {
1269    // Works for NC mesh where 'i' is an index returned by
1270    // ParMesh::GetSharedFace() such that i >= Mesh::GetNumFaces(), i.e. 'i' is
1271    // the index of a ghost face.
1272    // Works in tandem with GetFaceNbrFaceVDofs() defined above.
1273 
1274    MFEM_ASSERT(Nonconforming() && !NURBSext, "");
1275    Geometry::Type face_geom = pmesh->GetFaceGeometryType(i);
1276    return fec->FiniteElementForGeometry(face_geom);
1277 }
1278 
Lose_Dof_TrueDof_Matrix()1279 void ParFiniteElementSpace::Lose_Dof_TrueDof_Matrix()
1280 {
1281    hypre_ParCSRMatrix *csrP = (hypre_ParCSRMatrix*)(*P);
1282    hypre_ParCSRMatrixOwnsRowStarts(csrP) = 1;
1283    hypre_ParCSRMatrixOwnsColStarts(csrP) = 1;
1284    P -> StealData();
1285    dof_offsets.LoseData();
1286    tdof_offsets.LoseData();
1287 }
1288 
ConstructTrueDofs()1289 void ParFiniteElementSpace::ConstructTrueDofs()
1290 {
1291    int i, gr, n = GetVSize();
1292    GroupTopology &gt = pmesh->gtopo;
1293    gcomm = new GroupCommunicator(gt);
1294    Table &group_ldof = gcomm->GroupLDofTable();
1295 
1296    GetGroupComm(*gcomm, 1, &ldof_sign);
1297 
1298    // Define ldof_group and mark ldof_ltdof with
1299    //   -1 for ldof that is ours
1300    //   -2 for ldof that is in a group with another master
1301    ldof_group.SetSize(n);
1302    ldof_ltdof.SetSize(n);
1303    ldof_group = 0;
1304    ldof_ltdof = -1;
1305 
1306    for (gr = 1; gr < group_ldof.Size(); gr++)
1307    {
1308       const int *ldofs = group_ldof.GetRow(gr);
1309       const int nldofs = group_ldof.RowSize(gr);
1310       for (i = 0; i < nldofs; i++)
1311       {
1312          ldof_group[ldofs[i]] = gr;
1313       }
1314 
1315       if (!gt.IAmMaster(gr)) // we are not the master
1316       {
1317          for (i = 0; i < nldofs; i++)
1318          {
1319             ldof_ltdof[ldofs[i]] = -2;
1320          }
1321       }
1322    }
1323 
1324    // count ltdof_size
1325    ltdof_size = 0;
1326    for (i = 0; i < n; i++)
1327    {
1328       if (ldof_ltdof[i] == -1)
1329       {
1330          ldof_ltdof[i] = ltdof_size++;
1331       }
1332    }
1333    gcomm->SetLTDofTable(ldof_ltdof);
1334 
1335    // have the group masters broadcast their ltdofs to the rest of the group
1336    gcomm->Bcast(ldof_ltdof);
1337 }
1338 
ConstructTrueNURBSDofs()1339 void ParFiniteElementSpace::ConstructTrueNURBSDofs()
1340 {
1341    int n = GetVSize();
1342    GroupTopology &gt = pNURBSext()->gtopo;
1343    gcomm = new GroupCommunicator(gt);
1344 
1345    // pNURBSext()->ldof_group is for scalar space!
1346    if (vdim == 1)
1347    {
1348       ldof_group.MakeRef(pNURBSext()->ldof_group);
1349    }
1350    else
1351    {
1352       const int *scalar_ldof_group = pNURBSext()->ldof_group;
1353       ldof_group.SetSize(n);
1354       for (int i = 0; i < n; i++)
1355       {
1356          ldof_group[i] = scalar_ldof_group[VDofToDof(i)];
1357       }
1358    }
1359 
1360    gcomm->Create(ldof_group);
1361 
1362    // ldof_sign.SetSize(n);
1363    // ldof_sign = 1;
1364    ldof_sign.DeleteAll();
1365 
1366    ltdof_size = 0;
1367    ldof_ltdof.SetSize(n);
1368    for (int i = 0; i < n; i++)
1369    {
1370       if (gt.IAmMaster(ldof_group[i]))
1371       {
1372          ldof_ltdof[i] = ltdof_size;
1373          ltdof_size++;
1374       }
1375       else
1376       {
1377          ldof_ltdof[i] = -2;
1378       }
1379    }
1380    gcomm->SetLTDofTable(ldof_ltdof);
1381 
1382    // have the group masters broadcast their ltdofs to the rest of the group
1383    gcomm->Bcast(ldof_ltdof);
1384 }
1385 
GetGhostVertexDofs(const MeshId & id,Array<int> & dofs) const1386 void ParFiniteElementSpace::GetGhostVertexDofs(const MeshId &id,
1387                                                Array<int> &dofs) const
1388 {
1389    int nv = fec->DofForGeometry(Geometry::POINT);
1390    dofs.SetSize(nv);
1391    for (int j = 0; j < nv; j++)
1392    {
1393       dofs[j] = ndofs + nv*id.index + j;
1394    }
1395 }
1396 
GetGhostEdgeDofs(const MeshId & edge_id,Array<int> & dofs) const1397 void ParFiniteElementSpace::GetGhostEdgeDofs(const MeshId &edge_id,
1398                                              Array<int> &dofs) const
1399 {
1400    int nv = fec->DofForGeometry(Geometry::POINT);
1401    int ne = fec->DofForGeometry(Geometry::SEGMENT);
1402    dofs.SetSize(2*nv + ne);
1403 
1404    int V[2], ghost = pncmesh->GetNVertices();
1405    pmesh->pncmesh->GetEdgeVertices(edge_id, V);
1406 
1407    for (int i = 0; i < 2; i++)
1408    {
1409       int k = (V[i] < ghost) ? V[i]*nv : (ndofs + (V[i] - ghost)*nv);
1410       for (int j = 0; j < nv; j++)
1411       {
1412          dofs[i*nv + j] = k++;
1413       }
1414    }
1415 
1416    int k = ndofs + ngvdofs + (edge_id.index - pncmesh->GetNEdges())*ne;
1417    for (int j = 0; j < ne; j++)
1418    {
1419       dofs[2*nv + j] = k++;
1420    }
1421 }
1422 
GetGhostFaceDofs(const MeshId & face_id,Array<int> & dofs) const1423 void ParFiniteElementSpace::GetGhostFaceDofs(const MeshId &face_id,
1424                                              Array<int> &dofs) const
1425 {
1426    int nfv, V[4], E[4], Eo[4];
1427    nfv = pmesh->pncmesh->GetFaceVerticesEdges(face_id, V, E, Eo);
1428 
1429    int nv = fec->DofForGeometry(Geometry::POINT);
1430    int ne = fec->DofForGeometry(Geometry::SEGMENT);
1431    int nf_tri = fec->DofForGeometry(Geometry::TRIANGLE);
1432    int nf_quad = fec->DofForGeometry(Geometry::SQUARE);
1433    int nf = (nfv == 3) ? nf_tri : nf_quad;
1434 
1435    dofs.SetSize(nfv*(nv + ne) + nf);
1436 
1437    int offset = 0;
1438    for (int i = 0; i < nfv; i++)
1439    {
1440       int ghost = pncmesh->GetNVertices();
1441       int first = (V[i] < ghost) ? V[i]*nv : (ndofs + (V[i] - ghost)*nv);
1442       for (int j = 0; j < nv; j++)
1443       {
1444          dofs[offset++] = first + j;
1445       }
1446    }
1447 
1448    for (int i = 0; i < nfv; i++)
1449    {
1450       int ghost = pncmesh->GetNEdges();
1451       int first = (E[i] < ghost) ? nvdofs + E[i]*ne
1452                   /*          */ : ndofs + ngvdofs + (E[i] - ghost)*ne;
1453       const int *ind = fec->DofOrderForOrientation(Geometry::SEGMENT, Eo[i]);
1454       for (int j = 0; j < ne; j++)
1455       {
1456          dofs[offset++] = (ind[j] >= 0) ? (first + ind[j])
1457                           /*         */ : (-1 - (first + (-1 - ind[j])));
1458       }
1459    }
1460 
1461    const int ghost_face_index = face_id.index - pncmesh->GetNFaces();
1462    int first = ndofs + ngvdofs + ngedofs + nf_quad*ghost_face_index;
1463 
1464    for (int j = 0; j < nf; j++)
1465    {
1466       dofs[offset++] = first + j;
1467    }
1468 }
1469 
GetGhostDofs(int entity,const MeshId & id,Array<int> & dofs) const1470 void ParFiniteElementSpace::GetGhostDofs(int entity, const MeshId &id,
1471                                          Array<int> &dofs) const
1472 {
1473    // helper to get ghost vertex, ghost edge or ghost face DOFs
1474    switch (entity)
1475    {
1476       case 0: GetGhostVertexDofs(id, dofs); break;
1477       case 1: GetGhostEdgeDofs(id, dofs); break;
1478       case 2: GetGhostFaceDofs(id, dofs); break;
1479    }
1480 }
1481 
GetBareDofs(int entity,int index,Array<int> & dofs) const1482 void ParFiniteElementSpace::GetBareDofs(int entity, int index,
1483                                         Array<int> &dofs) const
1484 {
1485    int ned, ghost, first;
1486    switch (entity)
1487    {
1488       case 0:
1489          ned = fec->DofForGeometry(Geometry::POINT);
1490          ghost = pncmesh->GetNVertices();
1491          first = (index < ghost)
1492                  ? index*ned // regular vertex
1493                  : ndofs + (index - ghost)*ned; // ghost vertex
1494          break;
1495 
1496       case 1:
1497          ned = fec->DofForGeometry(Geometry::SEGMENT);
1498          ghost = pncmesh->GetNEdges();
1499          first = (index < ghost)
1500                  ? nvdofs + index*ned // regular edge
1501                  : ndofs + ngvdofs + (index - ghost)*ned; // ghost edge
1502          break;
1503 
1504       default:
1505          Geometry::Type geom = pncmesh->GetFaceGeometry(index);
1506          MFEM_ASSERT(geom == Geometry::SQUARE ||
1507                      geom == Geometry::TRIANGLE, "");
1508 
1509          ned = fec->DofForGeometry(geom);
1510          ghost = pncmesh->GetNFaces();
1511 
1512          if (index < ghost) // regular face
1513          {
1514             first = nvdofs + nedofs + FirstFaceDof(index);
1515          }
1516          else // ghost face
1517          {
1518             index -= ghost;
1519             int stride = fec->DofForGeometry(Geometry::SQUARE);
1520             first = ndofs + ngvdofs + ngedofs + index*stride;
1521          }
1522          break;
1523    }
1524 
1525    dofs.SetSize(ned);
1526    for (int i = 0; i < ned; i++)
1527    {
1528       dofs[i] = first + i;
1529    }
1530 }
1531 
PackDof(int entity,int index,int edof) const1532 int ParFiniteElementSpace::PackDof(int entity, int index, int edof) const
1533 {
1534    // DOFs are ordered as follows:
1535    // vertices | edges | faces | internal | ghost vert. | g. edges | g. faces
1536 
1537    int ghost, ned;
1538    switch (entity)
1539    {
1540       case 0:
1541          ghost = pncmesh->GetNVertices();
1542          ned = fec->DofForGeometry(Geometry::POINT);
1543 
1544          return (index < ghost)
1545                 ? index*ned + edof // regular vertex
1546                 : ndofs + (index - ghost)*ned + edof; // ghost vertex
1547 
1548       case 1:
1549          ghost = pncmesh->GetNEdges();
1550          ned = fec->DofForGeometry(Geometry::SEGMENT);
1551 
1552          return (index < ghost)
1553                 ? nvdofs + index*ned + edof // regular edge
1554                 : ndofs + ngvdofs + (index - ghost)*ned + edof; // ghost edge
1555 
1556       default:
1557          ghost = pncmesh->GetNFaces();
1558          ned = fec->DofForGeometry(pncmesh->GetFaceGeometry(index));
1559 
1560          if (index < ghost) // regular face
1561          {
1562             return nvdofs + nedofs + FirstFaceDof(index) + edof;
1563          }
1564          else // ghost face
1565          {
1566             index -= ghost;
1567             int stride = fec->DofForGeometry(Geometry::SQUARE);
1568             return ndofs + ngvdofs + ngedofs + index*stride + edof;
1569          }
1570    }
1571 }
1572 
bisect(const int * array,int size,int value)1573 static int bisect(const int* array, int size, int value)
1574 {
1575    const int* end = array + size;
1576    const int* pos = std::lower_bound(array, end, value);
1577    MFEM_VERIFY(pos != end, "value not found");
1578    return pos - array;
1579 }
1580 
1581 /** Dissect a DOF number to obtain the entity type (0=vertex, 1=edge, 2=face),
1582  *  entity index and the DOF number within the entity.
1583  */
UnpackDof(int dof,int & entity,int & index,int & edof) const1584 void ParFiniteElementSpace::UnpackDof(int dof,
1585                                       int &entity, int &index, int &edof) const
1586 {
1587    MFEM_VERIFY(dof >= 0, "");
1588    if (dof < ndofs)
1589    {
1590       if (dof < nvdofs) // regular vertex
1591       {
1592          int nv = fec->DofForGeometry(Geometry::POINT);
1593          entity = 0, index = dof / nv, edof = dof % nv;
1594          return;
1595       }
1596       dof -= nvdofs;
1597       if (dof < nedofs) // regular edge
1598       {
1599          int ne = fec->DofForGeometry(Geometry::SEGMENT);
1600          entity = 1, index = dof / ne, edof = dof % ne;
1601          return;
1602       }
1603       dof -= nedofs;
1604       if (dof < nfdofs) // regular face
1605       {
1606          if (uni_fdof >= 0) // uniform faces
1607          {
1608             int nf = fec->DofForGeometry(pncmesh->GetFaceGeometry(0));
1609             index = dof / nf, edof = dof % nf;
1610          }
1611          else // mixed faces or var-order space
1612          {
1613             const Table &table = var_face_dofs;
1614             MFEM_ASSERT(table.Size(), "");
1615             int jpos = bisect(table.GetJ(), table.Size_of_connections(), dof);
1616             index = bisect(table.GetI(), table.Size(), jpos);
1617             edof = dof - table.GetRow(index)[0];
1618          }
1619          entity = 2;
1620          return;
1621       }
1622       MFEM_ABORT("Cannot unpack internal DOF");
1623    }
1624    else
1625    {
1626       dof -= ndofs;
1627       if (dof < ngvdofs) // ghost vertex
1628       {
1629          int nv = fec->DofForGeometry(Geometry::POINT);
1630          entity = 0, index = pncmesh->GetNVertices() + dof / nv, edof = dof % nv;
1631          return;
1632       }
1633       dof -= ngvdofs;
1634       if (dof < ngedofs) // ghost edge
1635       {
1636          int ne = fec->DofForGeometry(Geometry::SEGMENT);
1637          entity = 1, index = pncmesh->GetNEdges() + dof / ne, edof = dof % ne;
1638          return;
1639       }
1640       dof -= ngedofs;
1641       if (dof < ngfdofs) // ghost face
1642       {
1643          int stride = fec->DofForGeometry(Geometry::SQUARE);
1644          index = pncmesh->GetNFaces() + dof / stride, edof = dof % stride;
1645          entity = 2;
1646          return;
1647       }
1648       MFEM_ABORT("Out of range DOF.");
1649    }
1650 }
1651 
1652 /** Represents an element of the P matrix. The column number is global and
1653  *  corresponds to vector dimension 0. The other dimension columns are offset
1654  *  by 'stride'.
1655  */
1656 struct PMatrixElement
1657 {
1658    HYPRE_BigInt column;
1659    int stride;
1660    double value;
1661 
PMatrixElementmfem::PMatrixElement1662    PMatrixElement(HYPRE_BigInt col = 0, int str = 0, double val = 0)
1663       : column(col), stride(str), value(val) {}
1664 
operator <mfem::PMatrixElement1665    bool operator<(const PMatrixElement &other) const
1666    { return column < other.column; }
1667 
1668    typedef std::vector<PMatrixElement> List;
1669 };
1670 
1671 /** Represents one row of the P matrix, for the construction code below.
1672  *  The row is complete: diagonal and offdiagonal elements are not distinguished.
1673  */
1674 struct PMatrixRow
1675 {
1676    PMatrixElement::List elems;
1677 
1678    /// Add other row, times 'coef'.
AddRowmfem::PMatrixRow1679    void AddRow(const PMatrixRow &other, double coef)
1680    {
1681       elems.reserve(elems.size() + other.elems.size());
1682       for (unsigned i = 0; i < other.elems.size(); i++)
1683       {
1684          const PMatrixElement &oei = other.elems[i];
1685          elems.push_back(
1686             PMatrixElement(oei.column, oei.stride, coef * oei.value));
1687       }
1688    }
1689 
1690    /// Remove duplicate columns and sum their values.
Collapsemfem::PMatrixRow1691    void Collapse()
1692    {
1693       if (!elems.size()) { return; }
1694       std::sort(elems.begin(), elems.end());
1695 
1696       int j = 0;
1697       for (unsigned i = 1; i < elems.size(); i++)
1698       {
1699          if (elems[j].column == elems[i].column)
1700          {
1701             elems[j].value += elems[i].value;
1702          }
1703          else
1704          {
1705             elems[++j] = elems[i];
1706          }
1707       }
1708       elems.resize(j+1);
1709    }
1710 
writemfem::PMatrixRow1711    void write(std::ostream &os, double sign) const
1712    {
1713       bin_io::write<int>(os, elems.size());
1714       for (unsigned i = 0; i < elems.size(); i++)
1715       {
1716          const PMatrixElement &e = elems[i];
1717          bin_io::write<HYPRE_BigInt>(os, e.column);
1718          bin_io::write<int>(os, e.stride);
1719          bin_io::write<double>(os, e.value * sign);
1720       }
1721    }
1722 
readmfem::PMatrixRow1723    void read(std::istream &is, double sign)
1724    {
1725       elems.resize(bin_io::read<int>(is));
1726       for (unsigned i = 0; i < elems.size(); i++)
1727       {
1728          PMatrixElement &e = elems[i];
1729          e.column = bin_io::read<HYPRE_BigInt>(is);
1730          e.stride = bin_io::read<int>(is);
1731          e.value = bin_io::read<double>(is) * sign;
1732       }
1733    }
1734 };
1735 
1736 /** Represents a message to another processor containing P matrix rows.
1737  *  Used by ParFiniteElementSpace::ParallelConformingInterpolation.
1738  */
1739 class NeighborRowMessage : public VarMessage<314>
1740 {
1741 public:
1742    typedef NCMesh::MeshId MeshId;
1743    typedef ParNCMesh::GroupId GroupId;
1744 
1745    struct RowInfo
1746    {
1747       int entity, index, edof;
1748       GroupId group;
1749       PMatrixRow row;
1750 
RowInfomfem::NeighborRowMessage::RowInfo1751       RowInfo(int ent, int idx, int edof, GroupId grp, const PMatrixRow &row)
1752          : entity(ent), index(idx), edof(edof), group(grp), row(row) {}
1753 
RowInfomfem::NeighborRowMessage::RowInfo1754       RowInfo(int ent, int idx, int edof, GroupId grp)
1755          : entity(ent), index(idx), edof(edof), group(grp) {}
1756 
1757       typedef std::vector<RowInfo> List;
1758    };
1759 
NeighborRowMessage()1760    NeighborRowMessage() : pncmesh(NULL) {}
1761 
AddRow(int entity,int index,int edof,GroupId group,const PMatrixRow & row)1762    void AddRow(int entity, int index, int edof, GroupId group,
1763                const PMatrixRow &row)
1764    {
1765       rows.push_back(RowInfo(entity, index, edof, group, row));
1766    }
1767 
GetRows() const1768    const RowInfo::List& GetRows() const { return rows; }
1769 
SetNCMesh(ParNCMesh * pnc)1770    void SetNCMesh(ParNCMesh* pnc) { pncmesh = pnc; }
SetFEC(const FiniteElementCollection * fec)1771    void SetFEC(const FiniteElementCollection* fec) { this->fec = fec; }
1772 
1773    typedef std::map<int, NeighborRowMessage> Map;
1774 
1775 protected:
1776    RowInfo::List rows;
1777 
1778    ParNCMesh *pncmesh;
1779    const FiniteElementCollection* fec;
1780 
1781    virtual void Encode(int rank);
1782    virtual void Decode(int);
1783 };
1784 
1785 
Encode(int rank)1786 void NeighborRowMessage::Encode(int rank)
1787 {
1788    std::ostringstream stream;
1789 
1790    Array<MeshId> ent_ids[3];
1791    Array<GroupId> group_ids[3];
1792    Array<int> row_idx[3];
1793 
1794    // encode MeshIds and groups
1795    for (unsigned i = 0; i < rows.size(); i++)
1796    {
1797       const RowInfo &ri = rows[i];
1798       const MeshId &id = pncmesh->GetNCList(ri.entity).LookUp(ri.index);
1799       ent_ids[ri.entity].Append(id);
1800       row_idx[ri.entity].Append(i);
1801       group_ids[ri.entity].Append(ri.group);
1802    }
1803 
1804    Array<GroupId> all_group_ids;
1805    all_group_ids.Reserve(rows.size());
1806    for (int i = 0; i < 3; i++)
1807    {
1808       all_group_ids.Append(group_ids[i]);
1809    }
1810 
1811    pncmesh->AdjustMeshIds(ent_ids, rank);
1812    pncmesh->EncodeMeshIds(stream, ent_ids);
1813    pncmesh->EncodeGroups(stream, all_group_ids);
1814 
1815    // write all rows to the stream
1816    for (int ent = 0; ent < 3; ent++)
1817    {
1818       const Array<MeshId> &ids = ent_ids[ent];
1819       for (int i = 0; i < ids.Size(); i++)
1820       {
1821          const MeshId &id = ids[i];
1822          const RowInfo &ri = rows[row_idx[ent][i]];
1823          MFEM_ASSERT(ent == ri.entity, "");
1824 
1825 #ifdef MFEM_DEBUG_PMATRIX
1826          mfem::out << "Rank " << pncmesh->MyRank << " sending to " << rank
1827                    << ": ent " << ri.entity << ", index " << ri.index
1828                    << ", edof " << ri.edof << " (id " << id.element << "/"
1829                    << int(id.local) << ")" << std::endl;
1830 #endif
1831 
1832          // handle orientation and sign change
1833          int edof = ri.edof;
1834          double s = 1.0;
1835          if (ent == 1)
1836          {
1837             int eo = pncmesh->GetEdgeNCOrientation(id);
1838             const int* ind = fec->DofOrderForOrientation(Geometry::SEGMENT, eo);
1839             if ((edof = ind[edof]) < 0)
1840             {
1841                edof = -1 - edof;
1842                s = -1;
1843             }
1844          }
1845 
1846          bin_io::write<int>(stream, edof);
1847          ri.row.write(stream, s);
1848       }
1849    }
1850 
1851    rows.clear();
1852    stream.str().swap(data);
1853 }
1854 
Decode(int rank)1855 void NeighborRowMessage::Decode(int rank)
1856 {
1857    std::istringstream stream(data);
1858 
1859    Array<MeshId> ent_ids[3];
1860    Array<GroupId> group_ids;
1861 
1862    // decode vertex/edge/face IDs and groups
1863    pncmesh->DecodeMeshIds(stream, ent_ids);
1864    pncmesh->DecodeGroups(stream, group_ids);
1865 
1866    int nrows = ent_ids[0].Size() + ent_ids[1].Size() + ent_ids[2].Size();
1867    MFEM_ASSERT(nrows == group_ids.Size(), "");
1868 
1869    rows.clear();
1870    rows.reserve(nrows);
1871 
1872    // read rows
1873    for (int ent = 0, gi = 0; ent < 3; ent++)
1874    {
1875       const Array<MeshId> &ids = ent_ids[ent];
1876       for (int i = 0; i < ids.Size(); i++)
1877       {
1878          const MeshId &id = ids[i];
1879          int edof = bin_io::read<int>(stream);
1880 
1881          // handle orientation and sign change
1882          const int *ind = NULL;
1883          if (ent == 1)
1884          {
1885             int eo = pncmesh->GetEdgeNCOrientation(id);
1886             ind = fec->DofOrderForOrientation(Geometry::SEGMENT, eo);
1887          }
1888          else if (ent == 2)
1889          {
1890             Geometry::Type geom = pncmesh->GetFaceGeometry(id.index);
1891             int fo = pncmesh->GetFaceOrientation(id.index);
1892             ind = fec->DofOrderForOrientation(geom, fo);
1893          }
1894 
1895          double s = 1.0;
1896          if (ind && (edof = ind[edof]) < 0)
1897          {
1898             edof = -1 - edof;
1899             s = -1.0;
1900          }
1901 
1902          rows.push_back(RowInfo(ent, id.index, edof, group_ids[gi++]));
1903          rows.back().row.read(stream, s);
1904 
1905 #ifdef MFEM_DEBUG_PMATRIX
1906          mfem::out << "Rank " << pncmesh->MyRank << " receiving from " << rank
1907                    << ": ent " << rows.back().entity << ", index "
1908                    << rows.back().index << ", edof " << rows.back().edof
1909                    << std::endl;
1910 #endif
1911       }
1912    }
1913 }
1914 
1915 void
ScheduleSendRow(const PMatrixRow & row,int dof,GroupId group_id,NeighborRowMessage::Map & send_msg) const1916 ParFiniteElementSpace::ScheduleSendRow(const PMatrixRow &row, int dof,
1917                                        GroupId group_id,
1918                                        NeighborRowMessage::Map &send_msg) const
1919 {
1920    int ent, idx, edof;
1921    UnpackDof(dof, ent, idx, edof);
1922 
1923    const ParNCMesh::CommGroup &group = pncmesh->GetGroup(group_id);
1924    for (unsigned i = 0; i < group.size(); i++)
1925    {
1926       int rank = group[i];
1927       if (rank != MyRank)
1928       {
1929          NeighborRowMessage &msg = send_msg[rank];
1930          msg.AddRow(ent, idx, edof, group_id, row);
1931          msg.SetNCMesh(pncmesh);
1932          msg.SetFEC(fec);
1933 #ifdef MFEM_PMATRIX_STATS
1934          n_rows_sent++;
1935 #endif
1936       }
1937    }
1938 }
1939 
ForwardRow(const PMatrixRow & row,int dof,GroupId group_sent_id,GroupId group_id,NeighborRowMessage::Map & send_msg) const1940 void ParFiniteElementSpace::ForwardRow(const PMatrixRow &row, int dof,
1941                                        GroupId group_sent_id, GroupId group_id,
1942                                        NeighborRowMessage::Map &send_msg) const
1943 {
1944    int ent, idx, edof;
1945    UnpackDof(dof, ent, idx, edof);
1946 
1947    const ParNCMesh::CommGroup &group = pncmesh->GetGroup(group_id);
1948    for (unsigned i = 0; i < group.size(); i++)
1949    {
1950       int rank = group[i];
1951       if (rank != MyRank && !pncmesh->GroupContains(group_sent_id, rank))
1952       {
1953          NeighborRowMessage &msg = send_msg[rank];
1954          GroupId invalid = -1; // to prevent forwarding again
1955          msg.AddRow(ent, idx, edof, invalid, row);
1956          msg.SetNCMesh(pncmesh);
1957          msg.SetFEC(fec);
1958 #ifdef MFEM_PMATRIX_STATS
1959          n_rows_fwd++;
1960 #endif
1961 #ifdef MFEM_DEBUG_PMATRIX
1962          mfem::out << "Rank " << pncmesh->GetMyRank() << " forwarding to "
1963                    << rank << ": ent " << ent << ", index" << idx
1964                    << ", edof " << edof << std::endl;
1965 #endif
1966       }
1967    }
1968 }
1969 
1970 #ifdef MFEM_DEBUG_PMATRIX
1971 void ParFiniteElementSpace
DebugDumpDOFs(std::ostream & os,const SparseMatrix & deps,const Array<GroupId> & dof_group,const Array<GroupId> & dof_owner,const Array<bool> & finalized) const1972 ::DebugDumpDOFs(std::ostream &os,
1973                 const SparseMatrix &deps,
1974                 const Array<GroupId> &dof_group,
1975                 const Array<GroupId> &dof_owner,
1976                 const Array<bool> &finalized) const
1977 {
1978    for (int i = 0; i < dof_group.Size(); i++)
1979    {
1980       os << i << ": ";
1981       if (i < (nvdofs + nedofs + nfdofs) || i >= ndofs)
1982       {
1983          int ent, idx, edof;
1984          UnpackDof(i, ent, idx, edof);
1985 
1986          os << edof << " @ ";
1987          if (i > ndofs) { os << "ghost "; }
1988          switch (ent)
1989          {
1990             case 0: os << "vertex "; break;
1991             case 1: os << "edge "; break;
1992             default: os << "face "; break;
1993          }
1994          os << idx << "; ";
1995 
1996          if (i < deps.Height() && deps.RowSize(i))
1997          {
1998             os << "depends on ";
1999             for (int j = 0; j < deps.RowSize(i); j++)
2000             {
2001                os << deps.GetRowColumns(i)[j] << " ("
2002                   << deps.GetRowEntries(i)[j] << ")";
2003                if (j < deps.RowSize(i)-1) { os << ", "; }
2004             }
2005             os << "; ";
2006          }
2007          else
2008          {
2009             os << "no deps; ";
2010          }
2011 
2012          os << "group " << dof_group[i] << " (";
2013          const ParNCMesh::CommGroup &g = pncmesh->GetGroup(dof_group[i]);
2014          for (unsigned j = 0; j < g.size(); j++)
2015          {
2016             if (j) { os << ", "; }
2017             os << g[j];
2018          }
2019 
2020          os << "), owner " << dof_owner[i] << " (rank "
2021             << pncmesh->GetGroup(dof_owner[i])[0] << "); "
2022             << (finalized[i] ? "finalized" : "NOT finalized");
2023       }
2024       else
2025       {
2026          os << "internal";
2027       }
2028       os << "\n";
2029    }
2030 }
2031 #endif
2032 
2033 int ParFiniteElementSpace
BuildParallelConformingInterpolation(HypreParMatrix ** P,SparseMatrix ** R,Array<HYPRE_BigInt> & dof_offs,Array<HYPRE_BigInt> & tdof_offs,Array<int> * dof_tdof,bool partial) const2034 ::BuildParallelConformingInterpolation(HypreParMatrix **P, SparseMatrix **R,
2035                                        Array<HYPRE_BigInt> &dof_offs,
2036                                        Array<HYPRE_BigInt> &tdof_offs,
2037                                        Array<int> *dof_tdof,
2038                                        bool partial) const
2039 {
2040    bool dg = (nvdofs == 0 && nedofs == 0 && nfdofs == 0);
2041 
2042 #ifdef MFEM_PMATRIX_STATS
2043    n_msgs_sent = n_msgs_recv = 0;
2044    n_rows_sent = n_rows_recv = n_rows_fwd = 0;
2045 #endif
2046 
2047    // *** STEP 1: build master-slave dependency lists ***
2048 
2049    int total_dofs = ndofs + ngdofs;
2050    SparseMatrix deps(ndofs, total_dofs);
2051 
2052    if (!dg && !partial)
2053    {
2054       Array<int> master_dofs, slave_dofs;
2055 
2056       // loop through *all* master edges/faces, constrain their slaves
2057       for (int entity = 0; entity <= 2; entity++)
2058       {
2059          const NCMesh::NCList &list = pncmesh->GetNCList(entity);
2060          if (!list.masters.Size()) { continue; }
2061 
2062          IsoparametricTransformation T;
2063          DenseMatrix I;
2064 
2065          // process masters that we own or that affect our edges/faces
2066          for (int mi = 0; mi < list.masters.Size(); mi++)
2067          {
2068             const NCMesh::Master &mf = list.masters[mi];
2069 
2070             // get master DOFs
2071             if (pncmesh->IsGhost(entity, mf.index))
2072             {
2073                GetGhostDofs(entity, mf, master_dofs);
2074             }
2075             else
2076             {
2077                GetEntityDofs(entity, mf.index, master_dofs, mf.Geom());
2078             }
2079 
2080             if (!master_dofs.Size()) { continue; }
2081 
2082             const FiniteElement* fe = fec->FiniteElementForGeometry(mf.Geom());
2083             if (!fe) { continue; }
2084 
2085             switch (mf.Geom())
2086             {
2087                case Geometry::SQUARE:   T.SetFE(&QuadrilateralFE); break;
2088                case Geometry::TRIANGLE: T.SetFE(&TriangleFE); break;
2089                case Geometry::SEGMENT:  T.SetFE(&SegmentFE); break;
2090                default: MFEM_ABORT("unsupported geometry");
2091             }
2092 
2093             // constrain slaves that exist in our mesh
2094             for (int si = mf.slaves_begin; si < mf.slaves_end; si++)
2095             {
2096                const NCMesh::Slave &sf = list.slaves[si];
2097                if (pncmesh->IsGhost(entity, sf.index)) { continue; }
2098 
2099                const int variant = 0; // TODO parallel var-order
2100                GetEntityDofs(entity, sf.index, slave_dofs, mf.Geom(), variant);
2101                if (!slave_dofs.Size()) { continue; }
2102 
2103                list.OrientedPointMatrix(sf, T.GetPointMat());
2104                fe->GetLocalInterpolation(T, I);
2105 
2106                // make each slave DOF dependent on all master DOFs
2107                AddDependencies(deps, master_dofs, slave_dofs, I);
2108             }
2109          }
2110       }
2111 
2112       deps.Finalize();
2113    }
2114 
2115    // *** STEP 2: initialize group and owner ID for each DOF ***
2116 
2117    Array<GroupId> dof_group(total_dofs);
2118    Array<GroupId> dof_owner(total_dofs);
2119    dof_group = 0;
2120    dof_owner = 0;
2121 
2122    if (!dg)
2123    {
2124       Array<int> dofs;
2125 
2126       // initialize dof_group[], dof_owner[]
2127       for (int entity = 0; entity <= 2; entity++)
2128       {
2129          const NCMesh::NCList &list = pncmesh->GetNCList(entity);
2130 
2131          int lsize[3] =
2132          { list.conforming.Size(), list.masters.Size(), list.slaves.Size() };
2133 
2134          for (int l = 0; l < 3; l++)
2135          {
2136             for (int i = 0; i < lsize[l]; i++)
2137             {
2138                const MeshId &id =
2139                   (l == 0) ? list.conforming[i] :
2140                   (l == 1) ? (const MeshId&) list.masters[i]
2141                   /*    */ : (const MeshId&) list.slaves[i];
2142 
2143                if (id.index < 0) { continue; }
2144 
2145                GroupId owner = pncmesh->GetEntityOwnerId(entity, id.index);
2146                GroupId group = pncmesh->GetEntityGroupId(entity, id.index);
2147 
2148                GetBareDofs(entity, id.index, dofs);
2149 
2150                for (int j = 0; j < dofs.Size(); j++)
2151                {
2152                   int dof = dofs[j];
2153                   dof_owner[dof] = owner;
2154                   dof_group[dof] = group;
2155                }
2156             }
2157          }
2158       }
2159    }
2160 
2161    // *** STEP 3: count true DOFs and calculate P row/column partitions ***
2162 
2163    Array<bool> finalized(total_dofs);
2164    finalized = false;
2165 
2166    // DOFs that stayed independent and are ours are true DOFs
2167    int num_true_dofs = 0;
2168    for (int i = 0; i < ndofs; i++)
2169    {
2170       if (dof_owner[i] == 0 && deps.RowSize(i) == 0)
2171       {
2172          num_true_dofs++;
2173          finalized[i] = true;
2174       }
2175    }
2176 
2177    // calculate global offsets
2178    HYPRE_BigInt loc_sizes[2] = { ndofs*vdim, num_true_dofs*vdim };
2179    Array<HYPRE_BigInt>* offsets[2] = { &dof_offs, &tdof_offs };
2180    pmesh->GenerateOffsets(2, loc_sizes, offsets); // calls MPI_Scan, MPI_Bcast
2181 
2182    HYPRE_BigInt my_tdof_offset =
2183       tdof_offs[HYPRE_AssumedPartitionCheck() ? 0 : MyRank];
2184 
2185    if (R)
2186    {
2187       // initialize the restriction matrix (also parallel but block-diagonal)
2188       *R = new SparseMatrix(num_true_dofs*vdim, ndofs*vdim);
2189    }
2190    if (dof_tdof)
2191    {
2192       dof_tdof->SetSize(ndofs*vdim);
2193       *dof_tdof = -1;
2194    }
2195 
2196    std::vector<PMatrixRow> pmatrix(total_dofs);
2197 
2198    bool bynodes = (ordering == Ordering::byNODES);
2199    int vdim_factor = bynodes ? 1 : vdim;
2200    int dof_stride = bynodes ? ndofs : 1;
2201    int tdof_stride = bynodes ? num_true_dofs : 1;
2202 
2203    // big container for all messages we send (the list is for iterations)
2204    std::list<NeighborRowMessage::Map> send_msg;
2205    send_msg.push_back(NeighborRowMessage::Map());
2206 
2207    // put identity in P and R for true DOFs, set ldof_ltdof
2208    for (int dof = 0, tdof = 0; dof < ndofs; dof++)
2209    {
2210       if (finalized[dof])
2211       {
2212          pmatrix[dof].elems.push_back(
2213             PMatrixElement(my_tdof_offset + vdim_factor*tdof, tdof_stride, 1.));
2214 
2215          // prepare messages to neighbors with identity rows
2216          if (dof_group[dof] != 0)
2217          {
2218             ScheduleSendRow(pmatrix[dof], dof, dof_group[dof], send_msg.back());
2219          }
2220 
2221          for (int vd = 0; vd < vdim; vd++)
2222          {
2223             int vdof = dof*vdim_factor + vd*dof_stride;
2224             int vtdof = tdof*vdim_factor + vd*tdof_stride;
2225 
2226             if (R) { (*R)->Add(vtdof, vdof, 1.0); }
2227             if (dof_tdof) { (*dof_tdof)[vdof] = vtdof; }
2228          }
2229          tdof++;
2230       }
2231    }
2232 
2233    // send identity rows
2234    NeighborRowMessage::IsendAll(send_msg.back(), MyComm);
2235 #ifdef MFEM_PMATRIX_STATS
2236    n_msgs_sent += send_msg.back().size();
2237 #endif
2238 
2239    if (R) { (*R)->Finalize(); }
2240 
2241    // *** STEP 4: main loop ***
2242 
2243    // a single instance (recv_msg) is reused for all incoming messages
2244    NeighborRowMessage recv_msg;
2245    recv_msg.SetNCMesh(pncmesh);
2246    recv_msg.SetFEC(fec);
2247 
2248    int num_finalized = num_true_dofs;
2249    PMatrixRow buffer;
2250    buffer.elems.reserve(1024);
2251 
2252    while (num_finalized < ndofs)
2253    {
2254       // prepare a new round of send buffers
2255       if (send_msg.back().size())
2256       {
2257          send_msg.push_back(NeighborRowMessage::Map());
2258       }
2259 
2260       // check for incoming messages, receive PMatrixRows
2261       int rank, size;
2262       while (NeighborRowMessage::IProbe(rank, size, MyComm))
2263       {
2264          recv_msg.Recv(rank, size, MyComm);
2265 #ifdef MFEM_PMATRIX_STATS
2266          n_msgs_recv++;
2267          n_rows_recv += recv_msg.GetRows().size();
2268 #endif
2269 
2270          const NeighborRowMessage::RowInfo::List &rows = recv_msg.GetRows();
2271          for (unsigned i = 0; i < rows.size(); i++)
2272          {
2273             const NeighborRowMessage::RowInfo &ri = rows[i];
2274             int dof = PackDof(ri.entity, ri.index, ri.edof);
2275             pmatrix[dof] = ri.row;
2276 
2277             if (dof < ndofs && !finalized[dof]) { num_finalized++; }
2278             finalized[dof] = true;
2279 
2280             if (ri.group >= 0 && dof_group[dof] != ri.group)
2281             {
2282                // the sender didn't see the complete group, forward the message
2283                ForwardRow(ri.row, dof, ri.group, dof_group[dof], send_msg.back());
2284             }
2285          }
2286       }
2287 
2288       // finalize all rows that can currently be finalized
2289       bool done = false;
2290       while (!done)
2291       {
2292          done = true;
2293          for (int dof = 0; dof < ndofs; dof++)
2294          {
2295             if (finalized[dof]) { continue; }
2296 
2297             bool owned = (dof_owner[dof] == 0);
2298             bool shared = (dof_group[dof] != 0);
2299 
2300             if (owned && DofFinalizable(dof, finalized, deps))
2301             {
2302                const int* dep_col = deps.GetRowColumns(dof);
2303                const double* dep_coef = deps.GetRowEntries(dof);
2304                int num_dep = deps.RowSize(dof);
2305 
2306                // form linear combination of rows
2307                buffer.elems.clear();
2308                for (int j = 0; j < num_dep; j++)
2309                {
2310                   buffer.AddRow(pmatrix[dep_col[j]], dep_coef[j]);
2311                }
2312                buffer.Collapse();
2313                pmatrix[dof] = buffer;
2314 
2315                finalized[dof] = true;
2316                num_finalized++;
2317                done = false;
2318 
2319                // send row to neighbors who need it
2320                if (shared)
2321                {
2322                   ScheduleSendRow(pmatrix[dof], dof, dof_group[dof],
2323                                   send_msg.back());
2324                }
2325             }
2326          }
2327       }
2328 
2329 #ifdef MFEM_DEBUG_PMATRIX
2330       /*static int dump = 0;
2331       if (dump < 10)
2332       {
2333          char fname[100];
2334          sprintf(fname, "dofs%02d.txt", MyRank);
2335          std::ofstream f(fname);
2336          DebugDumpDOFs(f, deps, dof_group, dof_owner, finalized);
2337          dump++;
2338       }*/
2339 #endif
2340 
2341       // send current batch of messages
2342       NeighborRowMessage::IsendAll(send_msg.back(), MyComm);
2343 #ifdef MFEM_PMATRIX_STATS
2344       n_msgs_sent += send_msg.back().size();
2345 #endif
2346    }
2347 
2348    if (P)
2349    {
2350       *P = MakeVDimHypreMatrix(pmatrix, ndofs, num_true_dofs,
2351                                dof_offs, tdof_offs);
2352    }
2353 
2354    // clean up possible remaining messages in the queue to avoid receiving
2355    // them erroneously in the next run
2356    int rank, size;
2357    while (NeighborRowMessage::IProbe(rank, size, MyComm))
2358    {
2359       recv_msg.RecvDrop(rank, size, MyComm);
2360    }
2361 
2362    // make sure we can discard all send buffers
2363    for (std::list<NeighborRowMessage::Map>::iterator
2364         it = send_msg.begin(); it != send_msg.end(); ++it)
2365    {
2366       NeighborRowMessage::WaitAllSent(*it);
2367    }
2368 
2369 #ifdef MFEM_PMATRIX_STATS
2370    int n_rounds = send_msg.size();
2371    int glob_rounds, glob_msgs_sent, glob_msgs_recv;
2372    int glob_rows_sent, glob_rows_recv, glob_rows_fwd;
2373 
2374    MPI_Reduce(&n_rounds,    &glob_rounds,    1, MPI_INT, MPI_SUM, 0, MyComm);
2375    MPI_Reduce(&n_msgs_sent, &glob_msgs_sent, 1, MPI_INT, MPI_SUM, 0, MyComm);
2376    MPI_Reduce(&n_msgs_recv, &glob_msgs_recv, 1, MPI_INT, MPI_SUM, 0, MyComm);
2377    MPI_Reduce(&n_rows_sent, &glob_rows_sent, 1, MPI_INT, MPI_SUM, 0, MyComm);
2378    MPI_Reduce(&n_rows_recv, &glob_rows_recv, 1, MPI_INT, MPI_SUM, 0, MyComm);
2379    MPI_Reduce(&n_rows_fwd,  &glob_rows_fwd,  1, MPI_INT, MPI_SUM, 0, MyComm);
2380 
2381    if (MyRank == 0)
2382    {
2383       mfem::out << "P matrix stats (avg per rank): "
2384                 << double(glob_rounds)/NRanks << " rounds, "
2385                 << double(glob_msgs_sent)/NRanks << " msgs sent, "
2386                 << double(glob_msgs_recv)/NRanks << " msgs recv, "
2387                 << double(glob_rows_sent)/NRanks << " rows sent, "
2388                 << double(glob_rows_recv)/NRanks << " rows recv, "
2389                 << double(glob_rows_fwd)/NRanks << " rows forwarded."
2390                 << std::endl;
2391    }
2392 #endif
2393 
2394    return num_true_dofs*vdim;
2395 }
2396 
2397 
2398 HypreParMatrix* ParFiniteElementSpace
MakeVDimHypreMatrix(const std::vector<PMatrixRow> & rows,int local_rows,int local_cols,Array<HYPRE_BigInt> & row_starts,Array<HYPRE_BigInt> & col_starts) const2399 ::MakeVDimHypreMatrix(const std::vector<PMatrixRow> &rows,
2400                       int local_rows, int local_cols,
2401                       Array<HYPRE_BigInt> &row_starts,
2402                       Array<HYPRE_BigInt> &col_starts) const
2403 {
2404    bool assumed = HYPRE_AssumedPartitionCheck();
2405    bool bynodes = (ordering == Ordering::byNODES);
2406 
2407    HYPRE_BigInt first_col = col_starts[assumed ? 0 : MyRank];
2408    HYPRE_BigInt next_col = col_starts[assumed ? 1 : MyRank+1];
2409 
2410    // count nonzeros in diagonal/offdiagonal parts
2411    HYPRE_Int nnz_diag = 0, nnz_offd = 0;
2412    std::map<HYPRE_BigInt, int> col_map;
2413    for (int i = 0; i < local_rows; i++)
2414    {
2415       for (unsigned j = 0; j < rows[i].elems.size(); j++)
2416       {
2417          const PMatrixElement &elem = rows[i].elems[j];
2418          HYPRE_BigInt col = elem.column;
2419          if (col >= first_col && col < next_col)
2420          {
2421             nnz_diag += vdim;
2422          }
2423          else
2424          {
2425             nnz_offd += vdim;
2426             for (int vd = 0; vd < vdim; vd++)
2427             {
2428                col_map[col] = -1;
2429                col += elem.stride;
2430             }
2431          }
2432       }
2433    }
2434 
2435    // create offd column mapping
2436    HYPRE_BigInt *cmap = Memory<HYPRE_BigInt>(col_map.size());
2437    int offd_col = 0;
2438    for (auto it = col_map.begin(); it != col_map.end(); ++it)
2439    {
2440       cmap[offd_col] = it->first;
2441       it->second = offd_col++;
2442    }
2443 
2444    HYPRE_Int *I_diag = Memory<HYPRE_Int>(vdim*local_rows + 1);
2445    HYPRE_Int *I_offd = Memory<HYPRE_Int>(vdim*local_rows + 1);
2446 
2447    HYPRE_Int *J_diag = Memory<HYPRE_Int>(nnz_diag);
2448    HYPRE_Int *J_offd = Memory<HYPRE_Int>(nnz_offd);
2449 
2450    double *A_diag = Memory<double>(nnz_diag);
2451    double *A_offd = Memory<double>(nnz_offd);
2452 
2453    int vdim1 = bynodes ? vdim : 1;
2454    int vdim2 = bynodes ? 1 : vdim;
2455    int vdim_offset = bynodes ? local_cols : 1;
2456 
2457    // copy the diag/offd elements
2458    nnz_diag = nnz_offd = 0;
2459    int vrow = 0;
2460    for (int vd1 = 0; vd1 < vdim1; vd1++)
2461    {
2462       for (int i = 0; i < local_rows; i++)
2463       {
2464          for (int vd2 = 0; vd2 < vdim2; vd2++)
2465          {
2466             I_diag[vrow] = nnz_diag;
2467             I_offd[vrow++] = nnz_offd;
2468 
2469             int vd = bynodes ? vd1 : vd2;
2470             for (unsigned j = 0; j < rows[i].elems.size(); j++)
2471             {
2472                const PMatrixElement &elem = rows[i].elems[j];
2473                if (elem.column >= first_col && elem.column < next_col)
2474                {
2475                   J_diag[nnz_diag] = elem.column + vd*vdim_offset - first_col;
2476                   A_diag[nnz_diag++] = elem.value;
2477                }
2478                else
2479                {
2480                   J_offd[nnz_offd] = col_map[elem.column + vd*elem.stride];
2481                   A_offd[nnz_offd++] = elem.value;
2482                }
2483             }
2484          }
2485       }
2486    }
2487    MFEM_ASSERT(vrow == vdim*local_rows, "");
2488    I_diag[vrow] = nnz_diag;
2489    I_offd[vrow] = nnz_offd;
2490 
2491    return new HypreParMatrix(MyComm,
2492                              row_starts.Last(), col_starts.Last(),
2493                              row_starts.GetData(), col_starts.GetData(),
2494                              I_diag, J_diag, A_diag,
2495                              I_offd, J_offd, A_offd,
2496                              col_map.size(), cmap);
2497 }
2498 
2499 template <typename int_type>
make_i_array(int nrows)2500 static int_type* make_i_array(int nrows)
2501 {
2502    int_type *I = Memory<int_type>(nrows+1);
2503    for (int i = 0; i <= nrows; i++) { I[i] = -1; }
2504    return I;
2505 }
2506 
2507 template <typename int_type>
make_j_array(int_type * I,int nrows)2508 static int_type* make_j_array(int_type* I, int nrows)
2509 {
2510    int nnz = 0;
2511    for (int i = 0; i < nrows; i++)
2512    {
2513       if (I[i] >= 0) { nnz++; }
2514    }
2515    int_type *J = Memory<int_type>(nnz);
2516 
2517    I[nrows] = -1;
2518    for (int i = 0, k = 0; i <= nrows; i++)
2519    {
2520       int_type col = I[i];
2521       I[i] = k;
2522       if (col >= 0) { J[k++] = col; }
2523    }
2524    return J;
2525 }
2526 
2527 HypreParMatrix*
RebalanceMatrix(int old_ndofs,const Table * old_elem_dof)2528 ParFiniteElementSpace::RebalanceMatrix(int old_ndofs,
2529                                        const Table* old_elem_dof)
2530 {
2531    MFEM_VERIFY(Nonconforming(), "Only supported for nonconforming meshes.");
2532    MFEM_VERIFY(old_dof_offsets.Size(), "ParFiniteElementSpace::Update needs to "
2533                "be called before ParFiniteElementSpace::RebalanceMatrix");
2534 
2535    HYPRE_BigInt old_offset = HYPRE_AssumedPartitionCheck()
2536                              ? old_dof_offsets[0] : old_dof_offsets[MyRank];
2537 
2538    // send old DOFs of elements we used to own
2539    ParNCMesh* pncmesh = pmesh->pncmesh;
2540    pncmesh->SendRebalanceDofs(old_ndofs, *old_elem_dof, old_offset, this);
2541 
2542    Array<int> dofs;
2543    int vsize = GetVSize();
2544 
2545    const Array<int> &old_index = pncmesh->GetRebalanceOldIndex();
2546    MFEM_VERIFY(old_index.Size() == pmesh->GetNE(),
2547                "Mesh::Rebalance was not called before "
2548                "ParFiniteElementSpace::RebalanceMatrix");
2549 
2550    // prepare the local (diagonal) part of the matrix
2551    HYPRE_Int* i_diag = make_i_array<HYPRE_Int>(vsize);
2552    for (int i = 0; i < pmesh->GetNE(); i++)
2553    {
2554       if (old_index[i] >= 0) // we had this element before
2555       {
2556          const int* old_dofs = old_elem_dof->GetRow(old_index[i]);
2557          GetElementDofs(i, dofs);
2558 
2559          for (int vd = 0; vd < vdim; vd++)
2560          {
2561             for (int j = 0; j < dofs.Size(); j++)
2562             {
2563                int row = DofToVDof(dofs[j], vd);
2564                if (row < 0) { row = -1 - row; }
2565 
2566                int col = DofToVDof(old_dofs[j], vd, old_ndofs);
2567                if (col < 0) { col = -1 - col; }
2568 
2569                i_diag[row] = col;
2570             }
2571          }
2572       }
2573    }
2574    HYPRE_Int* j_diag = make_j_array(i_diag, vsize);
2575 
2576    // receive old DOFs for elements we obtained from others in Rebalance
2577    Array<int> new_elements;
2578    Array<long> old_remote_dofs;
2579    pncmesh->RecvRebalanceDofs(new_elements, old_remote_dofs);
2580 
2581    // create the offdiagonal part of the matrix
2582    HYPRE_BigInt* i_offd = make_i_array<HYPRE_BigInt>(vsize);
2583    for (int i = 0, pos = 0; i < new_elements.Size(); i++)
2584    {
2585       GetElementDofs(new_elements[i], dofs);
2586       const long* old_dofs = &old_remote_dofs[pos];
2587       pos += dofs.Size() * vdim;
2588 
2589       for (int vd = 0; vd < vdim; vd++)
2590       {
2591          for (int j = 0; j < dofs.Size(); j++)
2592          {
2593             int row = DofToVDof(dofs[j], vd);
2594             if (row < 0) { row = -1 - row; }
2595 
2596             if (i_diag[row] == i_diag[row+1]) // diag row empty?
2597             {
2598                i_offd[row] = old_dofs[j + vd * dofs.Size()];
2599             }
2600          }
2601       }
2602    }
2603    HYPRE_BigInt* j_offd = make_j_array(i_offd, vsize);
2604 
2605 #ifndef HYPRE_MIXEDINT
2606    HYPRE_Int *i_offd_hi = i_offd;
2607 #else
2608    // Copy of i_offd array as array of HYPRE_Int
2609    HYPRE_Int *i_offd_hi = Memory<HYPRE_Int>(vsize + 1);
2610    std::copy(i_offd, i_offd + vsize + 1, i_offd_hi);
2611    Memory<HYPRE_BigInt>(i_offd, vsize + 1, true).Delete();
2612 #endif
2613 
2614    // create the offd column map
2615    int offd_cols = i_offd_hi[vsize];
2616    Array<Pair<HYPRE_BigInt, int> > cmap_offd(offd_cols);
2617    for (int i = 0; i < offd_cols; i++)
2618    {
2619       cmap_offd[i].one = j_offd[i];
2620       cmap_offd[i].two = i;
2621    }
2622 
2623 #ifndef HYPRE_MIXEDINT
2624    HYPRE_Int *j_offd_hi = j_offd;
2625 #else
2626    HYPRE_Int *j_offd_hi = Memory<HYPRE_Int>(offd_cols);
2627    Memory<HYPRE_BigInt>(j_offd, offd_cols, true).Delete();
2628 #endif
2629 
2630    SortPairs<HYPRE_BigInt, int>(cmap_offd, offd_cols);
2631 
2632    HYPRE_BigInt* cmap = Memory<HYPRE_BigInt>(offd_cols);
2633    for (int i = 0; i < offd_cols; i++)
2634    {
2635       cmap[i] = cmap_offd[i].one;
2636       j_offd_hi[cmap_offd[i].two] = i;
2637    }
2638 
2639    HypreParMatrix *M;
2640    M = new HypreParMatrix(MyComm, MyRank, NRanks, dof_offsets, old_dof_offsets,
2641                           i_diag, j_diag, i_offd_hi, j_offd_hi, cmap, offd_cols);
2642    return M;
2643 }
2644 
2645 
2646 struct DerefDofMessage
2647 {
2648    std::vector<HYPRE_BigInt> dofs;
2649    MPI_Request request;
2650 };
2651 
2652 HypreParMatrix*
ParallelDerefinementMatrix(int old_ndofs,const Table * old_elem_dof)2653 ParFiniteElementSpace::ParallelDerefinementMatrix(int old_ndofs,
2654                                                   const Table* old_elem_dof)
2655 {
2656    int nrk = HYPRE_AssumedPartitionCheck() ? 2 : NRanks;
2657 
2658    MFEM_VERIFY(Nonconforming(), "Not implemented for conforming meshes.");
2659    MFEM_VERIFY(old_dof_offsets[nrk], "Missing previous (finer) space.");
2660 
2661 #if 0 // check no longer seems to work with NC tet refinement
2662    MFEM_VERIFY(dof_offsets[nrk] <= old_dof_offsets[nrk],
2663                "Previous space is not finer.");
2664 #endif
2665 
2666    // Note to the reader: please make sure you first read
2667    // FiniteElementSpace::RefinementMatrix, then
2668    // FiniteElementSpace::DerefinementMatrix, and only then this function.
2669    // You have been warned! :-)
2670 
2671    Mesh::GeometryList elem_geoms(*mesh);
2672 
2673    Array<int> dofs, old_dofs, old_vdofs;
2674    Vector row;
2675 
2676    ParNCMesh* pncmesh = pmesh->pncmesh;
2677 
2678    int ldof[Geometry::NumGeom];
2679    for (int i = 0; i < Geometry::NumGeom; i++)
2680    {
2681       ldof[i] = 0;
2682    }
2683    for (int i = 0; i < elem_geoms.Size(); i++)
2684    {
2685       Geometry::Type geom = elem_geoms[i];
2686       ldof[geom] = fec->FiniteElementForGeometry(geom)->GetDof();
2687    }
2688 
2689    const CoarseFineTransformations &dtrans = pncmesh->GetDerefinementTransforms();
2690    const Array<int> &old_ranks = pncmesh->GetDerefineOldRanks();
2691 
2692    std::map<int, DerefDofMessage> messages;
2693 
2694    HYPRE_BigInt old_offset = HYPRE_AssumedPartitionCheck()
2695                              ? old_dof_offsets[0] : old_dof_offsets[MyRank];
2696 
2697    // communicate DOFs for derefinements that straddle processor boundaries,
2698    // note that this is infrequent due to the way elements are ordered
2699    for (int k = 0; k < dtrans.embeddings.Size(); k++)
2700    {
2701       const Embedding &emb = dtrans.embeddings[k];
2702 
2703       int fine_rank = old_ranks[k];
2704       int coarse_rank = (emb.parent < 0) ? (-1 - emb.parent)
2705                         : pncmesh->ElementRank(emb.parent);
2706 
2707       if (coarse_rank != MyRank && fine_rank == MyRank)
2708       {
2709          old_elem_dof->GetRow(k, dofs);
2710          DofsToVDofs(dofs, old_ndofs);
2711 
2712          DerefDofMessage &msg = messages[k];
2713          msg.dofs.resize(dofs.Size());
2714          for (int i = 0; i < dofs.Size(); i++)
2715          {
2716             msg.dofs[i] = old_offset + dofs[i];
2717          }
2718 
2719          MPI_Isend(&msg.dofs[0], msg.dofs.size(), HYPRE_MPI_BIG_INT,
2720                    coarse_rank, 291, MyComm, &msg.request);
2721       }
2722       else if (coarse_rank == MyRank && fine_rank != MyRank)
2723       {
2724          MFEM_ASSERT(emb.parent >= 0, "");
2725          Geometry::Type geom = mesh->GetElementBaseGeometry(emb.parent);
2726 
2727          DerefDofMessage &msg = messages[k];
2728          msg.dofs.resize(ldof[geom]*vdim);
2729 
2730          MPI_Irecv(&msg.dofs[0], ldof[geom]*vdim, HYPRE_MPI_BIG_INT,
2731                    fine_rank, 291, MyComm, &msg.request);
2732       }
2733       // TODO: coalesce Isends/Irecvs to the same rank. Typically, on uniform
2734       // derefinement, there should be just one send to MyRank-1 and one recv
2735       // from MyRank+1
2736    }
2737 
2738    DenseTensor localR[Geometry::NumGeom];
2739    for (int i = 0; i < elem_geoms.Size(); i++)
2740    {
2741       GetLocalDerefinementMatrices(elem_geoms[i], localR[elem_geoms[i]]);
2742    }
2743 
2744    // create the diagonal part of the derefinement matrix
2745    SparseMatrix *diag = new SparseMatrix(ndofs*vdim, old_ndofs*vdim);
2746 
2747    Array<char> mark(diag->Height());
2748    mark = 0;
2749 
2750    for (int k = 0; k < dtrans.embeddings.Size(); k++)
2751    {
2752       const Embedding &emb = dtrans.embeddings[k];
2753       if (emb.parent < 0) { continue; }
2754 
2755       int coarse_rank = pncmesh->ElementRank(emb.parent);
2756       int fine_rank = old_ranks[k];
2757 
2758       if (coarse_rank == MyRank && fine_rank == MyRank)
2759       {
2760          Geometry::Type geom = mesh->GetElementBaseGeometry(emb.parent);
2761          DenseMatrix &lR = localR[geom](emb.matrix);
2762 
2763          elem_dof->GetRow(emb.parent, dofs);
2764          old_elem_dof->GetRow(k, old_dofs);
2765 
2766          for (int vd = 0; vd < vdim; vd++)
2767          {
2768             old_dofs.Copy(old_vdofs);
2769             DofsToVDofs(vd, old_vdofs, old_ndofs);
2770 
2771             for (int i = 0; i < lR.Height(); i++)
2772             {
2773                if (!std::isfinite(lR(i, 0))) { continue; }
2774 
2775                int r = DofToVDof(dofs[i], vd);
2776                int m = (r >= 0) ? r : (-1 - r);
2777 
2778                if (!mark[m])
2779                {
2780                   lR.GetRow(i, row);
2781                   diag->SetRow(r, old_vdofs, row);
2782                   mark[m] = 1;
2783                }
2784             }
2785          }
2786       }
2787    }
2788    diag->Finalize();
2789 
2790    // wait for all sends/receives to complete
2791    for (auto it = messages.begin(); it != messages.end(); ++it)
2792    {
2793       MPI_Wait(&it->second.request, MPI_STATUS_IGNORE);
2794    }
2795 
2796    // create the offdiagonal part of the derefinement matrix
2797    SparseMatrix *offd = new SparseMatrix(ndofs*vdim, 1);
2798 
2799    std::map<HYPRE_BigInt, int> col_map;
2800    for (int k = 0; k < dtrans.embeddings.Size(); k++)
2801    {
2802       const Embedding &emb = dtrans.embeddings[k];
2803       if (emb.parent < 0) { continue; }
2804 
2805       int coarse_rank = pncmesh->ElementRank(emb.parent);
2806       int fine_rank = old_ranks[k];
2807 
2808       if (coarse_rank == MyRank && fine_rank != MyRank)
2809       {
2810          Geometry::Type geom = mesh->GetElementBaseGeometry(emb.parent);
2811          DenseMatrix &lR = localR[geom](emb.matrix);
2812 
2813          elem_dof->GetRow(emb.parent, dofs);
2814 
2815          DerefDofMessage &msg = messages[k];
2816          MFEM_ASSERT(msg.dofs.size(), "");
2817 
2818          for (int vd = 0; vd < vdim; vd++)
2819          {
2820             MFEM_ASSERT(ldof[geom], "");
2821             HYPRE_BigInt* remote_dofs = &msg.dofs[vd*ldof[geom]];
2822 
2823             for (int i = 0; i < lR.Height(); i++)
2824             {
2825                if (!std::isfinite(lR(i, 0))) { continue; }
2826 
2827                int r = DofToVDof(dofs[i], vd);
2828                int m = (r >= 0) ? r : (-1 - r);
2829 
2830                if (!mark[m])
2831                {
2832                   lR.GetRow(i, row);
2833                   MFEM_ASSERT(ldof[geom] == row.Size(), "");
2834                   for (int j = 0; j < ldof[geom]; j++)
2835                   {
2836                      if (row[j] == 0.0) { continue; } // NOTE: lR thresholded
2837                      int &lcol = col_map[remote_dofs[j]];
2838                      if (!lcol) { lcol = col_map.size(); }
2839                      offd->_Set_(m, lcol-1, row[j]);
2840                   }
2841                   mark[m] = 1;
2842                }
2843             }
2844          }
2845       }
2846    }
2847    messages.clear();
2848    offd->Finalize(0);
2849    offd->SetWidth(col_map.size());
2850 
2851    // create offd column mapping for use by hypre
2852    HYPRE_BigInt *cmap = Memory<HYPRE_BigInt>(offd->Width());
2853    for (auto it = col_map.begin(); it != col_map.end(); ++it)
2854    {
2855       cmap[it->second-1] = it->first;
2856    }
2857 
2858    // reorder offd columns so that 'cmap' is monotonic
2859    // NOTE: this is easier and probably faster (offd is small) than making
2860    // sure cmap is determined and sorted before the offd matrix is created
2861    {
2862       int width = offd->Width();
2863       Array<Pair<HYPRE_BigInt, int> > reorder(width);
2864       for (int i = 0; i < width; i++)
2865       {
2866          reorder[i].one = cmap[i];
2867          reorder[i].two = i;
2868       }
2869       reorder.Sort();
2870 
2871       Array<int> reindex(width);
2872       for (int i = 0; i < width; i++)
2873       {
2874          reindex[reorder[i].two] = i;
2875          cmap[i] = reorder[i].one;
2876       }
2877 
2878       int *J = offd->GetJ();
2879       for (int i = 0; i < offd->NumNonZeroElems(); i++)
2880       {
2881          J[i] = reindex[J[i]];
2882       }
2883       offd->SortColumnIndices();
2884    }
2885 
2886    HypreParMatrix* R;
2887    R = new HypreParMatrix(MyComm, dof_offsets[nrk], old_dof_offsets[nrk],
2888                           dof_offsets, old_dof_offsets, diag, offd, cmap,
2889                           true);
2890 
2891    R->SetOwnerFlags(R->OwnsDiag(), R->OwnsOffd(), 1);
2892 
2893    return R;
2894 }
2895 
Destroy()2896 void ParFiniteElementSpace::Destroy()
2897 {
2898    ldof_group.DeleteAll();
2899    ldof_ltdof.DeleteAll();
2900    dof_offsets.DeleteAll();
2901    tdof_offsets.DeleteAll();
2902    tdof_nb_offsets.DeleteAll();
2903    // preserve old_dof_offsets
2904    ldof_sign.DeleteAll();
2905 
2906    delete P; P = NULL;
2907    delete Pconf; Pconf = NULL;
2908    delete Rconf; Rconf = NULL;
2909    delete R_transpose; R_transpose = NULL;
2910    delete R; R = NULL;
2911 
2912    delete gcomm; gcomm = NULL;
2913 
2914    num_face_nbr_dofs = -1;
2915    face_nbr_element_dof.Clear();
2916    face_nbr_ldof.Clear();
2917    face_nbr_glob_dof_map.DeleteAll();
2918    send_face_nbr_ldof.Clear();
2919 }
2920 
CopyProlongationAndRestriction(const FiniteElementSpace & fes,const Array<int> * perm)2921 void ParFiniteElementSpace::CopyProlongationAndRestriction(
2922    const FiniteElementSpace &fes, const Array<int> *perm)
2923 {
2924    const ParFiniteElementSpace *pfes
2925       = dynamic_cast<const ParFiniteElementSpace*>(&fes);
2926    MFEM_VERIFY(pfes != NULL, "");
2927    MFEM_VERIFY(P == NULL, "");
2928    MFEM_VERIFY(R == NULL, "");
2929 
2930    SparseMatrix *perm_mat = NULL, *perm_mat_tr = NULL;
2931    if (perm)
2932    {
2933       int n = perm->Size();
2934       perm_mat = new SparseMatrix(n, n);
2935       for (int i=0; i<n; ++i)
2936       {
2937          double s;
2938          int j = DecodeDof((*perm)[i], s);
2939          perm_mat->Set(i, j, s);
2940       }
2941       perm_mat->Finalize();
2942       perm_mat_tr = Transpose(*perm_mat);
2943    }
2944 
2945    if (pfes->P != NULL)
2946    {
2947       if (perm) { P = pfes->P->LeftDiagMult(*perm_mat); }
2948       else { P = new HypreParMatrix(*pfes->P); }
2949       nonconf_P = true;
2950    }
2951    if (pfes->R != NULL)
2952    {
2953       if (perm) { R = Mult(*pfes->R, *perm_mat_tr); }
2954       else { R = new SparseMatrix(*pfes->R); }
2955    }
2956 
2957    delete perm_mat;
2958    delete perm_mat_tr;
2959 }
2960 
GetTrueTransferOperator(const FiniteElementSpace & coarse_fes,OperatorHandle & T) const2961 void ParFiniteElementSpace::GetTrueTransferOperator(
2962    const FiniteElementSpace &coarse_fes, OperatorHandle &T) const
2963 {
2964    OperatorHandle Tgf(T.Type() == Operator::Hypre_ParCSR ?
2965                       Operator::MFEM_SPARSEMAT : Operator::ANY_TYPE);
2966    GetTransferOperator(coarse_fes, Tgf);
2967    Dof_TrueDof_Matrix(); // Make sure R is built - we need R in all cases.
2968    if (T.Type() == Operator::Hypre_ParCSR)
2969    {
2970       const ParFiniteElementSpace *c_pfes =
2971          dynamic_cast<const ParFiniteElementSpace *>(&coarse_fes);
2972       MFEM_ASSERT(c_pfes != NULL, "coarse_fes must be a parallel space");
2973       SparseMatrix *RA = mfem::Mult(*R, *Tgf.As<SparseMatrix>());
2974       Tgf.Clear();
2975       T.Reset(c_pfes->Dof_TrueDof_Matrix()->
2976               LeftDiagMult(*RA, GetTrueDofOffsets()));
2977       delete RA;
2978    }
2979    else
2980    {
2981       T.Reset(new TripleProductOperator(R, Tgf.Ptr(),
2982                                         coarse_fes.GetProlongationMatrix(),
2983                                         false, Tgf.OwnsOperator(), false));
2984       Tgf.SetOperatorOwner(false);
2985    }
2986 }
2987 
Update(bool want_transform)2988 void ParFiniteElementSpace::Update(bool want_transform)
2989 {
2990    MFEM_VERIFY(!IsVariableOrder(),
2991                "Parallel variable order space not supported yet.");
2992 
2993    if (mesh->GetSequence() == mesh_sequence)
2994    {
2995       return; // no need to update, no-op
2996    }
2997    if (want_transform && mesh->GetSequence() != mesh_sequence + 1)
2998    {
2999       MFEM_ABORT("Error in update sequence. Space needs to be updated after "
3000                  "each mesh modification.");
3001    }
3002 
3003    if (NURBSext)
3004    {
3005       UpdateNURBS();
3006       return;
3007    }
3008 
3009    Table* old_elem_dof = NULL;
3010    int old_ndofs;
3011 
3012    // save old DOF table
3013    if (want_transform)
3014    {
3015       old_elem_dof = elem_dof;
3016       elem_dof = NULL;
3017       old_ndofs = ndofs;
3018       Swap(dof_offsets, old_dof_offsets);
3019    }
3020 
3021    Destroy();
3022    FiniteElementSpace::Destroy(); // calls Th.Clear()
3023 
3024    FiniteElementSpace::Construct();
3025    Construct();
3026 
3027    BuildElementToDofTable();
3028 
3029    if (want_transform)
3030    {
3031       // calculate appropriate GridFunction transformation
3032       switch (mesh->GetLastOperation())
3033       {
3034          case Mesh::REFINE:
3035          {
3036             if (Th.Type() != Operator::MFEM_SPARSEMAT)
3037             {
3038                Th.Reset(new RefinementOperator(this, old_elem_dof, old_ndofs));
3039                // The RefinementOperator takes ownership of 'old_elem_dofs', so
3040                // we no longer own it:
3041                old_elem_dof = NULL;
3042             }
3043             else
3044             {
3045                // calculate fully assembled matrix
3046                Th.Reset(RefinementMatrix(old_ndofs, old_elem_dof));
3047             }
3048             break;
3049          }
3050 
3051          case Mesh::DEREFINE:
3052          {
3053             Th.Reset(ParallelDerefinementMatrix(old_ndofs, old_elem_dof));
3054             if (Nonconforming())
3055             {
3056                Th.SetOperatorOwner(false);
3057                Th.Reset(new TripleProductOperator(P, R, Th.Ptr(),
3058                                                   false, false, true));
3059             }
3060             break;
3061          }
3062 
3063          case Mesh::REBALANCE:
3064          {
3065             Th.Reset(RebalanceMatrix(old_ndofs, old_elem_dof));
3066             break;
3067          }
3068 
3069          default:
3070             break;
3071       }
3072 
3073       delete old_elem_dof;
3074    }
3075 }
3076 
UpdateMeshPointer(Mesh * new_mesh)3077 void ParFiniteElementSpace::UpdateMeshPointer(Mesh *new_mesh)
3078 {
3079    ParMesh *new_pmesh = dynamic_cast<ParMesh*>(new_mesh);
3080    MFEM_VERIFY(new_pmesh != NULL,
3081                "ParFiniteElementSpace::UpdateMeshPointer(...) must be a ParMesh");
3082    mesh = new_mesh;
3083    pmesh = new_pmesh;
3084 }
3085 
ConformingProlongationOperator(int lsize,const GroupCommunicator & gc_,bool local_)3086 ConformingProlongationOperator::ConformingProlongationOperator(
3087    int lsize, const GroupCommunicator &gc_, bool local_)
3088    : gc(gc_), local(local_)
3089 {
3090    const Table &group_ldof = gc.GroupLDofTable();
3091 
3092    int n_external = 0;
3093    for (int g=1; g<group_ldof.Size(); ++g)
3094    {
3095       if (!gc.GetGroupTopology().IAmMaster(g))
3096       {
3097          n_external += group_ldof.RowSize(g);
3098       }
3099    }
3100    int tsize = lsize - n_external;
3101 
3102    height = lsize;
3103    width = tsize;
3104 
3105    external_ldofs.Reserve(n_external);
3106    for (int gr = 1; gr < group_ldof.Size(); gr++)
3107    {
3108       if (!gc.GetGroupTopology().IAmMaster(gr))
3109       {
3110          external_ldofs.Append(group_ldof.GetRow(gr), group_ldof.RowSize(gr));
3111       }
3112    }
3113    external_ldofs.Sort();
3114 }
3115 
GetGroupCommunicator() const3116 const GroupCommunicator &ConformingProlongationOperator::GetGroupCommunicator()
3117 const
3118 {
3119    return gc;
3120 }
3121 
ConformingProlongationOperator(const ParFiniteElementSpace & pfes,bool local_)3122 ConformingProlongationOperator::ConformingProlongationOperator(
3123    const ParFiniteElementSpace &pfes, bool local_)
3124    : Operator(pfes.GetVSize(), pfes.GetTrueVSize()),
3125      external_ldofs(),
3126      gc(pfes.GroupComm()),
3127      local(local_)
3128 {
3129    MFEM_VERIFY(pfes.Conforming(), "");
3130    const Table &group_ldof = gc.GroupLDofTable();
3131    external_ldofs.Reserve(Height()-Width());
3132    for (int gr = 1; gr < group_ldof.Size(); gr++)
3133    {
3134       if (!gc.GetGroupTopology().IAmMaster(gr))
3135       {
3136          external_ldofs.Append(group_ldof.GetRow(gr), group_ldof.RowSize(gr));
3137       }
3138    }
3139    external_ldofs.Sort();
3140    MFEM_ASSERT(external_ldofs.Size() == Height()-Width(), "");
3141 #ifdef MFEM_DEBUG
3142    for (int j = 1; j < external_ldofs.Size(); j++)
3143    {
3144       // Check for repeated ldofs.
3145       MFEM_VERIFY(external_ldofs[j-1] < external_ldofs[j], "");
3146    }
3147    int j = 0;
3148    for (int i = 0; i < external_ldofs.Size(); i++)
3149    {
3150       const int end = external_ldofs[i];
3151       for ( ; j < end; j++)
3152       {
3153          MFEM_VERIFY(j-i == pfes.GetLocalTDofNumber(j), "");
3154       }
3155       j = end+1;
3156    }
3157    for ( ; j < Height(); j++)
3158    {
3159       MFEM_VERIFY(j-external_ldofs.Size() == pfes.GetLocalTDofNumber(j), "");
3160    }
3161    // gc.PrintInfo();
3162    // pfes.Dof_TrueDof_Matrix()->PrintCommPkg();
3163 #endif
3164 }
3165 
Mult(const Vector & x,Vector & y) const3166 void ConformingProlongationOperator::Mult(const Vector &x, Vector &y) const
3167 {
3168    MFEM_ASSERT(x.Size() == Width(), "");
3169    MFEM_ASSERT(y.Size() == Height(), "");
3170 
3171    const double *xdata = x.HostRead();
3172    double *ydata = y.HostWrite();
3173    const int m = external_ldofs.Size();
3174 
3175    const int in_layout = 2; // 2 - input is ltdofs array
3176    if (local)
3177    {
3178       y = 0.0;
3179    }
3180    else
3181    {
3182       gc.BcastBegin(const_cast<double*>(xdata), in_layout);
3183    }
3184 
3185    int j = 0;
3186    for (int i = 0; i < m; i++)
3187    {
3188       const int end = external_ldofs[i];
3189       std::copy(xdata+j-i, xdata+end-i, ydata+j);
3190       j = end+1;
3191    }
3192    std::copy(xdata+j-m, xdata+Width(), ydata+j);
3193 
3194    const int out_layout = 0; // 0 - output is ldofs array
3195    if (!local)
3196    {
3197       gc.BcastEnd(ydata, out_layout);
3198    }
3199 }
3200 
MultTranspose(const Vector & x,Vector & y) const3201 void ConformingProlongationOperator::MultTranspose(
3202    const Vector &x, Vector &y) const
3203 {
3204    MFEM_ASSERT(x.Size() == Height(), "");
3205    MFEM_ASSERT(y.Size() == Width(), "");
3206 
3207    const double *xdata = x.HostRead();
3208    double *ydata = y.HostWrite();
3209    const int m = external_ldofs.Size();
3210 
3211    if (!local)
3212    {
3213       gc.ReduceBegin(xdata);
3214    }
3215 
3216    int j = 0;
3217    for (int i = 0; i < m; i++)
3218    {
3219       const int end = external_ldofs[i];
3220       std::copy(xdata+j, xdata+end, ydata+j-i);
3221       j = end+1;
3222    }
3223    std::copy(xdata+j, xdata+Height(), ydata+j-m);
3224 
3225    const int out_layout = 2; // 2 - output is an array on all ltdofs
3226    if (!local)
3227    {
3228       gc.ReduceEnd<double>(ydata, out_layout, GroupCommunicator::Sum);
3229    }
3230 }
3231 
DeviceConformingProlongationOperator(const GroupCommunicator & gc_,const SparseMatrix * R,bool local_)3232 DeviceConformingProlongationOperator::DeviceConformingProlongationOperator(
3233    const GroupCommunicator &gc_, const SparseMatrix *R, bool local_)
3234    : ConformingProlongationOperator(R->Width(), gc_, local_),
3235      mpi_gpu_aware(Device::GetGPUAwareMPI())
3236 {
3237    MFEM_ASSERT(R->Finalized(), "");
3238    const int tdofs = R->Height();
3239    MFEM_ASSERT(tdofs == R->HostReadI()[tdofs], "");
3240    ltdof_ldof = Array<int>(const_cast<int*>(R->HostReadJ()), tdofs);
3241    ltdof_ldof.UseDevice();
3242    {
3243       Table nbr_ltdof;
3244       gc.GetNeighborLTDofTable(nbr_ltdof);
3245       const int nb_connections = nbr_ltdof.Size_of_connections();
3246       shr_ltdof.SetSize(nb_connections);
3247       shr_ltdof.CopyFrom(nbr_ltdof.GetJ());
3248       shr_buf.SetSize(nb_connections);
3249       shr_buf.UseDevice(true);
3250       shr_buf_offsets = nbr_ltdof.GetIMemory();
3251       {
3252          Array<int> shr_ltdof(nbr_ltdof.GetJ(), nb_connections);
3253          Array<int> unique_ltdof(shr_ltdof);
3254          unique_ltdof.Sort();
3255          unique_ltdof.Unique();
3256          // Note: the next loop modifies the J array of nbr_ltdof
3257          for (int i = 0; i < shr_ltdof.Size(); i++)
3258          {
3259             shr_ltdof[i] = unique_ltdof.FindSorted(shr_ltdof[i]);
3260             MFEM_ASSERT(shr_ltdof[i] != -1, "internal error");
3261          }
3262          Table unique_shr;
3263          Transpose(shr_ltdof, unique_shr, unique_ltdof.Size());
3264          unq_ltdof = Array<int>(unique_ltdof, unique_ltdof.Size());
3265          unq_shr_i = Array<int>(unique_shr.GetI(), unique_shr.Size()+1);
3266          unq_shr_j = Array<int>(unique_shr.GetJ(), unique_shr.Size_of_connections());
3267       }
3268       nbr_ltdof.GetJMemory().Delete();
3269       nbr_ltdof.LoseData();
3270    }
3271    {
3272       Table nbr_ldof;
3273       gc.GetNeighborLDofTable(nbr_ldof);
3274       const int nb_connections = nbr_ldof.Size_of_connections();
3275       ext_ldof.SetSize(nb_connections);
3276       ext_ldof.CopyFrom(nbr_ldof.GetJ());
3277       ext_ldof.GetMemory().UseDevice(true);
3278       ext_buf.SetSize(nb_connections);
3279       ext_buf.UseDevice(true);
3280       ext_buf_offsets = nbr_ldof.GetIMemory();
3281       nbr_ldof.GetJMemory().Delete();
3282       nbr_ldof.LoseData();
3283    }
3284    const GroupTopology &gtopo = gc.GetGroupTopology();
3285    int req_counter = 0;
3286    for (int nbr = 1; nbr < gtopo.GetNumNeighbors(); nbr++)
3287    {
3288       const int send_offset = shr_buf_offsets[nbr];
3289       const int send_size = shr_buf_offsets[nbr+1] - send_offset;
3290       if (send_size > 0) { req_counter++; }
3291 
3292       const int recv_offset = ext_buf_offsets[nbr];
3293       const int recv_size = ext_buf_offsets[nbr+1] - recv_offset;
3294       if (recv_size > 0) { req_counter++; }
3295    }
3296    requests = new MPI_Request[req_counter];
3297 }
3298 
DeviceConformingProlongationOperator(const ParFiniteElementSpace & pfes,bool local_)3299 DeviceConformingProlongationOperator::DeviceConformingProlongationOperator(
3300    const ParFiniteElementSpace &pfes, bool local_)
3301    : DeviceConformingProlongationOperator(pfes.GroupComm(),
3302                                           pfes.GetRestrictionMatrix(),
3303                                           local_)
3304 {
3305    MFEM_ASSERT(pfes.Conforming(), "internal error");
3306    MFEM_ASSERT(pfes.GetRestrictionMatrix()->Height() == pfes.GetTrueVSize(), "");
3307 }
3308 
ExtractSubVector(const Array<int> & indices,const Vector & in,Vector & out)3309 static void ExtractSubVector(const Array<int> &indices,
3310                              const Vector &in, Vector &out)
3311 {
3312    MFEM_ASSERT(indices.Size() == out.Size(), "incompatible sizes!");
3313    auto y = out.Write();
3314    const auto x = in.Read();
3315    const auto I = indices.Read();
3316    MFEM_FORALL(i, indices.Size(), y[i] = x[I[i]];); // indices can be repeated
3317 }
3318 
BcastBeginCopy(const Vector & x) const3319 void DeviceConformingProlongationOperator::BcastBeginCopy(
3320    const Vector &x) const
3321 {
3322    // shr_buf[i] = src[shr_ltdof[i]]
3323    if (shr_ltdof.Size() == 0) { return; }
3324    ExtractSubVector(shr_ltdof, x, shr_buf);
3325    // If the above kernel is executed asynchronously, we should wait for it to
3326    // complete
3327    if (mpi_gpu_aware) { MFEM_STREAM_SYNC; }
3328 }
3329 
SetSubVector(const Array<int> & indices,const Vector & in,Vector & out)3330 static void SetSubVector(const Array<int> &indices,
3331                          const Vector &in, Vector &out)
3332 {
3333    MFEM_ASSERT(indices.Size() == in.Size(), "incompatible sizes!");
3334    // Use ReadWrite() since we modify only a subset of the indices:
3335    auto y = out.ReadWrite();
3336    const auto x = in.Read();
3337    const auto I = indices.Read();
3338    MFEM_FORALL(i, indices.Size(), y[I[i]] = x[i];);
3339 }
3340 
BcastLocalCopy(const Vector & x,Vector & y) const3341 void DeviceConformingProlongationOperator::BcastLocalCopy(
3342    const Vector &x, Vector &y) const
3343 {
3344    // dst[ltdof_ldof[i]] = src[i]
3345    if (ltdof_ldof.Size() == 0) { return; }
3346    SetSubVector(ltdof_ldof, x, y);
3347 }
3348 
BcastEndCopy(Vector & y) const3349 void DeviceConformingProlongationOperator::BcastEndCopy(
3350    Vector &y) const
3351 {
3352    // dst[ext_ldof[i]] = ext_buf[i]
3353    if (ext_ldof.Size() == 0) { return; }
3354    SetSubVector(ext_ldof, ext_buf, y);
3355 }
3356 
Mult(const Vector & x,Vector & y) const3357 void DeviceConformingProlongationOperator::Mult(const Vector &x,
3358                                                 Vector &y) const
3359 {
3360    const GroupTopology &gtopo = gc.GetGroupTopology();
3361    int req_counter = 0;
3362    // Make sure 'y' is marked as valid on device and for use on device.
3363    // This ensures that there is no unnecessary host to device copy when the
3364    // input 'y' is valid on host (in 'y.SetSubVector(ext_ldof, 0.0)' when local
3365    // is true) or BcastLocalCopy (when local is false).
3366    y.Write();
3367    if (local)
3368    {
3369       // done on device since we've marked ext_ldof for use on device:
3370       y.SetSubVector(ext_ldof, 0.0);
3371    }
3372    else
3373    {
3374       BcastBeginCopy(x); // copy to 'shr_buf'
3375       for (int nbr = 1; nbr < gtopo.GetNumNeighbors(); nbr++)
3376       {
3377          const int send_offset = shr_buf_offsets[nbr];
3378          const int send_size = shr_buf_offsets[nbr+1] - send_offset;
3379          if (send_size > 0)
3380          {
3381             auto send_buf = mpi_gpu_aware ? shr_buf.Read() : shr_buf.HostRead();
3382             MPI_Isend(send_buf + send_offset, send_size, MPI_DOUBLE,
3383                       gtopo.GetNeighborRank(nbr), 41822,
3384                       gtopo.GetComm(), &requests[req_counter++]);
3385          }
3386          const int recv_offset = ext_buf_offsets[nbr];
3387          const int recv_size = ext_buf_offsets[nbr+1] - recv_offset;
3388          if (recv_size > 0)
3389          {
3390             auto recv_buf = mpi_gpu_aware ? ext_buf.Write() : ext_buf.HostWrite();
3391             MPI_Irecv(recv_buf + recv_offset, recv_size, MPI_DOUBLE,
3392                       gtopo.GetNeighborRank(nbr), 41822,
3393                       gtopo.GetComm(), &requests[req_counter++]);
3394          }
3395       }
3396    }
3397    BcastLocalCopy(x, y);
3398    if (!local)
3399    {
3400       MPI_Waitall(req_counter, requests, MPI_STATUSES_IGNORE);
3401       BcastEndCopy(y); // copy from 'ext_buf'
3402    }
3403 }
3404 
~DeviceConformingProlongationOperator()3405 DeviceConformingProlongationOperator::~DeviceConformingProlongationOperator()
3406 {
3407    delete [] requests;
3408    ext_buf_offsets.Delete();
3409    shr_buf_offsets.Delete();
3410 }
3411 
ReduceBeginCopy(const Vector & x) const3412 void DeviceConformingProlongationOperator::ReduceBeginCopy(
3413    const Vector &x) const
3414 {
3415    // ext_buf[i] = src[ext_ldof[i]]
3416    if (ext_ldof.Size() == 0) { return; }
3417    ExtractSubVector(ext_ldof, x, ext_buf);
3418    // If the above kernel is executed asynchronously, we should wait for it to
3419    // complete
3420    if (mpi_gpu_aware) { MFEM_STREAM_SYNC; }
3421 }
3422 
ReduceLocalCopy(const Vector & x,Vector & y) const3423 void DeviceConformingProlongationOperator::ReduceLocalCopy(
3424    const Vector &x, Vector &y) const
3425 {
3426    // dst[i] = src[ltdof_ldof[i]]
3427    if (ltdof_ldof.Size() == 0) { return; }
3428    ExtractSubVector(ltdof_ldof, x, y);
3429 }
3430 
AddSubVector(const Array<int> & unique_dst_indices,const Array<int> & unique_to_src_offsets,const Array<int> & unique_to_src_indices,const Vector & src,Vector & dst)3431 static void AddSubVector(const Array<int> &unique_dst_indices,
3432                          const Array<int> &unique_to_src_offsets,
3433                          const Array<int> &unique_to_src_indices,
3434                          const Vector &src,
3435                          Vector &dst)
3436 {
3437    auto y = dst.ReadWrite();
3438    const auto x = src.Read();
3439    const auto DST_I = unique_dst_indices.Read();
3440    const auto SRC_O = unique_to_src_offsets.Read();
3441    const auto SRC_I = unique_to_src_indices.Read();
3442    MFEM_FORALL(i, unique_dst_indices.Size(),
3443    {
3444       const int dst_idx = DST_I[i];
3445       double sum = y[dst_idx];
3446       const int end = SRC_O[i+1];
3447       for (int j = SRC_O[i]; j != end; ++j) { sum += x[SRC_I[j]]; }
3448       y[dst_idx] = sum;
3449    });
3450 }
3451 
ReduceEndAssemble(Vector & y) const3452 void DeviceConformingProlongationOperator::ReduceEndAssemble(Vector &y) const
3453 {
3454    // dst[shr_ltdof[i]] += shr_buf[i]
3455    if (unq_ltdof.Size() == 0) { return; }
3456    AddSubVector(unq_ltdof, unq_shr_i, unq_shr_j, shr_buf, y);
3457 }
3458 
MultTranspose(const Vector & x,Vector & y) const3459 void DeviceConformingProlongationOperator::MultTranspose(const Vector &x,
3460                                                          Vector &y) const
3461 {
3462    const GroupTopology &gtopo = gc.GetGroupTopology();
3463    int req_counter = 0;
3464    if (!local)
3465    {
3466       ReduceBeginCopy(x); // copy to 'ext_buf'
3467       for (int nbr = 1; nbr < gtopo.GetNumNeighbors(); nbr++)
3468       {
3469          const int send_offset = ext_buf_offsets[nbr];
3470          const int send_size = ext_buf_offsets[nbr+1] - send_offset;
3471          if (send_size > 0)
3472          {
3473             auto send_buf = mpi_gpu_aware ? ext_buf.Read() : ext_buf.HostRead();
3474             MPI_Isend(send_buf + send_offset, send_size, MPI_DOUBLE,
3475                       gtopo.GetNeighborRank(nbr), 41823,
3476                       gtopo.GetComm(), &requests[req_counter++]);
3477          }
3478          const int recv_offset = shr_buf_offsets[nbr];
3479          const int recv_size = shr_buf_offsets[nbr+1] - recv_offset;
3480          if (recv_size > 0)
3481          {
3482             auto recv_buf = mpi_gpu_aware ? shr_buf.Write() : shr_buf.HostWrite();
3483             MPI_Irecv(recv_buf + recv_offset, recv_size, MPI_DOUBLE,
3484                       gtopo.GetNeighborRank(nbr), 41823,
3485                       gtopo.GetComm(), &requests[req_counter++]);
3486          }
3487       }
3488    }
3489    ReduceLocalCopy(x, y);
3490    if (!local)
3491    {
3492       MPI_Waitall(req_counter, requests, MPI_STATUSES_IGNORE);
3493       ReduceEndAssemble(y); // assemble from 'shr_buf'
3494    }
3495 }
3496 
3497 } // namespace mfem
3498 
3499 #endif
3500