1 // Copyright (c) 2010-2021, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11
12 #include "../config/config.hpp"
13
14 #ifdef MFEM_USE_MPI
15
16 #include "pfespace.hpp"
17 #include "prestriction.hpp"
18 #include "../general/forall.hpp"
19 #include "../general/sort_pairs.hpp"
20 #include "../mesh/mesh_headers.hpp"
21 #include "../general/binaryio.hpp"
22
23 #include <limits>
24 #include <list>
25
26 namespace mfem
27 {
28
ParFiniteElementSpace(const ParFiniteElementSpace & orig,ParMesh * pmesh,const FiniteElementCollection * fec)29 ParFiniteElementSpace::ParFiniteElementSpace(
30 const ParFiniteElementSpace &orig, ParMesh *pmesh,
31 const FiniteElementCollection *fec)
32 : FiniteElementSpace(orig, pmesh, fec)
33 {
34 ParInit(pmesh ? pmesh : orig.pmesh);
35 }
36
ParFiniteElementSpace(const FiniteElementSpace & orig,ParMesh & pmesh,const FiniteElementCollection * fec)37 ParFiniteElementSpace::ParFiniteElementSpace(
38 const FiniteElementSpace &orig, ParMesh &pmesh,
39 const FiniteElementCollection *fec)
40 : FiniteElementSpace(orig, &pmesh, fec)
41 {
42 ParInit(&pmesh);
43 }
44
ParFiniteElementSpace(ParMesh * pm,const FiniteElementSpace * global_fes,const int * partitioning,const FiniteElementCollection * f)45 ParFiniteElementSpace::ParFiniteElementSpace(
46 ParMesh *pm, const FiniteElementSpace *global_fes, const int *partitioning,
47 const FiniteElementCollection *f)
48 : FiniteElementSpace(pm, MakeLocalNURBSext(global_fes->GetNURBSext(),
49 pm->NURBSext),
50 f ? f : global_fes->FEColl(),
51 global_fes->GetVDim(), global_fes->GetOrdering())
52 {
53 ParInit(pm);
54 // For NURBS spaces, the variable-order data is contained in the
55 // NURBSExtension of 'global_fes' and inside the ParNURBSExtension of 'pm'.
56
57 // TODO: when general variable-order support is added, copy the local portion
58 // of the variable-order data from 'global_fes' to 'this'.
59 }
60
ParFiniteElementSpace(ParMesh * pm,const FiniteElementCollection * f,int dim,int ordering)61 ParFiniteElementSpace::ParFiniteElementSpace(
62 ParMesh *pm, const FiniteElementCollection *f, int dim, int ordering)
63 : FiniteElementSpace(pm, f, dim, ordering)
64 {
65 ParInit(pm);
66 }
67
ParFiniteElementSpace(ParMesh * pm,NURBSExtension * ext,const FiniteElementCollection * f,int dim,int ordering)68 ParFiniteElementSpace::ParFiniteElementSpace(
69 ParMesh *pm, NURBSExtension *ext, const FiniteElementCollection *f,
70 int dim, int ordering)
71 : FiniteElementSpace(pm, ext, f, dim, ordering)
72 {
73 ParInit(pm);
74 }
75
76 // static method
MakeLocalNURBSext(const NURBSExtension * globNURBSext,const NURBSExtension * parNURBSext)77 ParNURBSExtension *ParFiniteElementSpace::MakeLocalNURBSext(
78 const NURBSExtension *globNURBSext, const NURBSExtension *parNURBSext)
79 {
80 if (globNURBSext == NULL) { return NULL; }
81 const ParNURBSExtension *pNURBSext =
82 dynamic_cast<const ParNURBSExtension*>(parNURBSext);
83 MFEM_ASSERT(pNURBSext, "need a ParNURBSExtension");
84 // make a copy of globNURBSext:
85 NURBSExtension *tmp_globNURBSext = new NURBSExtension(*globNURBSext);
86 // tmp_globNURBSext will be deleted by the following ParNURBSExtension ctor:
87 return new ParNURBSExtension(tmp_globNURBSext, pNURBSext);
88 }
89
ParInit(ParMesh * pm)90 void ParFiniteElementSpace::ParInit(ParMesh *pm)
91 {
92 pmesh = pm;
93 pncmesh = NULL;
94
95 MyComm = pmesh->GetComm();
96 NRanks = pmesh->GetNRanks();
97 MyRank = pmesh->GetMyRank();
98
99 gcomm = NULL;
100
101 P = NULL;
102 Pconf = NULL;
103 nonconf_P = false;
104 Rconf = NULL;
105 R_transpose = NULL;
106 R = NULL;
107
108 num_face_nbr_dofs = -1;
109
110 if (NURBSext && !pNURBSext())
111 {
112 // This is necessary in some cases: e.g. when the FiniteElementSpace
113 // constructor creates a serial NURBSExtension of higher order than the
114 // mesh NURBSExtension.
115 MFEM_ASSERT(own_ext, "internal error");
116
117 ParNURBSExtension *pNe = new ParNURBSExtension(
118 NURBSext, dynamic_cast<ParNURBSExtension *>(pmesh->NURBSext));
119 // serial NURBSext is destroyed by the above constructor
120 NURBSext = pNe;
121 UpdateNURBS();
122 }
123
124 Construct(); // parallel version of Construct().
125
126 // Apply the ldof_signs to the elem_dof Table
127 if (Conforming() && !NURBSext)
128 {
129 ApplyLDofSigns(*elem_dof);
130 }
131 }
132
Construct()133 void ParFiniteElementSpace::Construct()
134 {
135 MFEM_VERIFY(!IsVariableOrder(), "variable orders are not implemented"
136 " for ParFiniteElementSpace yet.");
137
138 if (NURBSext)
139 {
140 ConstructTrueNURBSDofs();
141 GenerateGlobalOffsets();
142 }
143 else if (Conforming())
144 {
145 ConstructTrueDofs();
146 GenerateGlobalOffsets();
147 }
148 else // Nonconforming()
149 {
150 pncmesh = pmesh->pncmesh;
151
152 // Initialize 'gcomm' for the cut (aka "partially conforming") space.
153 // In the process, the array 'ldof_ltdof' is also initialized (for the cut
154 // space) and used; however, it will be overwritten below with the real
155 // true dofs. Also, 'ldof_sign' and 'ldof_group' are constructed for the
156 // cut space.
157 ConstructTrueDofs();
158
159 ngedofs = ngfdofs = 0;
160
161 // calculate number of ghost DOFs
162 ngvdofs = pncmesh->GetNGhostVertices()
163 * fec->DofForGeometry(Geometry::POINT);
164
165 if (pmesh->Dimension() > 1)
166 {
167 ngedofs = pncmesh->GetNGhostEdges()
168 * fec->DofForGeometry(Geometry::SEGMENT);
169 }
170
171 if (pmesh->Dimension() > 2)
172 {
173 int stride = fec->DofForGeometry(Geometry::SQUARE);
174 ngfdofs = pncmesh->GetNGhostFaces() * stride;
175 }
176
177 // total number of ghost DOFs. Ghost DOFs start at index 'ndofs', i.e.,
178 // after all regular DOFs
179 ngdofs = ngvdofs + ngedofs + ngfdofs;
180
181 // get P and R matrices, initialize DOF offsets, etc. NOTE: in the NC
182 // case this needs to be done here to get the number of true DOFs
183 ltdof_size = BuildParallelConformingInterpolation(
184 &P, &R, dof_offsets, tdof_offsets, &ldof_ltdof, false);
185
186 // TODO future: split BuildParallelConformingInterpolation into two parts
187 // to overlap its communication with processing between this constructor
188 // and the point where the P matrix is actually needed.
189 }
190 }
191
PrintPartitionStats()192 void ParFiniteElementSpace::PrintPartitionStats()
193 {
194 long ltdofs = ltdof_size;
195 long min_ltdofs, max_ltdofs, sum_ltdofs;
196
197 MPI_Reduce(<dofs, &min_ltdofs, 1, MPI_LONG, MPI_MIN, 0, MyComm);
198 MPI_Reduce(<dofs, &max_ltdofs, 1, MPI_LONG, MPI_MAX, 0, MyComm);
199 MPI_Reduce(<dofs, &sum_ltdofs, 1, MPI_LONG, MPI_SUM, 0, MyComm);
200
201 if (MyRank == 0)
202 {
203 double avg = double(sum_ltdofs) / NRanks;
204 mfem::out << "True DOF partitioning: min " << min_ltdofs
205 << ", avg " << std::fixed << std::setprecision(1) << avg
206 << ", max " << max_ltdofs
207 << ", (max-avg)/avg " << 100.0*(max_ltdofs - avg)/avg
208 << "%" << std::endl;
209 }
210
211 if (NRanks <= 32)
212 {
213 if (MyRank == 0)
214 {
215 mfem::out << "True DOFs by rank: " << ltdofs;
216 for (int i = 1; i < NRanks; i++)
217 {
218 MPI_Status status;
219 MPI_Recv(<dofs, 1, MPI_LONG, i, 123, MyComm, &status);
220 mfem::out << " " << ltdofs;
221 }
222 mfem::out << "\n";
223 }
224 else
225 {
226 MPI_Send(<dofs, 1, MPI_LONG, 0, 123, MyComm);
227 }
228 }
229 }
230
GetGroupComm(GroupCommunicator & gc,int ldof_type,Array<int> * ldof_sign)231 void ParFiniteElementSpace::GetGroupComm(
232 GroupCommunicator &gc, int ldof_type, Array<int> *ldof_sign)
233 {
234 int gr;
235 int ng = pmesh->GetNGroups();
236 int nvd, ned, ntd = 0, nqd = 0;
237 Array<int> dofs;
238
239 int group_ldof_counter;
240 Table &group_ldof = gc.GroupLDofTable();
241
242 nvd = fec->DofForGeometry(Geometry::POINT);
243 ned = fec->DofForGeometry(Geometry::SEGMENT);
244
245 if (mesh->Dimension() >= 3)
246 {
247 if (mesh->HasGeometry(Geometry::TRIANGLE))
248 {
249 ntd = fec->DofForGeometry(Geometry::TRIANGLE);
250 }
251 if (mesh->HasGeometry(Geometry::SQUARE))
252 {
253 nqd = fec->DofForGeometry(Geometry::SQUARE);
254 }
255 }
256
257 if (ldof_sign)
258 {
259 ldof_sign->SetSize(GetNDofs());
260 *ldof_sign = 1;
261 }
262
263 // count the number of ldofs in all groups (excluding the local group 0)
264 group_ldof_counter = 0;
265 for (gr = 1; gr < ng; gr++)
266 {
267 group_ldof_counter += nvd * pmesh->GroupNVertices(gr);
268 group_ldof_counter += ned * pmesh->GroupNEdges(gr);
269 group_ldof_counter += ntd * pmesh->GroupNTriangles(gr);
270 group_ldof_counter += nqd * pmesh->GroupNQuadrilaterals(gr);
271 }
272 if (ldof_type)
273 {
274 group_ldof_counter *= vdim;
275 }
276 // allocate the I and J arrays in group_ldof
277 group_ldof.SetDims(ng, group_ldof_counter);
278
279 // build the full group_ldof table
280 group_ldof_counter = 0;
281 group_ldof.GetI()[0] = group_ldof.GetI()[1] = 0;
282 for (gr = 1; gr < ng; gr++)
283 {
284 int j, k, l, m, o, nv, ne, nt, nq;
285 const int *ind;
286
287 nv = pmesh->GroupNVertices(gr);
288 ne = pmesh->GroupNEdges(gr);
289 nt = pmesh->GroupNTriangles(gr);
290 nq = pmesh->GroupNQuadrilaterals(gr);
291
292 // vertices
293 if (nvd > 0)
294 {
295 for (j = 0; j < nv; j++)
296 {
297 k = pmesh->GroupVertex(gr, j);
298
299 dofs.SetSize(nvd);
300 m = nvd * k;
301 for (l = 0; l < nvd; l++, m++)
302 {
303 dofs[l] = m;
304 }
305
306 if (ldof_type)
307 {
308 DofsToVDofs(dofs);
309 }
310
311 for (l = 0; l < dofs.Size(); l++)
312 {
313 group_ldof.GetJ()[group_ldof_counter++] = dofs[l];
314 }
315 }
316 }
317
318 // edges
319 if (ned > 0)
320 {
321 for (j = 0; j < ne; j++)
322 {
323 pmesh->GroupEdge(gr, j, k, o);
324
325 dofs.SetSize(ned);
326 m = nvdofs+k*ned;
327 ind = fec->DofOrderForOrientation(Geometry::SEGMENT, o);
328 for (l = 0; l < ned; l++)
329 {
330 if (ind[l] < 0)
331 {
332 dofs[l] = m + (-1-ind[l]);
333 if (ldof_sign)
334 {
335 (*ldof_sign)[dofs[l]] = -1;
336 }
337 }
338 else
339 {
340 dofs[l] = m + ind[l];
341 }
342 }
343
344 if (ldof_type)
345 {
346 DofsToVDofs(dofs);
347 }
348
349 for (l = 0; l < dofs.Size(); l++)
350 {
351 group_ldof.GetJ()[group_ldof_counter++] = dofs[l];
352 }
353 }
354 }
355
356 // triangles
357 if (ntd > 0)
358 {
359 for (j = 0; j < nt; j++)
360 {
361 pmesh->GroupTriangle(gr, j, k, o);
362
363 dofs.SetSize(ntd);
364 m = nvdofs + nedofs + FirstFaceDof(k);
365 ind = fec->DofOrderForOrientation(Geometry::TRIANGLE, o);
366 for (l = 0; l < ntd; l++)
367 {
368 if (ind[l] < 0)
369 {
370 dofs[l] = m + (-1-ind[l]);
371 if (ldof_sign)
372 {
373 (*ldof_sign)[dofs[l]] = -1;
374 }
375 }
376 else
377 {
378 dofs[l] = m + ind[l];
379 }
380 }
381
382 if (ldof_type)
383 {
384 DofsToVDofs(dofs);
385 }
386
387 for (l = 0; l < dofs.Size(); l++)
388 {
389 group_ldof.GetJ()[group_ldof_counter++] = dofs[l];
390 }
391 }
392 }
393
394 // quadrilaterals
395 if (nqd > 0)
396 {
397 for (j = 0; j < nq; j++)
398 {
399 pmesh->GroupQuadrilateral(gr, j, k, o);
400
401 dofs.SetSize(nqd);
402 m = nvdofs + nedofs + FirstFaceDof(k);
403 ind = fec->DofOrderForOrientation(Geometry::SQUARE, o);
404 for (l = 0; l < nqd; l++)
405 {
406 if (ind[l] < 0)
407 {
408 dofs[l] = m + (-1-ind[l]);
409 if (ldof_sign)
410 {
411 (*ldof_sign)[dofs[l]] = -1;
412 }
413 }
414 else
415 {
416 dofs[l] = m + ind[l];
417 }
418 }
419
420 if (ldof_type)
421 {
422 DofsToVDofs(dofs);
423 }
424
425 for (l = 0; l < dofs.Size(); l++)
426 {
427 group_ldof.GetJ()[group_ldof_counter++] = dofs[l];
428 }
429 }
430 }
431
432 group_ldof.GetI()[gr+1] = group_ldof_counter;
433 }
434
435 gc.Finalize();
436 }
437
ApplyLDofSigns(Array<int> & dofs) const438 void ParFiniteElementSpace::ApplyLDofSigns(Array<int> &dofs) const
439 {
440 MFEM_ASSERT(Conforming(), "wrong code path");
441
442 for (int i = 0; i < dofs.Size(); i++)
443 {
444 if (dofs[i] < 0)
445 {
446 if (ldof_sign[-1-dofs[i]] < 0)
447 {
448 dofs[i] = -1-dofs[i];
449 }
450 }
451 else
452 {
453 if (ldof_sign[dofs[i]] < 0)
454 {
455 dofs[i] = -1-dofs[i];
456 }
457 }
458 }
459 }
460
ApplyLDofSigns(Table & el_dof) const461 void ParFiniteElementSpace::ApplyLDofSigns(Table &el_dof) const
462 {
463 Array<int> all_dofs(el_dof.GetJ(), el_dof.Size_of_connections());
464 ApplyLDofSigns(all_dofs);
465 }
466
GetElementDofs(int i,Array<int> & dofs) const467 void ParFiniteElementSpace::GetElementDofs(int i, Array<int> &dofs) const
468 {
469 if (elem_dof)
470 {
471 elem_dof->GetRow(i, dofs);
472 return;
473 }
474 FiniteElementSpace::GetElementDofs(i, dofs);
475 if (Conforming())
476 {
477 ApplyLDofSigns(dofs);
478 }
479 }
480
GetBdrElementDofs(int i,Array<int> & dofs) const481 void ParFiniteElementSpace::GetBdrElementDofs(int i, Array<int> &dofs) const
482 {
483 if (bdr_elem_dof)
484 {
485 bdr_elem_dof->GetRow(i, dofs);
486 return;
487 }
488 FiniteElementSpace::GetBdrElementDofs(i, dofs);
489 if (Conforming())
490 {
491 ApplyLDofSigns(dofs);
492 }
493 }
494
GetFaceDofs(int i,Array<int> & dofs,int variant) const495 int ParFiniteElementSpace::GetFaceDofs(int i, Array<int> &dofs,
496 int variant) const
497 {
498 if (face_dof && variant == 0)
499 {
500 face_dof->GetRow(i, dofs);
501 return fec->GetOrder();
502 }
503 int p = FiniteElementSpace::GetFaceDofs(i, dofs, variant);
504 if (Conforming())
505 {
506 ApplyLDofSigns(dofs);
507 }
508 return p;
509 }
510
GetFE(int i) const511 const FiniteElement *ParFiniteElementSpace::GetFE(int i) const
512 {
513 int ne = mesh->GetNE();
514 if (i >= ne) { return GetFaceNbrFE(i - ne); }
515 else { return FiniteElementSpace::GetFE(i); }
516 }
517
GetFaceRestriction(ElementDofOrdering e_ordering,FaceType type,L2FaceValues mul) const518 const FaceRestriction *ParFiniteElementSpace::GetFaceRestriction(
519 ElementDofOrdering e_ordering, FaceType type, L2FaceValues mul) const
520 {
521 const bool is_dg_space = IsDGSpace();
522 const L2FaceValues m = (is_dg_space && mul==L2FaceValues::DoubleValued) ?
523 L2FaceValues::DoubleValued : L2FaceValues::SingleValued;
524 auto key = std::make_tuple(is_dg_space, e_ordering, type, m);
525 auto itr = L2F.find(key);
526 if (itr != L2F.end())
527 {
528 return itr->second;
529 }
530 else
531 {
532 FaceRestriction *res;
533 if (is_dg_space)
534 {
535 res = new ParL2FaceRestriction(*this, e_ordering, type, m);
536 }
537 else
538 {
539 res = new H1FaceRestriction(*this, e_ordering, type);
540 }
541 L2F[key] = res;
542 return res;
543 }
544 }
545
GetSharedEdgeDofs(int group,int ei,Array<int> & dofs) const546 void ParFiniteElementSpace::GetSharedEdgeDofs(
547 int group, int ei, Array<int> &dofs) const
548 {
549 int l_edge, ori;
550 MFEM_ASSERT(0 <= ei && ei < pmesh->GroupNEdges(group), "invalid edge index");
551 pmesh->GroupEdge(group, ei, l_edge, ori);
552 if (ori > 0) // ori = +1 or -1
553 {
554 GetEdgeDofs(l_edge, dofs);
555 }
556 else
557 {
558 Array<int> rdofs;
559 fec->SubDofOrder(Geometry::SEGMENT, 1, 1, dofs);
560 GetEdgeDofs(l_edge, rdofs);
561 for (int i = 0; i < dofs.Size(); i++)
562 {
563 const int di = dofs[i];
564 dofs[i] = (di >= 0) ? rdofs[di] : -1-rdofs[-1-di];
565 }
566 }
567 }
568
GetSharedTriangleDofs(int group,int fi,Array<int> & dofs) const569 void ParFiniteElementSpace::GetSharedTriangleDofs(
570 int group, int fi, Array<int> &dofs) const
571 {
572 int l_face, ori;
573 MFEM_ASSERT(0 <= fi && fi < pmesh->GroupNTriangles(group),
574 "invalid triangular face index");
575 pmesh->GroupTriangle(group, fi, l_face, ori);
576 if (ori == 0)
577 {
578 GetFaceDofs(l_face, dofs);
579 }
580 else
581 {
582 Array<int> rdofs;
583 fec->SubDofOrder(Geometry::TRIANGLE, 2, ori, dofs);
584 GetFaceDofs(l_face, rdofs);
585 for (int i = 0; i < dofs.Size(); i++)
586 {
587 const int di = dofs[i];
588 dofs[i] = (di >= 0) ? rdofs[di] : -1-rdofs[-1-di];
589 }
590 }
591 }
592
GetSharedQuadrilateralDofs(int group,int fi,Array<int> & dofs) const593 void ParFiniteElementSpace::GetSharedQuadrilateralDofs(
594 int group, int fi, Array<int> &dofs) const
595 {
596 int l_face, ori;
597 MFEM_ASSERT(0 <= fi && fi < pmesh->GroupNQuadrilaterals(group),
598 "invalid quadrilateral face index");
599 pmesh->GroupQuadrilateral(group, fi, l_face, ori);
600 if (ori == 0)
601 {
602 GetFaceDofs(l_face, dofs);
603 }
604 else
605 {
606 Array<int> rdofs;
607 fec->SubDofOrder(Geometry::SQUARE, 2, ori, dofs);
608 GetFaceDofs(l_face, rdofs);
609 for (int i = 0; i < dofs.Size(); i++)
610 {
611 const int di = dofs[i];
612 dofs[i] = (di >= 0) ? rdofs[di] : -1-rdofs[-1-di];
613 }
614 }
615 }
616
GenerateGlobalOffsets() const617 void ParFiniteElementSpace::GenerateGlobalOffsets() const
618 {
619 MFEM_ASSERT(Conforming(), "wrong code path");
620
621 HYPRE_BigInt ldof[2];
622 Array<HYPRE_BigInt> *offsets[2] = { &dof_offsets, &tdof_offsets };
623
624 ldof[0] = GetVSize();
625 ldof[1] = TrueVSize();
626
627 pmesh->GenerateOffsets(2, ldof, offsets);
628
629 if (HYPRE_AssumedPartitionCheck())
630 {
631 // communicate the neighbor offsets in tdof_nb_offsets
632 GroupTopology > = GetGroupTopo();
633 int nsize = gt.GetNumNeighbors()-1;
634 MPI_Request *requests = new MPI_Request[2*nsize];
635 MPI_Status *statuses = new MPI_Status[2*nsize];
636 tdof_nb_offsets.SetSize(nsize+1);
637 tdof_nb_offsets[0] = tdof_offsets[0];
638
639 // send and receive neighbors' local tdof offsets
640 int request_counter = 0;
641 for (int i = 1; i <= nsize; i++)
642 {
643 MPI_Irecv(&tdof_nb_offsets[i], 1, HYPRE_MPI_BIG_INT,
644 gt.GetNeighborRank(i), 5365, MyComm,
645 &requests[request_counter++]);
646 }
647 for (int i = 1; i <= nsize; i++)
648 {
649 MPI_Isend(&tdof_nb_offsets[0], 1, HYPRE_MPI_BIG_INT,
650 gt.GetNeighborRank(i), 5365, MyComm,
651 &requests[request_counter++]);
652 }
653 MPI_Waitall(request_counter, requests, statuses);
654
655 delete [] statuses;
656 delete [] requests;
657 }
658 }
659
Build_Dof_TrueDof_Matrix() const660 void ParFiniteElementSpace::Build_Dof_TrueDof_Matrix() const // matrix P
661 {
662 MFEM_ASSERT(Conforming(), "wrong code path");
663
664 if (P) { return; }
665
666 int ldof = GetVSize();
667 int ltdof = TrueVSize();
668
669 HYPRE_Int *i_diag = Memory<HYPRE_Int>(ldof+1);
670 HYPRE_Int *j_diag = Memory<HYPRE_Int>(ltdof);
671 int diag_counter;
672
673 HYPRE_Int *i_offd = Memory<HYPRE_Int>(ldof+1);
674 HYPRE_Int *j_offd = Memory<HYPRE_Int>(ldof-ltdof);
675 int offd_counter;
676
677 HYPRE_BigInt *cmap = Memory<HYPRE_BigInt>(ldof-ltdof);
678
679 HYPRE_BigInt *col_starts = GetTrueDofOffsets();
680 HYPRE_BigInt *row_starts = GetDofOffsets();
681
682 Array<Pair<HYPRE_BigInt, int> > cmap_j_offd(ldof-ltdof);
683
684 i_diag[0] = i_offd[0] = 0;
685 diag_counter = offd_counter = 0;
686 for (int i = 0; i < ldof; i++)
687 {
688 int ltdof = GetLocalTDofNumber(i);
689 if (ltdof >= 0)
690 {
691 j_diag[diag_counter++] = ltdof;
692 }
693 else
694 {
695 cmap_j_offd[offd_counter].one = GetGlobalTDofNumber(i);
696 cmap_j_offd[offd_counter].two = offd_counter;
697 offd_counter++;
698 }
699 i_diag[i+1] = diag_counter;
700 i_offd[i+1] = offd_counter;
701 }
702
703 SortPairs<HYPRE_BigInt, int>(cmap_j_offd, offd_counter);
704
705 for (int i = 0; i < offd_counter; i++)
706 {
707 cmap[i] = cmap_j_offd[i].one;
708 j_offd[cmap_j_offd[i].two] = i;
709 }
710
711 P = new HypreParMatrix(MyComm, MyRank, NRanks, row_starts, col_starts,
712 i_diag, j_diag, i_offd, j_offd, cmap, offd_counter);
713
714 SparseMatrix Pdiag;
715 P->GetDiag(Pdiag);
716 R = Transpose(Pdiag);
717 }
718
GetPartialConformingInterpolation()719 HypreParMatrix *ParFiniteElementSpace::GetPartialConformingInterpolation()
720 {
721 HypreParMatrix *P_pc;
722 Array<HYPRE_BigInt> P_pc_row_starts, P_pc_col_starts;
723 BuildParallelConformingInterpolation(&P_pc, NULL, P_pc_row_starts,
724 P_pc_col_starts, NULL, true);
725 P_pc->CopyRowStarts();
726 P_pc->CopyColStarts();
727 return P_pc;
728 }
729
DivideByGroupSize(double * vec)730 void ParFiniteElementSpace::DivideByGroupSize(double *vec)
731 {
732 GroupTopology > = GetGroupTopo();
733 for (int i = 0; i < ldof_group.Size(); i++)
734 {
735 if (gt.IAmMaster(ldof_group[i])) // we are the master
736 {
737 if (ldof_ltdof[i] >= 0) // see note below
738 {
739 vec[ldof_ltdof[i]] /= gt.GetGroupSize(ldof_group[i]);
740 }
741 // NOTE: in NC meshes, ldof_ltdof generated for the gtopo
742 // groups by ConstructTrueDofs gets overwritten by
743 // BuildParallelConformingInterpolation. Some DOFs that are
744 // seen as true by the conforming code are actually slaves and
745 // end up with a -1 in ldof_ltdof.
746 }
747 }
748 }
749
ScalarGroupComm()750 GroupCommunicator *ParFiniteElementSpace::ScalarGroupComm()
751 {
752 GroupCommunicator *gc = new GroupCommunicator(GetGroupTopo());
753 if (NURBSext)
754 {
755 gc->Create(pNURBSext()->ldof_group);
756 }
757 else
758 {
759 GetGroupComm(*gc, 0);
760 }
761 return gc;
762 }
763
Synchronize(Array<int> & ldof_marker) const764 void ParFiniteElementSpace::Synchronize(Array<int> &ldof_marker) const
765 {
766 // For non-conforming mesh, synchronization is performed on the cut (aka
767 // "partially conforming") space.
768
769 MFEM_VERIFY(ldof_marker.Size() == GetVSize(), "invalid in/out array");
770
771 // implement allreduce(|) as reduce(|) + broadcast
772 gcomm->Reduce<int>(ldof_marker, GroupCommunicator::BitOR);
773 gcomm->Bcast(ldof_marker);
774 }
775
GetEssentialVDofs(const Array<int> & bdr_attr_is_ess,Array<int> & ess_dofs,int component) const776 void ParFiniteElementSpace::GetEssentialVDofs(const Array<int> &bdr_attr_is_ess,
777 Array<int> &ess_dofs,
778 int component) const
779 {
780 FiniteElementSpace::GetEssentialVDofs(bdr_attr_is_ess, ess_dofs, component);
781
782 if (Conforming())
783 {
784 // Make sure that processors without boundary elements mark
785 // their boundary dofs (if they have any).
786 Synchronize(ess_dofs);
787 }
788 }
789
GetEssentialTrueDofs(const Array<int> & bdr_attr_is_ess,Array<int> & ess_tdof_list,int component)790 void ParFiniteElementSpace::GetEssentialTrueDofs(const Array<int>
791 &bdr_attr_is_ess,
792 Array<int> &ess_tdof_list,
793 int component)
794 {
795 Array<int> ess_dofs, true_ess_dofs;
796
797 GetEssentialVDofs(bdr_attr_is_ess, ess_dofs, component);
798 GetRestrictionMatrix()->BooleanMult(ess_dofs, true_ess_dofs);
799
800 #ifdef MFEM_DEBUG
801 // Verify that in boolean arithmetic: P^T ess_dofs = R ess_dofs.
802 Array<int> true_ess_dofs2(true_ess_dofs.Size());
803 HypreParMatrix *Pt = Dof_TrueDof_Matrix()->Transpose();
804 const int *ess_dofs_data = ess_dofs.HostRead();
805 Pt->BooleanMult(1, ess_dofs_data, 0, true_ess_dofs2);
806 delete Pt;
807 int counter = 0;
808 const int *ted = true_ess_dofs.HostRead();
809 for (int i = 0; i < true_ess_dofs.Size(); i++)
810 {
811 if (bool(ted[i]) != bool(true_ess_dofs2[i])) { counter++; }
812 }
813 MFEM_VERIFY(counter == 0, "internal MFEM error: counter = " << counter);
814 #endif
815
816 MarkerToList(true_ess_dofs, ess_tdof_list);
817 }
818
GetLocalTDofNumber(int ldof) const819 int ParFiniteElementSpace::GetLocalTDofNumber(int ldof) const
820 {
821 if (Nonconforming())
822 {
823 Dof_TrueDof_Matrix(); // make sure P has been built
824
825 return ldof_ltdof[ldof]; // NOTE: contains -1 for slaves/DOFs we don't own
826 }
827 else
828 {
829 if (GetGroupTopo().IAmMaster(ldof_group[ldof]))
830 {
831 return ldof_ltdof[ldof];
832 }
833 else
834 {
835 return -1;
836 }
837 }
838 }
839
GetGlobalTDofNumber(int ldof) const840 HYPRE_BigInt ParFiniteElementSpace::GetGlobalTDofNumber(int ldof) const
841 {
842 if (Nonconforming())
843 {
844 MFEM_VERIFY(ldof_ltdof[ldof] >= 0, "ldof " << ldof << " not a true DOF.");
845
846 return GetMyTDofOffset() + ldof_ltdof[ldof];
847 }
848 else
849 {
850 if (HYPRE_AssumedPartitionCheck())
851 {
852 return ldof_ltdof[ldof] +
853 tdof_nb_offsets[GetGroupTopo().GetGroupMaster(ldof_group[ldof])];
854 }
855 else
856 {
857 return ldof_ltdof[ldof] +
858 tdof_offsets[GetGroupTopo().GetGroupMasterRank(ldof_group[ldof])];
859 }
860 }
861 }
862
GetGlobalScalarTDofNumber(int sldof)863 HYPRE_BigInt ParFiniteElementSpace::GetGlobalScalarTDofNumber(int sldof)
864 {
865 if (Nonconforming())
866 {
867 MFEM_ABORT("Not implemented for NC mesh.");
868 }
869
870 if (HYPRE_AssumedPartitionCheck())
871 {
872 if (ordering == Ordering::byNODES)
873 {
874 return ldof_ltdof[sldof] +
875 tdof_nb_offsets[GetGroupTopo().GetGroupMaster(
876 ldof_group[sldof])] / vdim;
877 }
878 else
879 {
880 return (ldof_ltdof[sldof*vdim] +
881 tdof_nb_offsets[GetGroupTopo().GetGroupMaster(
882 ldof_group[sldof*vdim])]) / vdim;
883 }
884 }
885
886 if (ordering == Ordering::byNODES)
887 {
888 return ldof_ltdof[sldof] +
889 tdof_offsets[GetGroupTopo().GetGroupMasterRank(
890 ldof_group[sldof])] / vdim;
891 }
892 else
893 {
894 return (ldof_ltdof[sldof*vdim] +
895 tdof_offsets[GetGroupTopo().GetGroupMasterRank(
896 ldof_group[sldof*vdim])]) / vdim;
897 }
898 }
899
GetMyDofOffset() const900 HYPRE_BigInt ParFiniteElementSpace::GetMyDofOffset() const
901 {
902 return HYPRE_AssumedPartitionCheck() ? dof_offsets[0] : dof_offsets[MyRank];
903 }
904
GetMyTDofOffset() const905 HYPRE_BigInt ParFiniteElementSpace::GetMyTDofOffset() const
906 {
907 return HYPRE_AssumedPartitionCheck()? tdof_offsets[0] : tdof_offsets[MyRank];
908 }
909
GetProlongationMatrix() const910 const Operator *ParFiniteElementSpace::GetProlongationMatrix() const
911 {
912 if (Conforming())
913 {
914 if (Pconf) { return Pconf; }
915
916 if (NRanks == 1)
917 {
918 Pconf = new IdentityOperator(GetTrueVSize());
919 }
920 else
921 {
922 if (!Device::Allows(Backend::DEVICE_MASK))
923 {
924 Pconf = new ConformingProlongationOperator(*this);
925 }
926 else
927 {
928 Pconf = new DeviceConformingProlongationOperator(*this);
929 }
930 }
931 return Pconf;
932 }
933 else
934 {
935 return Dof_TrueDof_Matrix();
936 }
937 }
938
GetRestrictionOperator() const939 const Operator *ParFiniteElementSpace::GetRestrictionOperator() const
940 {
941 if (Conforming())
942 {
943 if (Rconf) { return Rconf; }
944
945 if (NRanks == 1)
946 {
947 R_transpose = new IdentityOperator(GetTrueVSize());
948 }
949 else
950 {
951 if (!Device::Allows(Backend::DEVICE_MASK))
952 {
953 R_transpose = new ConformingProlongationOperator(*this, true);
954 }
955 else
956 {
957 R_transpose =
958 new DeviceConformingProlongationOperator(*this, true);
959 }
960 }
961 Rconf = new TransposeOperator(R_transpose);
962 return Rconf;
963 }
964 else
965 {
966 Dof_TrueDof_Matrix();
967 R_transpose = new TransposeOperator(R);
968 return R;
969 }
970 }
971
GetRestrictionTransposeOperator() const972 const Operator *ParFiniteElementSpace::GetRestrictionTransposeOperator() const
973 {
974 GetRestrictionOperator();
975 return R_transpose;
976 }
977
ExchangeFaceNbrData()978 void ParFiniteElementSpace::ExchangeFaceNbrData()
979 {
980 if (num_face_nbr_dofs >= 0) { return; }
981
982 pmesh->ExchangeFaceNbrData();
983
984 int num_face_nbrs = pmesh->GetNFaceNeighbors();
985
986 if (num_face_nbrs == 0)
987 {
988 num_face_nbr_dofs = 0;
989 return;
990 }
991
992 MPI_Request *requests = new MPI_Request[2*num_face_nbrs];
993 MPI_Request *send_requests = requests;
994 MPI_Request *recv_requests = requests + num_face_nbrs;
995 MPI_Status *statuses = new MPI_Status[num_face_nbrs];
996
997 Array<int> ldofs;
998 Array<int> ldof_marker(GetVSize());
999 ldof_marker = -1;
1000
1001 Table send_nbr_elem_dof;
1002
1003 send_nbr_elem_dof.MakeI(pmesh->send_face_nbr_elements.Size_of_connections());
1004 send_face_nbr_ldof.MakeI(num_face_nbrs);
1005 face_nbr_ldof.MakeI(num_face_nbrs);
1006 int *send_el_off = pmesh->send_face_nbr_elements.GetI();
1007 int *recv_el_off = pmesh->face_nbr_elements_offset;
1008 for (int fn = 0; fn < num_face_nbrs; fn++)
1009 {
1010 int *my_elems = pmesh->send_face_nbr_elements.GetRow(fn);
1011 int num_my_elems = pmesh->send_face_nbr_elements.RowSize(fn);
1012
1013 for (int i = 0; i < num_my_elems; i++)
1014 {
1015 GetElementVDofs(my_elems[i], ldofs);
1016 for (int j = 0; j < ldofs.Size(); j++)
1017 {
1018 int ldof = (ldofs[j] >= 0 ? ldofs[j] : -1-ldofs[j]);
1019
1020 if (ldof_marker[ldof] != fn)
1021 {
1022 ldof_marker[ldof] = fn;
1023 send_face_nbr_ldof.AddAColumnInRow(fn);
1024 }
1025 }
1026 send_nbr_elem_dof.AddColumnsInRow(send_el_off[fn] + i, ldofs.Size());
1027 }
1028
1029 int nbr_rank = pmesh->GetFaceNbrRank(fn);
1030 int tag = 0;
1031 MPI_Isend(&send_face_nbr_ldof.GetI()[fn], 1, MPI_INT, nbr_rank, tag,
1032 MyComm, &send_requests[fn]);
1033
1034 MPI_Irecv(&face_nbr_ldof.GetI()[fn], 1, MPI_INT, nbr_rank, tag,
1035 MyComm, &recv_requests[fn]);
1036 }
1037
1038 MPI_Waitall(num_face_nbrs, recv_requests, statuses);
1039 face_nbr_ldof.MakeJ();
1040
1041 num_face_nbr_dofs = face_nbr_ldof.Size_of_connections();
1042
1043 MPI_Waitall(num_face_nbrs, send_requests, statuses);
1044 send_face_nbr_ldof.MakeJ();
1045
1046 // send/receive the I arrays of send_nbr_elem_dof/face_nbr_element_dof,
1047 // respectively (they contain the number of dofs for each face-neighbor
1048 // element)
1049 face_nbr_element_dof.MakeI(recv_el_off[num_face_nbrs]);
1050
1051 int *send_I = send_nbr_elem_dof.GetI();
1052 int *recv_I = face_nbr_element_dof.GetI();
1053 for (int fn = 0; fn < num_face_nbrs; fn++)
1054 {
1055 int nbr_rank = pmesh->GetFaceNbrRank(fn);
1056 int tag = 0;
1057 MPI_Isend(send_I + send_el_off[fn], send_el_off[fn+1] - send_el_off[fn],
1058 MPI_INT, nbr_rank, tag, MyComm, &send_requests[fn]);
1059
1060 MPI_Irecv(recv_I + recv_el_off[fn], recv_el_off[fn+1] - recv_el_off[fn],
1061 MPI_INT, nbr_rank, tag, MyComm, &recv_requests[fn]);
1062 }
1063
1064 MPI_Waitall(num_face_nbrs, send_requests, statuses);
1065 send_nbr_elem_dof.MakeJ();
1066
1067 ldof_marker = -1;
1068
1069 for (int fn = 0; fn < num_face_nbrs; fn++)
1070 {
1071 int *my_elems = pmesh->send_face_nbr_elements.GetRow(fn);
1072 int num_my_elems = pmesh->send_face_nbr_elements.RowSize(fn);
1073
1074 for (int i = 0; i < num_my_elems; i++)
1075 {
1076 GetElementVDofs(my_elems[i], ldofs);
1077 for (int j = 0; j < ldofs.Size(); j++)
1078 {
1079 int ldof = (ldofs[j] >= 0 ? ldofs[j] : -1-ldofs[j]);
1080
1081 if (ldof_marker[ldof] != fn)
1082 {
1083 ldof_marker[ldof] = fn;
1084 send_face_nbr_ldof.AddConnection(fn, ldofs[j]);
1085 }
1086 }
1087 send_nbr_elem_dof.AddConnections(
1088 send_el_off[fn] + i, ldofs, ldofs.Size());
1089 }
1090 }
1091 send_face_nbr_ldof.ShiftUpI();
1092 send_nbr_elem_dof.ShiftUpI();
1093
1094 // convert the ldof indices in send_nbr_elem_dof
1095 int *send_J = send_nbr_elem_dof.GetJ();
1096 for (int fn = 0, j = 0; fn < num_face_nbrs; fn++)
1097 {
1098 int num_ldofs = send_face_nbr_ldof.RowSize(fn);
1099 int *ldofs = send_face_nbr_ldof.GetRow(fn);
1100 int j_end = send_I[send_el_off[fn+1]];
1101
1102 for (int i = 0; i < num_ldofs; i++)
1103 {
1104 int ldof = (ldofs[i] >= 0 ? ldofs[i] : -1-ldofs[i]);
1105 ldof_marker[ldof] = i;
1106 }
1107
1108 for ( ; j < j_end; j++)
1109 {
1110 int ldof = (send_J[j] >= 0 ? send_J[j] : -1-send_J[j]);
1111 send_J[j] = (send_J[j] >= 0 ? ldof_marker[ldof] : -1-ldof_marker[ldof]);
1112 }
1113 }
1114
1115 MPI_Waitall(num_face_nbrs, recv_requests, statuses);
1116 face_nbr_element_dof.MakeJ();
1117
1118 // send/receive the J arrays of send_nbr_elem_dof/face_nbr_element_dof,
1119 // respectively (they contain the element dofs in enumeration local for
1120 // the face-neighbor pair)
1121 int *recv_J = face_nbr_element_dof.GetJ();
1122 for (int fn = 0; fn < num_face_nbrs; fn++)
1123 {
1124 int nbr_rank = pmesh->GetFaceNbrRank(fn);
1125 int tag = 0;
1126
1127 MPI_Isend(send_J + send_I[send_el_off[fn]],
1128 send_I[send_el_off[fn+1]] - send_I[send_el_off[fn]],
1129 MPI_INT, nbr_rank, tag, MyComm, &send_requests[fn]);
1130
1131 MPI_Irecv(recv_J + recv_I[recv_el_off[fn]],
1132 recv_I[recv_el_off[fn+1]] - recv_I[recv_el_off[fn]],
1133 MPI_INT, nbr_rank, tag, MyComm, &recv_requests[fn]);
1134 }
1135
1136 MPI_Waitall(num_face_nbrs, recv_requests, statuses);
1137
1138 // shift the J array of face_nbr_element_dof
1139 for (int fn = 0, j = 0; fn < num_face_nbrs; fn++)
1140 {
1141 int shift = face_nbr_ldof.GetI()[fn];
1142 int j_end = recv_I[recv_el_off[fn+1]];
1143
1144 for ( ; j < j_end; j++)
1145 {
1146 if (recv_J[j] >= 0)
1147 {
1148 recv_J[j] += shift;
1149 }
1150 else
1151 {
1152 recv_J[j] -= shift;
1153 }
1154 }
1155 }
1156
1157 MPI_Waitall(num_face_nbrs, send_requests, statuses);
1158
1159 // send/receive the J arrays of send_face_nbr_ldof/face_nbr_ldof,
1160 // respectively
1161 for (int fn = 0; fn < num_face_nbrs; fn++)
1162 {
1163 int nbr_rank = pmesh->GetFaceNbrRank(fn);
1164 int tag = 0;
1165
1166 MPI_Isend(send_face_nbr_ldof.GetRow(fn),
1167 send_face_nbr_ldof.RowSize(fn),
1168 MPI_INT, nbr_rank, tag, MyComm, &send_requests[fn]);
1169
1170 MPI_Irecv(face_nbr_ldof.GetRow(fn),
1171 face_nbr_ldof.RowSize(fn),
1172 MPI_INT, nbr_rank, tag, MyComm, &recv_requests[fn]);
1173 }
1174
1175 MPI_Waitall(num_face_nbrs, recv_requests, statuses);
1176 MPI_Waitall(num_face_nbrs, send_requests, statuses);
1177
1178 // send my_dof_offset (i.e. my_ldof_offset) to face neighbors and receive
1179 // their offset in dof_face_nbr_offsets, used to define face_nbr_glob_dof_map
1180 face_nbr_glob_dof_map.SetSize(num_face_nbr_dofs);
1181 Array<HYPRE_BigInt> dof_face_nbr_offsets(num_face_nbrs);
1182 HYPRE_BigInt my_dof_offset = GetMyDofOffset();
1183 for (int fn = 0; fn < num_face_nbrs; fn++)
1184 {
1185 int nbr_rank = pmesh->GetFaceNbrRank(fn);
1186 int tag = 0;
1187
1188 MPI_Isend(&my_dof_offset, 1, HYPRE_MPI_BIG_INT, nbr_rank, tag,
1189 MyComm, &send_requests[fn]);
1190
1191 MPI_Irecv(&dof_face_nbr_offsets[fn], 1, HYPRE_MPI_BIG_INT, nbr_rank, tag,
1192 MyComm, &recv_requests[fn]);
1193 }
1194
1195 MPI_Waitall(num_face_nbrs, recv_requests, statuses);
1196
1197 // set the array face_nbr_glob_dof_map which holds the global ldof indices of
1198 // the face-neighbor dofs
1199 for (int fn = 0, j = 0; fn < num_face_nbrs; fn++)
1200 {
1201 for (int j_end = face_nbr_ldof.GetI()[fn+1]; j < j_end; j++)
1202 {
1203 int ldof = face_nbr_ldof.GetJ()[j];
1204 if (ldof < 0)
1205 {
1206 ldof = -1-ldof;
1207 }
1208
1209 face_nbr_glob_dof_map[j] = dof_face_nbr_offsets[fn] + ldof;
1210 }
1211 }
1212
1213 MPI_Waitall(num_face_nbrs, send_requests, statuses);
1214
1215 delete [] statuses;
1216 delete [] requests;
1217 }
1218
GetFaceNbrElementVDofs(int i,Array<int> & vdofs) const1219 void ParFiniteElementSpace::GetFaceNbrElementVDofs(
1220 int i, Array<int> &vdofs) const
1221 {
1222 face_nbr_element_dof.GetRow(i, vdofs);
1223 }
1224
GetFaceNbrFaceVDofs(int i,Array<int> & vdofs) const1225 void ParFiniteElementSpace::GetFaceNbrFaceVDofs(int i, Array<int> &vdofs) const
1226 {
1227 // Works for NC mesh where 'i' is an index returned by
1228 // ParMesh::GetSharedFace() such that i >= Mesh::GetNumFaces(), i.e. 'i' is
1229 // the index of a ghost face.
1230 MFEM_ASSERT(Nonconforming() && i >= pmesh->GetNumFaces(), "");
1231 int el1, el2, inf1, inf2;
1232 pmesh->GetFaceElements(i, &el1, &el2);
1233 el2 = -1 - el2;
1234 pmesh->GetFaceInfos(i, &inf1, &inf2);
1235 MFEM_ASSERT(0 <= el2 && el2 < face_nbr_element_dof.Size(), "");
1236 const int nd = face_nbr_element_dof.RowSize(el2);
1237 const int *vol_vdofs = face_nbr_element_dof.GetRow(el2);
1238 const Element *face_nbr_el = pmesh->face_nbr_elements[el2];
1239 Geometry::Type geom = face_nbr_el->GetGeometryType();
1240 const int face_dim = Geometry::Dimension[geom]-1;
1241
1242 fec->SubDofOrder(geom, face_dim, inf2, vdofs);
1243 // Convert local dofs to local vdofs.
1244 Ordering::DofsToVDofs<Ordering::byNODES>(nd/vdim, vdim, vdofs);
1245 // Convert local vdofs to global vdofs.
1246 for (int j = 0; j < vdofs.Size(); j++)
1247 {
1248 const int ldof = vdofs[j];
1249 vdofs[j] = (ldof >= 0) ? vol_vdofs[ldof] : -1-vol_vdofs[-1-ldof];
1250 }
1251 }
1252
GetFaceNbrFE(int i) const1253 const FiniteElement *ParFiniteElementSpace::GetFaceNbrFE(int i) const
1254 {
1255 const FiniteElement *FE =
1256 fec->FiniteElementForGeometry(
1257 pmesh->face_nbr_elements[i]->GetGeometryType());
1258
1259 if (NURBSext)
1260 {
1261 mfem_error("ParFiniteElementSpace::GetFaceNbrFE"
1262 " does not support NURBS!");
1263 }
1264 return FE;
1265 }
1266
GetFaceNbrFaceFE(int i) const1267 const FiniteElement *ParFiniteElementSpace::GetFaceNbrFaceFE(int i) const
1268 {
1269 // Works for NC mesh where 'i' is an index returned by
1270 // ParMesh::GetSharedFace() such that i >= Mesh::GetNumFaces(), i.e. 'i' is
1271 // the index of a ghost face.
1272 // Works in tandem with GetFaceNbrFaceVDofs() defined above.
1273
1274 MFEM_ASSERT(Nonconforming() && !NURBSext, "");
1275 Geometry::Type face_geom = pmesh->GetFaceGeometryType(i);
1276 return fec->FiniteElementForGeometry(face_geom);
1277 }
1278
Lose_Dof_TrueDof_Matrix()1279 void ParFiniteElementSpace::Lose_Dof_TrueDof_Matrix()
1280 {
1281 hypre_ParCSRMatrix *csrP = (hypre_ParCSRMatrix*)(*P);
1282 hypre_ParCSRMatrixOwnsRowStarts(csrP) = 1;
1283 hypre_ParCSRMatrixOwnsColStarts(csrP) = 1;
1284 P -> StealData();
1285 dof_offsets.LoseData();
1286 tdof_offsets.LoseData();
1287 }
1288
ConstructTrueDofs()1289 void ParFiniteElementSpace::ConstructTrueDofs()
1290 {
1291 int i, gr, n = GetVSize();
1292 GroupTopology > = pmesh->gtopo;
1293 gcomm = new GroupCommunicator(gt);
1294 Table &group_ldof = gcomm->GroupLDofTable();
1295
1296 GetGroupComm(*gcomm, 1, &ldof_sign);
1297
1298 // Define ldof_group and mark ldof_ltdof with
1299 // -1 for ldof that is ours
1300 // -2 for ldof that is in a group with another master
1301 ldof_group.SetSize(n);
1302 ldof_ltdof.SetSize(n);
1303 ldof_group = 0;
1304 ldof_ltdof = -1;
1305
1306 for (gr = 1; gr < group_ldof.Size(); gr++)
1307 {
1308 const int *ldofs = group_ldof.GetRow(gr);
1309 const int nldofs = group_ldof.RowSize(gr);
1310 for (i = 0; i < nldofs; i++)
1311 {
1312 ldof_group[ldofs[i]] = gr;
1313 }
1314
1315 if (!gt.IAmMaster(gr)) // we are not the master
1316 {
1317 for (i = 0; i < nldofs; i++)
1318 {
1319 ldof_ltdof[ldofs[i]] = -2;
1320 }
1321 }
1322 }
1323
1324 // count ltdof_size
1325 ltdof_size = 0;
1326 for (i = 0; i < n; i++)
1327 {
1328 if (ldof_ltdof[i] == -1)
1329 {
1330 ldof_ltdof[i] = ltdof_size++;
1331 }
1332 }
1333 gcomm->SetLTDofTable(ldof_ltdof);
1334
1335 // have the group masters broadcast their ltdofs to the rest of the group
1336 gcomm->Bcast(ldof_ltdof);
1337 }
1338
ConstructTrueNURBSDofs()1339 void ParFiniteElementSpace::ConstructTrueNURBSDofs()
1340 {
1341 int n = GetVSize();
1342 GroupTopology > = pNURBSext()->gtopo;
1343 gcomm = new GroupCommunicator(gt);
1344
1345 // pNURBSext()->ldof_group is for scalar space!
1346 if (vdim == 1)
1347 {
1348 ldof_group.MakeRef(pNURBSext()->ldof_group);
1349 }
1350 else
1351 {
1352 const int *scalar_ldof_group = pNURBSext()->ldof_group;
1353 ldof_group.SetSize(n);
1354 for (int i = 0; i < n; i++)
1355 {
1356 ldof_group[i] = scalar_ldof_group[VDofToDof(i)];
1357 }
1358 }
1359
1360 gcomm->Create(ldof_group);
1361
1362 // ldof_sign.SetSize(n);
1363 // ldof_sign = 1;
1364 ldof_sign.DeleteAll();
1365
1366 ltdof_size = 0;
1367 ldof_ltdof.SetSize(n);
1368 for (int i = 0; i < n; i++)
1369 {
1370 if (gt.IAmMaster(ldof_group[i]))
1371 {
1372 ldof_ltdof[i] = ltdof_size;
1373 ltdof_size++;
1374 }
1375 else
1376 {
1377 ldof_ltdof[i] = -2;
1378 }
1379 }
1380 gcomm->SetLTDofTable(ldof_ltdof);
1381
1382 // have the group masters broadcast their ltdofs to the rest of the group
1383 gcomm->Bcast(ldof_ltdof);
1384 }
1385
GetGhostVertexDofs(const MeshId & id,Array<int> & dofs) const1386 void ParFiniteElementSpace::GetGhostVertexDofs(const MeshId &id,
1387 Array<int> &dofs) const
1388 {
1389 int nv = fec->DofForGeometry(Geometry::POINT);
1390 dofs.SetSize(nv);
1391 for (int j = 0; j < nv; j++)
1392 {
1393 dofs[j] = ndofs + nv*id.index + j;
1394 }
1395 }
1396
GetGhostEdgeDofs(const MeshId & edge_id,Array<int> & dofs) const1397 void ParFiniteElementSpace::GetGhostEdgeDofs(const MeshId &edge_id,
1398 Array<int> &dofs) const
1399 {
1400 int nv = fec->DofForGeometry(Geometry::POINT);
1401 int ne = fec->DofForGeometry(Geometry::SEGMENT);
1402 dofs.SetSize(2*nv + ne);
1403
1404 int V[2], ghost = pncmesh->GetNVertices();
1405 pmesh->pncmesh->GetEdgeVertices(edge_id, V);
1406
1407 for (int i = 0; i < 2; i++)
1408 {
1409 int k = (V[i] < ghost) ? V[i]*nv : (ndofs + (V[i] - ghost)*nv);
1410 for (int j = 0; j < nv; j++)
1411 {
1412 dofs[i*nv + j] = k++;
1413 }
1414 }
1415
1416 int k = ndofs + ngvdofs + (edge_id.index - pncmesh->GetNEdges())*ne;
1417 for (int j = 0; j < ne; j++)
1418 {
1419 dofs[2*nv + j] = k++;
1420 }
1421 }
1422
GetGhostFaceDofs(const MeshId & face_id,Array<int> & dofs) const1423 void ParFiniteElementSpace::GetGhostFaceDofs(const MeshId &face_id,
1424 Array<int> &dofs) const
1425 {
1426 int nfv, V[4], E[4], Eo[4];
1427 nfv = pmesh->pncmesh->GetFaceVerticesEdges(face_id, V, E, Eo);
1428
1429 int nv = fec->DofForGeometry(Geometry::POINT);
1430 int ne = fec->DofForGeometry(Geometry::SEGMENT);
1431 int nf_tri = fec->DofForGeometry(Geometry::TRIANGLE);
1432 int nf_quad = fec->DofForGeometry(Geometry::SQUARE);
1433 int nf = (nfv == 3) ? nf_tri : nf_quad;
1434
1435 dofs.SetSize(nfv*(nv + ne) + nf);
1436
1437 int offset = 0;
1438 for (int i = 0; i < nfv; i++)
1439 {
1440 int ghost = pncmesh->GetNVertices();
1441 int first = (V[i] < ghost) ? V[i]*nv : (ndofs + (V[i] - ghost)*nv);
1442 for (int j = 0; j < nv; j++)
1443 {
1444 dofs[offset++] = first + j;
1445 }
1446 }
1447
1448 for (int i = 0; i < nfv; i++)
1449 {
1450 int ghost = pncmesh->GetNEdges();
1451 int first = (E[i] < ghost) ? nvdofs + E[i]*ne
1452 /* */ : ndofs + ngvdofs + (E[i] - ghost)*ne;
1453 const int *ind = fec->DofOrderForOrientation(Geometry::SEGMENT, Eo[i]);
1454 for (int j = 0; j < ne; j++)
1455 {
1456 dofs[offset++] = (ind[j] >= 0) ? (first + ind[j])
1457 /* */ : (-1 - (first + (-1 - ind[j])));
1458 }
1459 }
1460
1461 const int ghost_face_index = face_id.index - pncmesh->GetNFaces();
1462 int first = ndofs + ngvdofs + ngedofs + nf_quad*ghost_face_index;
1463
1464 for (int j = 0; j < nf; j++)
1465 {
1466 dofs[offset++] = first + j;
1467 }
1468 }
1469
GetGhostDofs(int entity,const MeshId & id,Array<int> & dofs) const1470 void ParFiniteElementSpace::GetGhostDofs(int entity, const MeshId &id,
1471 Array<int> &dofs) const
1472 {
1473 // helper to get ghost vertex, ghost edge or ghost face DOFs
1474 switch (entity)
1475 {
1476 case 0: GetGhostVertexDofs(id, dofs); break;
1477 case 1: GetGhostEdgeDofs(id, dofs); break;
1478 case 2: GetGhostFaceDofs(id, dofs); break;
1479 }
1480 }
1481
GetBareDofs(int entity,int index,Array<int> & dofs) const1482 void ParFiniteElementSpace::GetBareDofs(int entity, int index,
1483 Array<int> &dofs) const
1484 {
1485 int ned, ghost, first;
1486 switch (entity)
1487 {
1488 case 0:
1489 ned = fec->DofForGeometry(Geometry::POINT);
1490 ghost = pncmesh->GetNVertices();
1491 first = (index < ghost)
1492 ? index*ned // regular vertex
1493 : ndofs + (index - ghost)*ned; // ghost vertex
1494 break;
1495
1496 case 1:
1497 ned = fec->DofForGeometry(Geometry::SEGMENT);
1498 ghost = pncmesh->GetNEdges();
1499 first = (index < ghost)
1500 ? nvdofs + index*ned // regular edge
1501 : ndofs + ngvdofs + (index - ghost)*ned; // ghost edge
1502 break;
1503
1504 default:
1505 Geometry::Type geom = pncmesh->GetFaceGeometry(index);
1506 MFEM_ASSERT(geom == Geometry::SQUARE ||
1507 geom == Geometry::TRIANGLE, "");
1508
1509 ned = fec->DofForGeometry(geom);
1510 ghost = pncmesh->GetNFaces();
1511
1512 if (index < ghost) // regular face
1513 {
1514 first = nvdofs + nedofs + FirstFaceDof(index);
1515 }
1516 else // ghost face
1517 {
1518 index -= ghost;
1519 int stride = fec->DofForGeometry(Geometry::SQUARE);
1520 first = ndofs + ngvdofs + ngedofs + index*stride;
1521 }
1522 break;
1523 }
1524
1525 dofs.SetSize(ned);
1526 for (int i = 0; i < ned; i++)
1527 {
1528 dofs[i] = first + i;
1529 }
1530 }
1531
PackDof(int entity,int index,int edof) const1532 int ParFiniteElementSpace::PackDof(int entity, int index, int edof) const
1533 {
1534 // DOFs are ordered as follows:
1535 // vertices | edges | faces | internal | ghost vert. | g. edges | g. faces
1536
1537 int ghost, ned;
1538 switch (entity)
1539 {
1540 case 0:
1541 ghost = pncmesh->GetNVertices();
1542 ned = fec->DofForGeometry(Geometry::POINT);
1543
1544 return (index < ghost)
1545 ? index*ned + edof // regular vertex
1546 : ndofs + (index - ghost)*ned + edof; // ghost vertex
1547
1548 case 1:
1549 ghost = pncmesh->GetNEdges();
1550 ned = fec->DofForGeometry(Geometry::SEGMENT);
1551
1552 return (index < ghost)
1553 ? nvdofs + index*ned + edof // regular edge
1554 : ndofs + ngvdofs + (index - ghost)*ned + edof; // ghost edge
1555
1556 default:
1557 ghost = pncmesh->GetNFaces();
1558 ned = fec->DofForGeometry(pncmesh->GetFaceGeometry(index));
1559
1560 if (index < ghost) // regular face
1561 {
1562 return nvdofs + nedofs + FirstFaceDof(index) + edof;
1563 }
1564 else // ghost face
1565 {
1566 index -= ghost;
1567 int stride = fec->DofForGeometry(Geometry::SQUARE);
1568 return ndofs + ngvdofs + ngedofs + index*stride + edof;
1569 }
1570 }
1571 }
1572
bisect(const int * array,int size,int value)1573 static int bisect(const int* array, int size, int value)
1574 {
1575 const int* end = array + size;
1576 const int* pos = std::lower_bound(array, end, value);
1577 MFEM_VERIFY(pos != end, "value not found");
1578 return pos - array;
1579 }
1580
1581 /** Dissect a DOF number to obtain the entity type (0=vertex, 1=edge, 2=face),
1582 * entity index and the DOF number within the entity.
1583 */
UnpackDof(int dof,int & entity,int & index,int & edof) const1584 void ParFiniteElementSpace::UnpackDof(int dof,
1585 int &entity, int &index, int &edof) const
1586 {
1587 MFEM_VERIFY(dof >= 0, "");
1588 if (dof < ndofs)
1589 {
1590 if (dof < nvdofs) // regular vertex
1591 {
1592 int nv = fec->DofForGeometry(Geometry::POINT);
1593 entity = 0, index = dof / nv, edof = dof % nv;
1594 return;
1595 }
1596 dof -= nvdofs;
1597 if (dof < nedofs) // regular edge
1598 {
1599 int ne = fec->DofForGeometry(Geometry::SEGMENT);
1600 entity = 1, index = dof / ne, edof = dof % ne;
1601 return;
1602 }
1603 dof -= nedofs;
1604 if (dof < nfdofs) // regular face
1605 {
1606 if (uni_fdof >= 0) // uniform faces
1607 {
1608 int nf = fec->DofForGeometry(pncmesh->GetFaceGeometry(0));
1609 index = dof / nf, edof = dof % nf;
1610 }
1611 else // mixed faces or var-order space
1612 {
1613 const Table &table = var_face_dofs;
1614 MFEM_ASSERT(table.Size(), "");
1615 int jpos = bisect(table.GetJ(), table.Size_of_connections(), dof);
1616 index = bisect(table.GetI(), table.Size(), jpos);
1617 edof = dof - table.GetRow(index)[0];
1618 }
1619 entity = 2;
1620 return;
1621 }
1622 MFEM_ABORT("Cannot unpack internal DOF");
1623 }
1624 else
1625 {
1626 dof -= ndofs;
1627 if (dof < ngvdofs) // ghost vertex
1628 {
1629 int nv = fec->DofForGeometry(Geometry::POINT);
1630 entity = 0, index = pncmesh->GetNVertices() + dof / nv, edof = dof % nv;
1631 return;
1632 }
1633 dof -= ngvdofs;
1634 if (dof < ngedofs) // ghost edge
1635 {
1636 int ne = fec->DofForGeometry(Geometry::SEGMENT);
1637 entity = 1, index = pncmesh->GetNEdges() + dof / ne, edof = dof % ne;
1638 return;
1639 }
1640 dof -= ngedofs;
1641 if (dof < ngfdofs) // ghost face
1642 {
1643 int stride = fec->DofForGeometry(Geometry::SQUARE);
1644 index = pncmesh->GetNFaces() + dof / stride, edof = dof % stride;
1645 entity = 2;
1646 return;
1647 }
1648 MFEM_ABORT("Out of range DOF.");
1649 }
1650 }
1651
1652 /** Represents an element of the P matrix. The column number is global and
1653 * corresponds to vector dimension 0. The other dimension columns are offset
1654 * by 'stride'.
1655 */
1656 struct PMatrixElement
1657 {
1658 HYPRE_BigInt column;
1659 int stride;
1660 double value;
1661
PMatrixElementmfem::PMatrixElement1662 PMatrixElement(HYPRE_BigInt col = 0, int str = 0, double val = 0)
1663 : column(col), stride(str), value(val) {}
1664
operator <mfem::PMatrixElement1665 bool operator<(const PMatrixElement &other) const
1666 { return column < other.column; }
1667
1668 typedef std::vector<PMatrixElement> List;
1669 };
1670
1671 /** Represents one row of the P matrix, for the construction code below.
1672 * The row is complete: diagonal and offdiagonal elements are not distinguished.
1673 */
1674 struct PMatrixRow
1675 {
1676 PMatrixElement::List elems;
1677
1678 /// Add other row, times 'coef'.
AddRowmfem::PMatrixRow1679 void AddRow(const PMatrixRow &other, double coef)
1680 {
1681 elems.reserve(elems.size() + other.elems.size());
1682 for (unsigned i = 0; i < other.elems.size(); i++)
1683 {
1684 const PMatrixElement &oei = other.elems[i];
1685 elems.push_back(
1686 PMatrixElement(oei.column, oei.stride, coef * oei.value));
1687 }
1688 }
1689
1690 /// Remove duplicate columns and sum their values.
Collapsemfem::PMatrixRow1691 void Collapse()
1692 {
1693 if (!elems.size()) { return; }
1694 std::sort(elems.begin(), elems.end());
1695
1696 int j = 0;
1697 for (unsigned i = 1; i < elems.size(); i++)
1698 {
1699 if (elems[j].column == elems[i].column)
1700 {
1701 elems[j].value += elems[i].value;
1702 }
1703 else
1704 {
1705 elems[++j] = elems[i];
1706 }
1707 }
1708 elems.resize(j+1);
1709 }
1710
writemfem::PMatrixRow1711 void write(std::ostream &os, double sign) const
1712 {
1713 bin_io::write<int>(os, elems.size());
1714 for (unsigned i = 0; i < elems.size(); i++)
1715 {
1716 const PMatrixElement &e = elems[i];
1717 bin_io::write<HYPRE_BigInt>(os, e.column);
1718 bin_io::write<int>(os, e.stride);
1719 bin_io::write<double>(os, e.value * sign);
1720 }
1721 }
1722
readmfem::PMatrixRow1723 void read(std::istream &is, double sign)
1724 {
1725 elems.resize(bin_io::read<int>(is));
1726 for (unsigned i = 0; i < elems.size(); i++)
1727 {
1728 PMatrixElement &e = elems[i];
1729 e.column = bin_io::read<HYPRE_BigInt>(is);
1730 e.stride = bin_io::read<int>(is);
1731 e.value = bin_io::read<double>(is) * sign;
1732 }
1733 }
1734 };
1735
1736 /** Represents a message to another processor containing P matrix rows.
1737 * Used by ParFiniteElementSpace::ParallelConformingInterpolation.
1738 */
1739 class NeighborRowMessage : public VarMessage<314>
1740 {
1741 public:
1742 typedef NCMesh::MeshId MeshId;
1743 typedef ParNCMesh::GroupId GroupId;
1744
1745 struct RowInfo
1746 {
1747 int entity, index, edof;
1748 GroupId group;
1749 PMatrixRow row;
1750
RowInfomfem::NeighborRowMessage::RowInfo1751 RowInfo(int ent, int idx, int edof, GroupId grp, const PMatrixRow &row)
1752 : entity(ent), index(idx), edof(edof), group(grp), row(row) {}
1753
RowInfomfem::NeighborRowMessage::RowInfo1754 RowInfo(int ent, int idx, int edof, GroupId grp)
1755 : entity(ent), index(idx), edof(edof), group(grp) {}
1756
1757 typedef std::vector<RowInfo> List;
1758 };
1759
NeighborRowMessage()1760 NeighborRowMessage() : pncmesh(NULL) {}
1761
AddRow(int entity,int index,int edof,GroupId group,const PMatrixRow & row)1762 void AddRow(int entity, int index, int edof, GroupId group,
1763 const PMatrixRow &row)
1764 {
1765 rows.push_back(RowInfo(entity, index, edof, group, row));
1766 }
1767
GetRows() const1768 const RowInfo::List& GetRows() const { return rows; }
1769
SetNCMesh(ParNCMesh * pnc)1770 void SetNCMesh(ParNCMesh* pnc) { pncmesh = pnc; }
SetFEC(const FiniteElementCollection * fec)1771 void SetFEC(const FiniteElementCollection* fec) { this->fec = fec; }
1772
1773 typedef std::map<int, NeighborRowMessage> Map;
1774
1775 protected:
1776 RowInfo::List rows;
1777
1778 ParNCMesh *pncmesh;
1779 const FiniteElementCollection* fec;
1780
1781 virtual void Encode(int rank);
1782 virtual void Decode(int);
1783 };
1784
1785
Encode(int rank)1786 void NeighborRowMessage::Encode(int rank)
1787 {
1788 std::ostringstream stream;
1789
1790 Array<MeshId> ent_ids[3];
1791 Array<GroupId> group_ids[3];
1792 Array<int> row_idx[3];
1793
1794 // encode MeshIds and groups
1795 for (unsigned i = 0; i < rows.size(); i++)
1796 {
1797 const RowInfo &ri = rows[i];
1798 const MeshId &id = pncmesh->GetNCList(ri.entity).LookUp(ri.index);
1799 ent_ids[ri.entity].Append(id);
1800 row_idx[ri.entity].Append(i);
1801 group_ids[ri.entity].Append(ri.group);
1802 }
1803
1804 Array<GroupId> all_group_ids;
1805 all_group_ids.Reserve(rows.size());
1806 for (int i = 0; i < 3; i++)
1807 {
1808 all_group_ids.Append(group_ids[i]);
1809 }
1810
1811 pncmesh->AdjustMeshIds(ent_ids, rank);
1812 pncmesh->EncodeMeshIds(stream, ent_ids);
1813 pncmesh->EncodeGroups(stream, all_group_ids);
1814
1815 // write all rows to the stream
1816 for (int ent = 0; ent < 3; ent++)
1817 {
1818 const Array<MeshId> &ids = ent_ids[ent];
1819 for (int i = 0; i < ids.Size(); i++)
1820 {
1821 const MeshId &id = ids[i];
1822 const RowInfo &ri = rows[row_idx[ent][i]];
1823 MFEM_ASSERT(ent == ri.entity, "");
1824
1825 #ifdef MFEM_DEBUG_PMATRIX
1826 mfem::out << "Rank " << pncmesh->MyRank << " sending to " << rank
1827 << ": ent " << ri.entity << ", index " << ri.index
1828 << ", edof " << ri.edof << " (id " << id.element << "/"
1829 << int(id.local) << ")" << std::endl;
1830 #endif
1831
1832 // handle orientation and sign change
1833 int edof = ri.edof;
1834 double s = 1.0;
1835 if (ent == 1)
1836 {
1837 int eo = pncmesh->GetEdgeNCOrientation(id);
1838 const int* ind = fec->DofOrderForOrientation(Geometry::SEGMENT, eo);
1839 if ((edof = ind[edof]) < 0)
1840 {
1841 edof = -1 - edof;
1842 s = -1;
1843 }
1844 }
1845
1846 bin_io::write<int>(stream, edof);
1847 ri.row.write(stream, s);
1848 }
1849 }
1850
1851 rows.clear();
1852 stream.str().swap(data);
1853 }
1854
Decode(int rank)1855 void NeighborRowMessage::Decode(int rank)
1856 {
1857 std::istringstream stream(data);
1858
1859 Array<MeshId> ent_ids[3];
1860 Array<GroupId> group_ids;
1861
1862 // decode vertex/edge/face IDs and groups
1863 pncmesh->DecodeMeshIds(stream, ent_ids);
1864 pncmesh->DecodeGroups(stream, group_ids);
1865
1866 int nrows = ent_ids[0].Size() + ent_ids[1].Size() + ent_ids[2].Size();
1867 MFEM_ASSERT(nrows == group_ids.Size(), "");
1868
1869 rows.clear();
1870 rows.reserve(nrows);
1871
1872 // read rows
1873 for (int ent = 0, gi = 0; ent < 3; ent++)
1874 {
1875 const Array<MeshId> &ids = ent_ids[ent];
1876 for (int i = 0; i < ids.Size(); i++)
1877 {
1878 const MeshId &id = ids[i];
1879 int edof = bin_io::read<int>(stream);
1880
1881 // handle orientation and sign change
1882 const int *ind = NULL;
1883 if (ent == 1)
1884 {
1885 int eo = pncmesh->GetEdgeNCOrientation(id);
1886 ind = fec->DofOrderForOrientation(Geometry::SEGMENT, eo);
1887 }
1888 else if (ent == 2)
1889 {
1890 Geometry::Type geom = pncmesh->GetFaceGeometry(id.index);
1891 int fo = pncmesh->GetFaceOrientation(id.index);
1892 ind = fec->DofOrderForOrientation(geom, fo);
1893 }
1894
1895 double s = 1.0;
1896 if (ind && (edof = ind[edof]) < 0)
1897 {
1898 edof = -1 - edof;
1899 s = -1.0;
1900 }
1901
1902 rows.push_back(RowInfo(ent, id.index, edof, group_ids[gi++]));
1903 rows.back().row.read(stream, s);
1904
1905 #ifdef MFEM_DEBUG_PMATRIX
1906 mfem::out << "Rank " << pncmesh->MyRank << " receiving from " << rank
1907 << ": ent " << rows.back().entity << ", index "
1908 << rows.back().index << ", edof " << rows.back().edof
1909 << std::endl;
1910 #endif
1911 }
1912 }
1913 }
1914
1915 void
ScheduleSendRow(const PMatrixRow & row,int dof,GroupId group_id,NeighborRowMessage::Map & send_msg) const1916 ParFiniteElementSpace::ScheduleSendRow(const PMatrixRow &row, int dof,
1917 GroupId group_id,
1918 NeighborRowMessage::Map &send_msg) const
1919 {
1920 int ent, idx, edof;
1921 UnpackDof(dof, ent, idx, edof);
1922
1923 const ParNCMesh::CommGroup &group = pncmesh->GetGroup(group_id);
1924 for (unsigned i = 0; i < group.size(); i++)
1925 {
1926 int rank = group[i];
1927 if (rank != MyRank)
1928 {
1929 NeighborRowMessage &msg = send_msg[rank];
1930 msg.AddRow(ent, idx, edof, group_id, row);
1931 msg.SetNCMesh(pncmesh);
1932 msg.SetFEC(fec);
1933 #ifdef MFEM_PMATRIX_STATS
1934 n_rows_sent++;
1935 #endif
1936 }
1937 }
1938 }
1939
ForwardRow(const PMatrixRow & row,int dof,GroupId group_sent_id,GroupId group_id,NeighborRowMessage::Map & send_msg) const1940 void ParFiniteElementSpace::ForwardRow(const PMatrixRow &row, int dof,
1941 GroupId group_sent_id, GroupId group_id,
1942 NeighborRowMessage::Map &send_msg) const
1943 {
1944 int ent, idx, edof;
1945 UnpackDof(dof, ent, idx, edof);
1946
1947 const ParNCMesh::CommGroup &group = pncmesh->GetGroup(group_id);
1948 for (unsigned i = 0; i < group.size(); i++)
1949 {
1950 int rank = group[i];
1951 if (rank != MyRank && !pncmesh->GroupContains(group_sent_id, rank))
1952 {
1953 NeighborRowMessage &msg = send_msg[rank];
1954 GroupId invalid = -1; // to prevent forwarding again
1955 msg.AddRow(ent, idx, edof, invalid, row);
1956 msg.SetNCMesh(pncmesh);
1957 msg.SetFEC(fec);
1958 #ifdef MFEM_PMATRIX_STATS
1959 n_rows_fwd++;
1960 #endif
1961 #ifdef MFEM_DEBUG_PMATRIX
1962 mfem::out << "Rank " << pncmesh->GetMyRank() << " forwarding to "
1963 << rank << ": ent " << ent << ", index" << idx
1964 << ", edof " << edof << std::endl;
1965 #endif
1966 }
1967 }
1968 }
1969
1970 #ifdef MFEM_DEBUG_PMATRIX
1971 void ParFiniteElementSpace
DebugDumpDOFs(std::ostream & os,const SparseMatrix & deps,const Array<GroupId> & dof_group,const Array<GroupId> & dof_owner,const Array<bool> & finalized) const1972 ::DebugDumpDOFs(std::ostream &os,
1973 const SparseMatrix &deps,
1974 const Array<GroupId> &dof_group,
1975 const Array<GroupId> &dof_owner,
1976 const Array<bool> &finalized) const
1977 {
1978 for (int i = 0; i < dof_group.Size(); i++)
1979 {
1980 os << i << ": ";
1981 if (i < (nvdofs + nedofs + nfdofs) || i >= ndofs)
1982 {
1983 int ent, idx, edof;
1984 UnpackDof(i, ent, idx, edof);
1985
1986 os << edof << " @ ";
1987 if (i > ndofs) { os << "ghost "; }
1988 switch (ent)
1989 {
1990 case 0: os << "vertex "; break;
1991 case 1: os << "edge "; break;
1992 default: os << "face "; break;
1993 }
1994 os << idx << "; ";
1995
1996 if (i < deps.Height() && deps.RowSize(i))
1997 {
1998 os << "depends on ";
1999 for (int j = 0; j < deps.RowSize(i); j++)
2000 {
2001 os << deps.GetRowColumns(i)[j] << " ("
2002 << deps.GetRowEntries(i)[j] << ")";
2003 if (j < deps.RowSize(i)-1) { os << ", "; }
2004 }
2005 os << "; ";
2006 }
2007 else
2008 {
2009 os << "no deps; ";
2010 }
2011
2012 os << "group " << dof_group[i] << " (";
2013 const ParNCMesh::CommGroup &g = pncmesh->GetGroup(dof_group[i]);
2014 for (unsigned j = 0; j < g.size(); j++)
2015 {
2016 if (j) { os << ", "; }
2017 os << g[j];
2018 }
2019
2020 os << "), owner " << dof_owner[i] << " (rank "
2021 << pncmesh->GetGroup(dof_owner[i])[0] << "); "
2022 << (finalized[i] ? "finalized" : "NOT finalized");
2023 }
2024 else
2025 {
2026 os << "internal";
2027 }
2028 os << "\n";
2029 }
2030 }
2031 #endif
2032
2033 int ParFiniteElementSpace
BuildParallelConformingInterpolation(HypreParMatrix ** P,SparseMatrix ** R,Array<HYPRE_BigInt> & dof_offs,Array<HYPRE_BigInt> & tdof_offs,Array<int> * dof_tdof,bool partial) const2034 ::BuildParallelConformingInterpolation(HypreParMatrix **P, SparseMatrix **R,
2035 Array<HYPRE_BigInt> &dof_offs,
2036 Array<HYPRE_BigInt> &tdof_offs,
2037 Array<int> *dof_tdof,
2038 bool partial) const
2039 {
2040 bool dg = (nvdofs == 0 && nedofs == 0 && nfdofs == 0);
2041
2042 #ifdef MFEM_PMATRIX_STATS
2043 n_msgs_sent = n_msgs_recv = 0;
2044 n_rows_sent = n_rows_recv = n_rows_fwd = 0;
2045 #endif
2046
2047 // *** STEP 1: build master-slave dependency lists ***
2048
2049 int total_dofs = ndofs + ngdofs;
2050 SparseMatrix deps(ndofs, total_dofs);
2051
2052 if (!dg && !partial)
2053 {
2054 Array<int> master_dofs, slave_dofs;
2055
2056 // loop through *all* master edges/faces, constrain their slaves
2057 for (int entity = 0; entity <= 2; entity++)
2058 {
2059 const NCMesh::NCList &list = pncmesh->GetNCList(entity);
2060 if (!list.masters.Size()) { continue; }
2061
2062 IsoparametricTransformation T;
2063 DenseMatrix I;
2064
2065 // process masters that we own or that affect our edges/faces
2066 for (int mi = 0; mi < list.masters.Size(); mi++)
2067 {
2068 const NCMesh::Master &mf = list.masters[mi];
2069
2070 // get master DOFs
2071 if (pncmesh->IsGhost(entity, mf.index))
2072 {
2073 GetGhostDofs(entity, mf, master_dofs);
2074 }
2075 else
2076 {
2077 GetEntityDofs(entity, mf.index, master_dofs, mf.Geom());
2078 }
2079
2080 if (!master_dofs.Size()) { continue; }
2081
2082 const FiniteElement* fe = fec->FiniteElementForGeometry(mf.Geom());
2083 if (!fe) { continue; }
2084
2085 switch (mf.Geom())
2086 {
2087 case Geometry::SQUARE: T.SetFE(&QuadrilateralFE); break;
2088 case Geometry::TRIANGLE: T.SetFE(&TriangleFE); break;
2089 case Geometry::SEGMENT: T.SetFE(&SegmentFE); break;
2090 default: MFEM_ABORT("unsupported geometry");
2091 }
2092
2093 // constrain slaves that exist in our mesh
2094 for (int si = mf.slaves_begin; si < mf.slaves_end; si++)
2095 {
2096 const NCMesh::Slave &sf = list.slaves[si];
2097 if (pncmesh->IsGhost(entity, sf.index)) { continue; }
2098
2099 const int variant = 0; // TODO parallel var-order
2100 GetEntityDofs(entity, sf.index, slave_dofs, mf.Geom(), variant);
2101 if (!slave_dofs.Size()) { continue; }
2102
2103 list.OrientedPointMatrix(sf, T.GetPointMat());
2104 fe->GetLocalInterpolation(T, I);
2105
2106 // make each slave DOF dependent on all master DOFs
2107 AddDependencies(deps, master_dofs, slave_dofs, I);
2108 }
2109 }
2110 }
2111
2112 deps.Finalize();
2113 }
2114
2115 // *** STEP 2: initialize group and owner ID for each DOF ***
2116
2117 Array<GroupId> dof_group(total_dofs);
2118 Array<GroupId> dof_owner(total_dofs);
2119 dof_group = 0;
2120 dof_owner = 0;
2121
2122 if (!dg)
2123 {
2124 Array<int> dofs;
2125
2126 // initialize dof_group[], dof_owner[]
2127 for (int entity = 0; entity <= 2; entity++)
2128 {
2129 const NCMesh::NCList &list = pncmesh->GetNCList(entity);
2130
2131 int lsize[3] =
2132 { list.conforming.Size(), list.masters.Size(), list.slaves.Size() };
2133
2134 for (int l = 0; l < 3; l++)
2135 {
2136 for (int i = 0; i < lsize[l]; i++)
2137 {
2138 const MeshId &id =
2139 (l == 0) ? list.conforming[i] :
2140 (l == 1) ? (const MeshId&) list.masters[i]
2141 /* */ : (const MeshId&) list.slaves[i];
2142
2143 if (id.index < 0) { continue; }
2144
2145 GroupId owner = pncmesh->GetEntityOwnerId(entity, id.index);
2146 GroupId group = pncmesh->GetEntityGroupId(entity, id.index);
2147
2148 GetBareDofs(entity, id.index, dofs);
2149
2150 for (int j = 0; j < dofs.Size(); j++)
2151 {
2152 int dof = dofs[j];
2153 dof_owner[dof] = owner;
2154 dof_group[dof] = group;
2155 }
2156 }
2157 }
2158 }
2159 }
2160
2161 // *** STEP 3: count true DOFs and calculate P row/column partitions ***
2162
2163 Array<bool> finalized(total_dofs);
2164 finalized = false;
2165
2166 // DOFs that stayed independent and are ours are true DOFs
2167 int num_true_dofs = 0;
2168 for (int i = 0; i < ndofs; i++)
2169 {
2170 if (dof_owner[i] == 0 && deps.RowSize(i) == 0)
2171 {
2172 num_true_dofs++;
2173 finalized[i] = true;
2174 }
2175 }
2176
2177 // calculate global offsets
2178 HYPRE_BigInt loc_sizes[2] = { ndofs*vdim, num_true_dofs*vdim };
2179 Array<HYPRE_BigInt>* offsets[2] = { &dof_offs, &tdof_offs };
2180 pmesh->GenerateOffsets(2, loc_sizes, offsets); // calls MPI_Scan, MPI_Bcast
2181
2182 HYPRE_BigInt my_tdof_offset =
2183 tdof_offs[HYPRE_AssumedPartitionCheck() ? 0 : MyRank];
2184
2185 if (R)
2186 {
2187 // initialize the restriction matrix (also parallel but block-diagonal)
2188 *R = new SparseMatrix(num_true_dofs*vdim, ndofs*vdim);
2189 }
2190 if (dof_tdof)
2191 {
2192 dof_tdof->SetSize(ndofs*vdim);
2193 *dof_tdof = -1;
2194 }
2195
2196 std::vector<PMatrixRow> pmatrix(total_dofs);
2197
2198 bool bynodes = (ordering == Ordering::byNODES);
2199 int vdim_factor = bynodes ? 1 : vdim;
2200 int dof_stride = bynodes ? ndofs : 1;
2201 int tdof_stride = bynodes ? num_true_dofs : 1;
2202
2203 // big container for all messages we send (the list is for iterations)
2204 std::list<NeighborRowMessage::Map> send_msg;
2205 send_msg.push_back(NeighborRowMessage::Map());
2206
2207 // put identity in P and R for true DOFs, set ldof_ltdof
2208 for (int dof = 0, tdof = 0; dof < ndofs; dof++)
2209 {
2210 if (finalized[dof])
2211 {
2212 pmatrix[dof].elems.push_back(
2213 PMatrixElement(my_tdof_offset + vdim_factor*tdof, tdof_stride, 1.));
2214
2215 // prepare messages to neighbors with identity rows
2216 if (dof_group[dof] != 0)
2217 {
2218 ScheduleSendRow(pmatrix[dof], dof, dof_group[dof], send_msg.back());
2219 }
2220
2221 for (int vd = 0; vd < vdim; vd++)
2222 {
2223 int vdof = dof*vdim_factor + vd*dof_stride;
2224 int vtdof = tdof*vdim_factor + vd*tdof_stride;
2225
2226 if (R) { (*R)->Add(vtdof, vdof, 1.0); }
2227 if (dof_tdof) { (*dof_tdof)[vdof] = vtdof; }
2228 }
2229 tdof++;
2230 }
2231 }
2232
2233 // send identity rows
2234 NeighborRowMessage::IsendAll(send_msg.back(), MyComm);
2235 #ifdef MFEM_PMATRIX_STATS
2236 n_msgs_sent += send_msg.back().size();
2237 #endif
2238
2239 if (R) { (*R)->Finalize(); }
2240
2241 // *** STEP 4: main loop ***
2242
2243 // a single instance (recv_msg) is reused for all incoming messages
2244 NeighborRowMessage recv_msg;
2245 recv_msg.SetNCMesh(pncmesh);
2246 recv_msg.SetFEC(fec);
2247
2248 int num_finalized = num_true_dofs;
2249 PMatrixRow buffer;
2250 buffer.elems.reserve(1024);
2251
2252 while (num_finalized < ndofs)
2253 {
2254 // prepare a new round of send buffers
2255 if (send_msg.back().size())
2256 {
2257 send_msg.push_back(NeighborRowMessage::Map());
2258 }
2259
2260 // check for incoming messages, receive PMatrixRows
2261 int rank, size;
2262 while (NeighborRowMessage::IProbe(rank, size, MyComm))
2263 {
2264 recv_msg.Recv(rank, size, MyComm);
2265 #ifdef MFEM_PMATRIX_STATS
2266 n_msgs_recv++;
2267 n_rows_recv += recv_msg.GetRows().size();
2268 #endif
2269
2270 const NeighborRowMessage::RowInfo::List &rows = recv_msg.GetRows();
2271 for (unsigned i = 0; i < rows.size(); i++)
2272 {
2273 const NeighborRowMessage::RowInfo &ri = rows[i];
2274 int dof = PackDof(ri.entity, ri.index, ri.edof);
2275 pmatrix[dof] = ri.row;
2276
2277 if (dof < ndofs && !finalized[dof]) { num_finalized++; }
2278 finalized[dof] = true;
2279
2280 if (ri.group >= 0 && dof_group[dof] != ri.group)
2281 {
2282 // the sender didn't see the complete group, forward the message
2283 ForwardRow(ri.row, dof, ri.group, dof_group[dof], send_msg.back());
2284 }
2285 }
2286 }
2287
2288 // finalize all rows that can currently be finalized
2289 bool done = false;
2290 while (!done)
2291 {
2292 done = true;
2293 for (int dof = 0; dof < ndofs; dof++)
2294 {
2295 if (finalized[dof]) { continue; }
2296
2297 bool owned = (dof_owner[dof] == 0);
2298 bool shared = (dof_group[dof] != 0);
2299
2300 if (owned && DofFinalizable(dof, finalized, deps))
2301 {
2302 const int* dep_col = deps.GetRowColumns(dof);
2303 const double* dep_coef = deps.GetRowEntries(dof);
2304 int num_dep = deps.RowSize(dof);
2305
2306 // form linear combination of rows
2307 buffer.elems.clear();
2308 for (int j = 0; j < num_dep; j++)
2309 {
2310 buffer.AddRow(pmatrix[dep_col[j]], dep_coef[j]);
2311 }
2312 buffer.Collapse();
2313 pmatrix[dof] = buffer;
2314
2315 finalized[dof] = true;
2316 num_finalized++;
2317 done = false;
2318
2319 // send row to neighbors who need it
2320 if (shared)
2321 {
2322 ScheduleSendRow(pmatrix[dof], dof, dof_group[dof],
2323 send_msg.back());
2324 }
2325 }
2326 }
2327 }
2328
2329 #ifdef MFEM_DEBUG_PMATRIX
2330 /*static int dump = 0;
2331 if (dump < 10)
2332 {
2333 char fname[100];
2334 sprintf(fname, "dofs%02d.txt", MyRank);
2335 std::ofstream f(fname);
2336 DebugDumpDOFs(f, deps, dof_group, dof_owner, finalized);
2337 dump++;
2338 }*/
2339 #endif
2340
2341 // send current batch of messages
2342 NeighborRowMessage::IsendAll(send_msg.back(), MyComm);
2343 #ifdef MFEM_PMATRIX_STATS
2344 n_msgs_sent += send_msg.back().size();
2345 #endif
2346 }
2347
2348 if (P)
2349 {
2350 *P = MakeVDimHypreMatrix(pmatrix, ndofs, num_true_dofs,
2351 dof_offs, tdof_offs);
2352 }
2353
2354 // clean up possible remaining messages in the queue to avoid receiving
2355 // them erroneously in the next run
2356 int rank, size;
2357 while (NeighborRowMessage::IProbe(rank, size, MyComm))
2358 {
2359 recv_msg.RecvDrop(rank, size, MyComm);
2360 }
2361
2362 // make sure we can discard all send buffers
2363 for (std::list<NeighborRowMessage::Map>::iterator
2364 it = send_msg.begin(); it != send_msg.end(); ++it)
2365 {
2366 NeighborRowMessage::WaitAllSent(*it);
2367 }
2368
2369 #ifdef MFEM_PMATRIX_STATS
2370 int n_rounds = send_msg.size();
2371 int glob_rounds, glob_msgs_sent, glob_msgs_recv;
2372 int glob_rows_sent, glob_rows_recv, glob_rows_fwd;
2373
2374 MPI_Reduce(&n_rounds, &glob_rounds, 1, MPI_INT, MPI_SUM, 0, MyComm);
2375 MPI_Reduce(&n_msgs_sent, &glob_msgs_sent, 1, MPI_INT, MPI_SUM, 0, MyComm);
2376 MPI_Reduce(&n_msgs_recv, &glob_msgs_recv, 1, MPI_INT, MPI_SUM, 0, MyComm);
2377 MPI_Reduce(&n_rows_sent, &glob_rows_sent, 1, MPI_INT, MPI_SUM, 0, MyComm);
2378 MPI_Reduce(&n_rows_recv, &glob_rows_recv, 1, MPI_INT, MPI_SUM, 0, MyComm);
2379 MPI_Reduce(&n_rows_fwd, &glob_rows_fwd, 1, MPI_INT, MPI_SUM, 0, MyComm);
2380
2381 if (MyRank == 0)
2382 {
2383 mfem::out << "P matrix stats (avg per rank): "
2384 << double(glob_rounds)/NRanks << " rounds, "
2385 << double(glob_msgs_sent)/NRanks << " msgs sent, "
2386 << double(glob_msgs_recv)/NRanks << " msgs recv, "
2387 << double(glob_rows_sent)/NRanks << " rows sent, "
2388 << double(glob_rows_recv)/NRanks << " rows recv, "
2389 << double(glob_rows_fwd)/NRanks << " rows forwarded."
2390 << std::endl;
2391 }
2392 #endif
2393
2394 return num_true_dofs*vdim;
2395 }
2396
2397
2398 HypreParMatrix* ParFiniteElementSpace
MakeVDimHypreMatrix(const std::vector<PMatrixRow> & rows,int local_rows,int local_cols,Array<HYPRE_BigInt> & row_starts,Array<HYPRE_BigInt> & col_starts) const2399 ::MakeVDimHypreMatrix(const std::vector<PMatrixRow> &rows,
2400 int local_rows, int local_cols,
2401 Array<HYPRE_BigInt> &row_starts,
2402 Array<HYPRE_BigInt> &col_starts) const
2403 {
2404 bool assumed = HYPRE_AssumedPartitionCheck();
2405 bool bynodes = (ordering == Ordering::byNODES);
2406
2407 HYPRE_BigInt first_col = col_starts[assumed ? 0 : MyRank];
2408 HYPRE_BigInt next_col = col_starts[assumed ? 1 : MyRank+1];
2409
2410 // count nonzeros in diagonal/offdiagonal parts
2411 HYPRE_Int nnz_diag = 0, nnz_offd = 0;
2412 std::map<HYPRE_BigInt, int> col_map;
2413 for (int i = 0; i < local_rows; i++)
2414 {
2415 for (unsigned j = 0; j < rows[i].elems.size(); j++)
2416 {
2417 const PMatrixElement &elem = rows[i].elems[j];
2418 HYPRE_BigInt col = elem.column;
2419 if (col >= first_col && col < next_col)
2420 {
2421 nnz_diag += vdim;
2422 }
2423 else
2424 {
2425 nnz_offd += vdim;
2426 for (int vd = 0; vd < vdim; vd++)
2427 {
2428 col_map[col] = -1;
2429 col += elem.stride;
2430 }
2431 }
2432 }
2433 }
2434
2435 // create offd column mapping
2436 HYPRE_BigInt *cmap = Memory<HYPRE_BigInt>(col_map.size());
2437 int offd_col = 0;
2438 for (auto it = col_map.begin(); it != col_map.end(); ++it)
2439 {
2440 cmap[offd_col] = it->first;
2441 it->second = offd_col++;
2442 }
2443
2444 HYPRE_Int *I_diag = Memory<HYPRE_Int>(vdim*local_rows + 1);
2445 HYPRE_Int *I_offd = Memory<HYPRE_Int>(vdim*local_rows + 1);
2446
2447 HYPRE_Int *J_diag = Memory<HYPRE_Int>(nnz_diag);
2448 HYPRE_Int *J_offd = Memory<HYPRE_Int>(nnz_offd);
2449
2450 double *A_diag = Memory<double>(nnz_diag);
2451 double *A_offd = Memory<double>(nnz_offd);
2452
2453 int vdim1 = bynodes ? vdim : 1;
2454 int vdim2 = bynodes ? 1 : vdim;
2455 int vdim_offset = bynodes ? local_cols : 1;
2456
2457 // copy the diag/offd elements
2458 nnz_diag = nnz_offd = 0;
2459 int vrow = 0;
2460 for (int vd1 = 0; vd1 < vdim1; vd1++)
2461 {
2462 for (int i = 0; i < local_rows; i++)
2463 {
2464 for (int vd2 = 0; vd2 < vdim2; vd2++)
2465 {
2466 I_diag[vrow] = nnz_diag;
2467 I_offd[vrow++] = nnz_offd;
2468
2469 int vd = bynodes ? vd1 : vd2;
2470 for (unsigned j = 0; j < rows[i].elems.size(); j++)
2471 {
2472 const PMatrixElement &elem = rows[i].elems[j];
2473 if (elem.column >= first_col && elem.column < next_col)
2474 {
2475 J_diag[nnz_diag] = elem.column + vd*vdim_offset - first_col;
2476 A_diag[nnz_diag++] = elem.value;
2477 }
2478 else
2479 {
2480 J_offd[nnz_offd] = col_map[elem.column + vd*elem.stride];
2481 A_offd[nnz_offd++] = elem.value;
2482 }
2483 }
2484 }
2485 }
2486 }
2487 MFEM_ASSERT(vrow == vdim*local_rows, "");
2488 I_diag[vrow] = nnz_diag;
2489 I_offd[vrow] = nnz_offd;
2490
2491 return new HypreParMatrix(MyComm,
2492 row_starts.Last(), col_starts.Last(),
2493 row_starts.GetData(), col_starts.GetData(),
2494 I_diag, J_diag, A_diag,
2495 I_offd, J_offd, A_offd,
2496 col_map.size(), cmap);
2497 }
2498
2499 template <typename int_type>
make_i_array(int nrows)2500 static int_type* make_i_array(int nrows)
2501 {
2502 int_type *I = Memory<int_type>(nrows+1);
2503 for (int i = 0; i <= nrows; i++) { I[i] = -1; }
2504 return I;
2505 }
2506
2507 template <typename int_type>
make_j_array(int_type * I,int nrows)2508 static int_type* make_j_array(int_type* I, int nrows)
2509 {
2510 int nnz = 0;
2511 for (int i = 0; i < nrows; i++)
2512 {
2513 if (I[i] >= 0) { nnz++; }
2514 }
2515 int_type *J = Memory<int_type>(nnz);
2516
2517 I[nrows] = -1;
2518 for (int i = 0, k = 0; i <= nrows; i++)
2519 {
2520 int_type col = I[i];
2521 I[i] = k;
2522 if (col >= 0) { J[k++] = col; }
2523 }
2524 return J;
2525 }
2526
2527 HypreParMatrix*
RebalanceMatrix(int old_ndofs,const Table * old_elem_dof)2528 ParFiniteElementSpace::RebalanceMatrix(int old_ndofs,
2529 const Table* old_elem_dof)
2530 {
2531 MFEM_VERIFY(Nonconforming(), "Only supported for nonconforming meshes.");
2532 MFEM_VERIFY(old_dof_offsets.Size(), "ParFiniteElementSpace::Update needs to "
2533 "be called before ParFiniteElementSpace::RebalanceMatrix");
2534
2535 HYPRE_BigInt old_offset = HYPRE_AssumedPartitionCheck()
2536 ? old_dof_offsets[0] : old_dof_offsets[MyRank];
2537
2538 // send old DOFs of elements we used to own
2539 ParNCMesh* pncmesh = pmesh->pncmesh;
2540 pncmesh->SendRebalanceDofs(old_ndofs, *old_elem_dof, old_offset, this);
2541
2542 Array<int> dofs;
2543 int vsize = GetVSize();
2544
2545 const Array<int> &old_index = pncmesh->GetRebalanceOldIndex();
2546 MFEM_VERIFY(old_index.Size() == pmesh->GetNE(),
2547 "Mesh::Rebalance was not called before "
2548 "ParFiniteElementSpace::RebalanceMatrix");
2549
2550 // prepare the local (diagonal) part of the matrix
2551 HYPRE_Int* i_diag = make_i_array<HYPRE_Int>(vsize);
2552 for (int i = 0; i < pmesh->GetNE(); i++)
2553 {
2554 if (old_index[i] >= 0) // we had this element before
2555 {
2556 const int* old_dofs = old_elem_dof->GetRow(old_index[i]);
2557 GetElementDofs(i, dofs);
2558
2559 for (int vd = 0; vd < vdim; vd++)
2560 {
2561 for (int j = 0; j < dofs.Size(); j++)
2562 {
2563 int row = DofToVDof(dofs[j], vd);
2564 if (row < 0) { row = -1 - row; }
2565
2566 int col = DofToVDof(old_dofs[j], vd, old_ndofs);
2567 if (col < 0) { col = -1 - col; }
2568
2569 i_diag[row] = col;
2570 }
2571 }
2572 }
2573 }
2574 HYPRE_Int* j_diag = make_j_array(i_diag, vsize);
2575
2576 // receive old DOFs for elements we obtained from others in Rebalance
2577 Array<int> new_elements;
2578 Array<long> old_remote_dofs;
2579 pncmesh->RecvRebalanceDofs(new_elements, old_remote_dofs);
2580
2581 // create the offdiagonal part of the matrix
2582 HYPRE_BigInt* i_offd = make_i_array<HYPRE_BigInt>(vsize);
2583 for (int i = 0, pos = 0; i < new_elements.Size(); i++)
2584 {
2585 GetElementDofs(new_elements[i], dofs);
2586 const long* old_dofs = &old_remote_dofs[pos];
2587 pos += dofs.Size() * vdim;
2588
2589 for (int vd = 0; vd < vdim; vd++)
2590 {
2591 for (int j = 0; j < dofs.Size(); j++)
2592 {
2593 int row = DofToVDof(dofs[j], vd);
2594 if (row < 0) { row = -1 - row; }
2595
2596 if (i_diag[row] == i_diag[row+1]) // diag row empty?
2597 {
2598 i_offd[row] = old_dofs[j + vd * dofs.Size()];
2599 }
2600 }
2601 }
2602 }
2603 HYPRE_BigInt* j_offd = make_j_array(i_offd, vsize);
2604
2605 #ifndef HYPRE_MIXEDINT
2606 HYPRE_Int *i_offd_hi = i_offd;
2607 #else
2608 // Copy of i_offd array as array of HYPRE_Int
2609 HYPRE_Int *i_offd_hi = Memory<HYPRE_Int>(vsize + 1);
2610 std::copy(i_offd, i_offd + vsize + 1, i_offd_hi);
2611 Memory<HYPRE_BigInt>(i_offd, vsize + 1, true).Delete();
2612 #endif
2613
2614 // create the offd column map
2615 int offd_cols = i_offd_hi[vsize];
2616 Array<Pair<HYPRE_BigInt, int> > cmap_offd(offd_cols);
2617 for (int i = 0; i < offd_cols; i++)
2618 {
2619 cmap_offd[i].one = j_offd[i];
2620 cmap_offd[i].two = i;
2621 }
2622
2623 #ifndef HYPRE_MIXEDINT
2624 HYPRE_Int *j_offd_hi = j_offd;
2625 #else
2626 HYPRE_Int *j_offd_hi = Memory<HYPRE_Int>(offd_cols);
2627 Memory<HYPRE_BigInt>(j_offd, offd_cols, true).Delete();
2628 #endif
2629
2630 SortPairs<HYPRE_BigInt, int>(cmap_offd, offd_cols);
2631
2632 HYPRE_BigInt* cmap = Memory<HYPRE_BigInt>(offd_cols);
2633 for (int i = 0; i < offd_cols; i++)
2634 {
2635 cmap[i] = cmap_offd[i].one;
2636 j_offd_hi[cmap_offd[i].two] = i;
2637 }
2638
2639 HypreParMatrix *M;
2640 M = new HypreParMatrix(MyComm, MyRank, NRanks, dof_offsets, old_dof_offsets,
2641 i_diag, j_diag, i_offd_hi, j_offd_hi, cmap, offd_cols);
2642 return M;
2643 }
2644
2645
2646 struct DerefDofMessage
2647 {
2648 std::vector<HYPRE_BigInt> dofs;
2649 MPI_Request request;
2650 };
2651
2652 HypreParMatrix*
ParallelDerefinementMatrix(int old_ndofs,const Table * old_elem_dof)2653 ParFiniteElementSpace::ParallelDerefinementMatrix(int old_ndofs,
2654 const Table* old_elem_dof)
2655 {
2656 int nrk = HYPRE_AssumedPartitionCheck() ? 2 : NRanks;
2657
2658 MFEM_VERIFY(Nonconforming(), "Not implemented for conforming meshes.");
2659 MFEM_VERIFY(old_dof_offsets[nrk], "Missing previous (finer) space.");
2660
2661 #if 0 // check no longer seems to work with NC tet refinement
2662 MFEM_VERIFY(dof_offsets[nrk] <= old_dof_offsets[nrk],
2663 "Previous space is not finer.");
2664 #endif
2665
2666 // Note to the reader: please make sure you first read
2667 // FiniteElementSpace::RefinementMatrix, then
2668 // FiniteElementSpace::DerefinementMatrix, and only then this function.
2669 // You have been warned! :-)
2670
2671 Mesh::GeometryList elem_geoms(*mesh);
2672
2673 Array<int> dofs, old_dofs, old_vdofs;
2674 Vector row;
2675
2676 ParNCMesh* pncmesh = pmesh->pncmesh;
2677
2678 int ldof[Geometry::NumGeom];
2679 for (int i = 0; i < Geometry::NumGeom; i++)
2680 {
2681 ldof[i] = 0;
2682 }
2683 for (int i = 0; i < elem_geoms.Size(); i++)
2684 {
2685 Geometry::Type geom = elem_geoms[i];
2686 ldof[geom] = fec->FiniteElementForGeometry(geom)->GetDof();
2687 }
2688
2689 const CoarseFineTransformations &dtrans = pncmesh->GetDerefinementTransforms();
2690 const Array<int> &old_ranks = pncmesh->GetDerefineOldRanks();
2691
2692 std::map<int, DerefDofMessage> messages;
2693
2694 HYPRE_BigInt old_offset = HYPRE_AssumedPartitionCheck()
2695 ? old_dof_offsets[0] : old_dof_offsets[MyRank];
2696
2697 // communicate DOFs for derefinements that straddle processor boundaries,
2698 // note that this is infrequent due to the way elements are ordered
2699 for (int k = 0; k < dtrans.embeddings.Size(); k++)
2700 {
2701 const Embedding &emb = dtrans.embeddings[k];
2702
2703 int fine_rank = old_ranks[k];
2704 int coarse_rank = (emb.parent < 0) ? (-1 - emb.parent)
2705 : pncmesh->ElementRank(emb.parent);
2706
2707 if (coarse_rank != MyRank && fine_rank == MyRank)
2708 {
2709 old_elem_dof->GetRow(k, dofs);
2710 DofsToVDofs(dofs, old_ndofs);
2711
2712 DerefDofMessage &msg = messages[k];
2713 msg.dofs.resize(dofs.Size());
2714 for (int i = 0; i < dofs.Size(); i++)
2715 {
2716 msg.dofs[i] = old_offset + dofs[i];
2717 }
2718
2719 MPI_Isend(&msg.dofs[0], msg.dofs.size(), HYPRE_MPI_BIG_INT,
2720 coarse_rank, 291, MyComm, &msg.request);
2721 }
2722 else if (coarse_rank == MyRank && fine_rank != MyRank)
2723 {
2724 MFEM_ASSERT(emb.parent >= 0, "");
2725 Geometry::Type geom = mesh->GetElementBaseGeometry(emb.parent);
2726
2727 DerefDofMessage &msg = messages[k];
2728 msg.dofs.resize(ldof[geom]*vdim);
2729
2730 MPI_Irecv(&msg.dofs[0], ldof[geom]*vdim, HYPRE_MPI_BIG_INT,
2731 fine_rank, 291, MyComm, &msg.request);
2732 }
2733 // TODO: coalesce Isends/Irecvs to the same rank. Typically, on uniform
2734 // derefinement, there should be just one send to MyRank-1 and one recv
2735 // from MyRank+1
2736 }
2737
2738 DenseTensor localR[Geometry::NumGeom];
2739 for (int i = 0; i < elem_geoms.Size(); i++)
2740 {
2741 GetLocalDerefinementMatrices(elem_geoms[i], localR[elem_geoms[i]]);
2742 }
2743
2744 // create the diagonal part of the derefinement matrix
2745 SparseMatrix *diag = new SparseMatrix(ndofs*vdim, old_ndofs*vdim);
2746
2747 Array<char> mark(diag->Height());
2748 mark = 0;
2749
2750 for (int k = 0; k < dtrans.embeddings.Size(); k++)
2751 {
2752 const Embedding &emb = dtrans.embeddings[k];
2753 if (emb.parent < 0) { continue; }
2754
2755 int coarse_rank = pncmesh->ElementRank(emb.parent);
2756 int fine_rank = old_ranks[k];
2757
2758 if (coarse_rank == MyRank && fine_rank == MyRank)
2759 {
2760 Geometry::Type geom = mesh->GetElementBaseGeometry(emb.parent);
2761 DenseMatrix &lR = localR[geom](emb.matrix);
2762
2763 elem_dof->GetRow(emb.parent, dofs);
2764 old_elem_dof->GetRow(k, old_dofs);
2765
2766 for (int vd = 0; vd < vdim; vd++)
2767 {
2768 old_dofs.Copy(old_vdofs);
2769 DofsToVDofs(vd, old_vdofs, old_ndofs);
2770
2771 for (int i = 0; i < lR.Height(); i++)
2772 {
2773 if (!std::isfinite(lR(i, 0))) { continue; }
2774
2775 int r = DofToVDof(dofs[i], vd);
2776 int m = (r >= 0) ? r : (-1 - r);
2777
2778 if (!mark[m])
2779 {
2780 lR.GetRow(i, row);
2781 diag->SetRow(r, old_vdofs, row);
2782 mark[m] = 1;
2783 }
2784 }
2785 }
2786 }
2787 }
2788 diag->Finalize();
2789
2790 // wait for all sends/receives to complete
2791 for (auto it = messages.begin(); it != messages.end(); ++it)
2792 {
2793 MPI_Wait(&it->second.request, MPI_STATUS_IGNORE);
2794 }
2795
2796 // create the offdiagonal part of the derefinement matrix
2797 SparseMatrix *offd = new SparseMatrix(ndofs*vdim, 1);
2798
2799 std::map<HYPRE_BigInt, int> col_map;
2800 for (int k = 0; k < dtrans.embeddings.Size(); k++)
2801 {
2802 const Embedding &emb = dtrans.embeddings[k];
2803 if (emb.parent < 0) { continue; }
2804
2805 int coarse_rank = pncmesh->ElementRank(emb.parent);
2806 int fine_rank = old_ranks[k];
2807
2808 if (coarse_rank == MyRank && fine_rank != MyRank)
2809 {
2810 Geometry::Type geom = mesh->GetElementBaseGeometry(emb.parent);
2811 DenseMatrix &lR = localR[geom](emb.matrix);
2812
2813 elem_dof->GetRow(emb.parent, dofs);
2814
2815 DerefDofMessage &msg = messages[k];
2816 MFEM_ASSERT(msg.dofs.size(), "");
2817
2818 for (int vd = 0; vd < vdim; vd++)
2819 {
2820 MFEM_ASSERT(ldof[geom], "");
2821 HYPRE_BigInt* remote_dofs = &msg.dofs[vd*ldof[geom]];
2822
2823 for (int i = 0; i < lR.Height(); i++)
2824 {
2825 if (!std::isfinite(lR(i, 0))) { continue; }
2826
2827 int r = DofToVDof(dofs[i], vd);
2828 int m = (r >= 0) ? r : (-1 - r);
2829
2830 if (!mark[m])
2831 {
2832 lR.GetRow(i, row);
2833 MFEM_ASSERT(ldof[geom] == row.Size(), "");
2834 for (int j = 0; j < ldof[geom]; j++)
2835 {
2836 if (row[j] == 0.0) { continue; } // NOTE: lR thresholded
2837 int &lcol = col_map[remote_dofs[j]];
2838 if (!lcol) { lcol = col_map.size(); }
2839 offd->_Set_(m, lcol-1, row[j]);
2840 }
2841 mark[m] = 1;
2842 }
2843 }
2844 }
2845 }
2846 }
2847 messages.clear();
2848 offd->Finalize(0);
2849 offd->SetWidth(col_map.size());
2850
2851 // create offd column mapping for use by hypre
2852 HYPRE_BigInt *cmap = Memory<HYPRE_BigInt>(offd->Width());
2853 for (auto it = col_map.begin(); it != col_map.end(); ++it)
2854 {
2855 cmap[it->second-1] = it->first;
2856 }
2857
2858 // reorder offd columns so that 'cmap' is monotonic
2859 // NOTE: this is easier and probably faster (offd is small) than making
2860 // sure cmap is determined and sorted before the offd matrix is created
2861 {
2862 int width = offd->Width();
2863 Array<Pair<HYPRE_BigInt, int> > reorder(width);
2864 for (int i = 0; i < width; i++)
2865 {
2866 reorder[i].one = cmap[i];
2867 reorder[i].two = i;
2868 }
2869 reorder.Sort();
2870
2871 Array<int> reindex(width);
2872 for (int i = 0; i < width; i++)
2873 {
2874 reindex[reorder[i].two] = i;
2875 cmap[i] = reorder[i].one;
2876 }
2877
2878 int *J = offd->GetJ();
2879 for (int i = 0; i < offd->NumNonZeroElems(); i++)
2880 {
2881 J[i] = reindex[J[i]];
2882 }
2883 offd->SortColumnIndices();
2884 }
2885
2886 HypreParMatrix* R;
2887 R = new HypreParMatrix(MyComm, dof_offsets[nrk], old_dof_offsets[nrk],
2888 dof_offsets, old_dof_offsets, diag, offd, cmap,
2889 true);
2890
2891 R->SetOwnerFlags(R->OwnsDiag(), R->OwnsOffd(), 1);
2892
2893 return R;
2894 }
2895
Destroy()2896 void ParFiniteElementSpace::Destroy()
2897 {
2898 ldof_group.DeleteAll();
2899 ldof_ltdof.DeleteAll();
2900 dof_offsets.DeleteAll();
2901 tdof_offsets.DeleteAll();
2902 tdof_nb_offsets.DeleteAll();
2903 // preserve old_dof_offsets
2904 ldof_sign.DeleteAll();
2905
2906 delete P; P = NULL;
2907 delete Pconf; Pconf = NULL;
2908 delete Rconf; Rconf = NULL;
2909 delete R_transpose; R_transpose = NULL;
2910 delete R; R = NULL;
2911
2912 delete gcomm; gcomm = NULL;
2913
2914 num_face_nbr_dofs = -1;
2915 face_nbr_element_dof.Clear();
2916 face_nbr_ldof.Clear();
2917 face_nbr_glob_dof_map.DeleteAll();
2918 send_face_nbr_ldof.Clear();
2919 }
2920
CopyProlongationAndRestriction(const FiniteElementSpace & fes,const Array<int> * perm)2921 void ParFiniteElementSpace::CopyProlongationAndRestriction(
2922 const FiniteElementSpace &fes, const Array<int> *perm)
2923 {
2924 const ParFiniteElementSpace *pfes
2925 = dynamic_cast<const ParFiniteElementSpace*>(&fes);
2926 MFEM_VERIFY(pfes != NULL, "");
2927 MFEM_VERIFY(P == NULL, "");
2928 MFEM_VERIFY(R == NULL, "");
2929
2930 SparseMatrix *perm_mat = NULL, *perm_mat_tr = NULL;
2931 if (perm)
2932 {
2933 int n = perm->Size();
2934 perm_mat = new SparseMatrix(n, n);
2935 for (int i=0; i<n; ++i)
2936 {
2937 double s;
2938 int j = DecodeDof((*perm)[i], s);
2939 perm_mat->Set(i, j, s);
2940 }
2941 perm_mat->Finalize();
2942 perm_mat_tr = Transpose(*perm_mat);
2943 }
2944
2945 if (pfes->P != NULL)
2946 {
2947 if (perm) { P = pfes->P->LeftDiagMult(*perm_mat); }
2948 else { P = new HypreParMatrix(*pfes->P); }
2949 nonconf_P = true;
2950 }
2951 if (pfes->R != NULL)
2952 {
2953 if (perm) { R = Mult(*pfes->R, *perm_mat_tr); }
2954 else { R = new SparseMatrix(*pfes->R); }
2955 }
2956
2957 delete perm_mat;
2958 delete perm_mat_tr;
2959 }
2960
GetTrueTransferOperator(const FiniteElementSpace & coarse_fes,OperatorHandle & T) const2961 void ParFiniteElementSpace::GetTrueTransferOperator(
2962 const FiniteElementSpace &coarse_fes, OperatorHandle &T) const
2963 {
2964 OperatorHandle Tgf(T.Type() == Operator::Hypre_ParCSR ?
2965 Operator::MFEM_SPARSEMAT : Operator::ANY_TYPE);
2966 GetTransferOperator(coarse_fes, Tgf);
2967 Dof_TrueDof_Matrix(); // Make sure R is built - we need R in all cases.
2968 if (T.Type() == Operator::Hypre_ParCSR)
2969 {
2970 const ParFiniteElementSpace *c_pfes =
2971 dynamic_cast<const ParFiniteElementSpace *>(&coarse_fes);
2972 MFEM_ASSERT(c_pfes != NULL, "coarse_fes must be a parallel space");
2973 SparseMatrix *RA = mfem::Mult(*R, *Tgf.As<SparseMatrix>());
2974 Tgf.Clear();
2975 T.Reset(c_pfes->Dof_TrueDof_Matrix()->
2976 LeftDiagMult(*RA, GetTrueDofOffsets()));
2977 delete RA;
2978 }
2979 else
2980 {
2981 T.Reset(new TripleProductOperator(R, Tgf.Ptr(),
2982 coarse_fes.GetProlongationMatrix(),
2983 false, Tgf.OwnsOperator(), false));
2984 Tgf.SetOperatorOwner(false);
2985 }
2986 }
2987
Update(bool want_transform)2988 void ParFiniteElementSpace::Update(bool want_transform)
2989 {
2990 MFEM_VERIFY(!IsVariableOrder(),
2991 "Parallel variable order space not supported yet.");
2992
2993 if (mesh->GetSequence() == mesh_sequence)
2994 {
2995 return; // no need to update, no-op
2996 }
2997 if (want_transform && mesh->GetSequence() != mesh_sequence + 1)
2998 {
2999 MFEM_ABORT("Error in update sequence. Space needs to be updated after "
3000 "each mesh modification.");
3001 }
3002
3003 if (NURBSext)
3004 {
3005 UpdateNURBS();
3006 return;
3007 }
3008
3009 Table* old_elem_dof = NULL;
3010 int old_ndofs;
3011
3012 // save old DOF table
3013 if (want_transform)
3014 {
3015 old_elem_dof = elem_dof;
3016 elem_dof = NULL;
3017 old_ndofs = ndofs;
3018 Swap(dof_offsets, old_dof_offsets);
3019 }
3020
3021 Destroy();
3022 FiniteElementSpace::Destroy(); // calls Th.Clear()
3023
3024 FiniteElementSpace::Construct();
3025 Construct();
3026
3027 BuildElementToDofTable();
3028
3029 if (want_transform)
3030 {
3031 // calculate appropriate GridFunction transformation
3032 switch (mesh->GetLastOperation())
3033 {
3034 case Mesh::REFINE:
3035 {
3036 if (Th.Type() != Operator::MFEM_SPARSEMAT)
3037 {
3038 Th.Reset(new RefinementOperator(this, old_elem_dof, old_ndofs));
3039 // The RefinementOperator takes ownership of 'old_elem_dofs', so
3040 // we no longer own it:
3041 old_elem_dof = NULL;
3042 }
3043 else
3044 {
3045 // calculate fully assembled matrix
3046 Th.Reset(RefinementMatrix(old_ndofs, old_elem_dof));
3047 }
3048 break;
3049 }
3050
3051 case Mesh::DEREFINE:
3052 {
3053 Th.Reset(ParallelDerefinementMatrix(old_ndofs, old_elem_dof));
3054 if (Nonconforming())
3055 {
3056 Th.SetOperatorOwner(false);
3057 Th.Reset(new TripleProductOperator(P, R, Th.Ptr(),
3058 false, false, true));
3059 }
3060 break;
3061 }
3062
3063 case Mesh::REBALANCE:
3064 {
3065 Th.Reset(RebalanceMatrix(old_ndofs, old_elem_dof));
3066 break;
3067 }
3068
3069 default:
3070 break;
3071 }
3072
3073 delete old_elem_dof;
3074 }
3075 }
3076
UpdateMeshPointer(Mesh * new_mesh)3077 void ParFiniteElementSpace::UpdateMeshPointer(Mesh *new_mesh)
3078 {
3079 ParMesh *new_pmesh = dynamic_cast<ParMesh*>(new_mesh);
3080 MFEM_VERIFY(new_pmesh != NULL,
3081 "ParFiniteElementSpace::UpdateMeshPointer(...) must be a ParMesh");
3082 mesh = new_mesh;
3083 pmesh = new_pmesh;
3084 }
3085
ConformingProlongationOperator(int lsize,const GroupCommunicator & gc_,bool local_)3086 ConformingProlongationOperator::ConformingProlongationOperator(
3087 int lsize, const GroupCommunicator &gc_, bool local_)
3088 : gc(gc_), local(local_)
3089 {
3090 const Table &group_ldof = gc.GroupLDofTable();
3091
3092 int n_external = 0;
3093 for (int g=1; g<group_ldof.Size(); ++g)
3094 {
3095 if (!gc.GetGroupTopology().IAmMaster(g))
3096 {
3097 n_external += group_ldof.RowSize(g);
3098 }
3099 }
3100 int tsize = lsize - n_external;
3101
3102 height = lsize;
3103 width = tsize;
3104
3105 external_ldofs.Reserve(n_external);
3106 for (int gr = 1; gr < group_ldof.Size(); gr++)
3107 {
3108 if (!gc.GetGroupTopology().IAmMaster(gr))
3109 {
3110 external_ldofs.Append(group_ldof.GetRow(gr), group_ldof.RowSize(gr));
3111 }
3112 }
3113 external_ldofs.Sort();
3114 }
3115
GetGroupCommunicator() const3116 const GroupCommunicator &ConformingProlongationOperator::GetGroupCommunicator()
3117 const
3118 {
3119 return gc;
3120 }
3121
ConformingProlongationOperator(const ParFiniteElementSpace & pfes,bool local_)3122 ConformingProlongationOperator::ConformingProlongationOperator(
3123 const ParFiniteElementSpace &pfes, bool local_)
3124 : Operator(pfes.GetVSize(), pfes.GetTrueVSize()),
3125 external_ldofs(),
3126 gc(pfes.GroupComm()),
3127 local(local_)
3128 {
3129 MFEM_VERIFY(pfes.Conforming(), "");
3130 const Table &group_ldof = gc.GroupLDofTable();
3131 external_ldofs.Reserve(Height()-Width());
3132 for (int gr = 1; gr < group_ldof.Size(); gr++)
3133 {
3134 if (!gc.GetGroupTopology().IAmMaster(gr))
3135 {
3136 external_ldofs.Append(group_ldof.GetRow(gr), group_ldof.RowSize(gr));
3137 }
3138 }
3139 external_ldofs.Sort();
3140 MFEM_ASSERT(external_ldofs.Size() == Height()-Width(), "");
3141 #ifdef MFEM_DEBUG
3142 for (int j = 1; j < external_ldofs.Size(); j++)
3143 {
3144 // Check for repeated ldofs.
3145 MFEM_VERIFY(external_ldofs[j-1] < external_ldofs[j], "");
3146 }
3147 int j = 0;
3148 for (int i = 0; i < external_ldofs.Size(); i++)
3149 {
3150 const int end = external_ldofs[i];
3151 for ( ; j < end; j++)
3152 {
3153 MFEM_VERIFY(j-i == pfes.GetLocalTDofNumber(j), "");
3154 }
3155 j = end+1;
3156 }
3157 for ( ; j < Height(); j++)
3158 {
3159 MFEM_VERIFY(j-external_ldofs.Size() == pfes.GetLocalTDofNumber(j), "");
3160 }
3161 // gc.PrintInfo();
3162 // pfes.Dof_TrueDof_Matrix()->PrintCommPkg();
3163 #endif
3164 }
3165
Mult(const Vector & x,Vector & y) const3166 void ConformingProlongationOperator::Mult(const Vector &x, Vector &y) const
3167 {
3168 MFEM_ASSERT(x.Size() == Width(), "");
3169 MFEM_ASSERT(y.Size() == Height(), "");
3170
3171 const double *xdata = x.HostRead();
3172 double *ydata = y.HostWrite();
3173 const int m = external_ldofs.Size();
3174
3175 const int in_layout = 2; // 2 - input is ltdofs array
3176 if (local)
3177 {
3178 y = 0.0;
3179 }
3180 else
3181 {
3182 gc.BcastBegin(const_cast<double*>(xdata), in_layout);
3183 }
3184
3185 int j = 0;
3186 for (int i = 0; i < m; i++)
3187 {
3188 const int end = external_ldofs[i];
3189 std::copy(xdata+j-i, xdata+end-i, ydata+j);
3190 j = end+1;
3191 }
3192 std::copy(xdata+j-m, xdata+Width(), ydata+j);
3193
3194 const int out_layout = 0; // 0 - output is ldofs array
3195 if (!local)
3196 {
3197 gc.BcastEnd(ydata, out_layout);
3198 }
3199 }
3200
MultTranspose(const Vector & x,Vector & y) const3201 void ConformingProlongationOperator::MultTranspose(
3202 const Vector &x, Vector &y) const
3203 {
3204 MFEM_ASSERT(x.Size() == Height(), "");
3205 MFEM_ASSERT(y.Size() == Width(), "");
3206
3207 const double *xdata = x.HostRead();
3208 double *ydata = y.HostWrite();
3209 const int m = external_ldofs.Size();
3210
3211 if (!local)
3212 {
3213 gc.ReduceBegin(xdata);
3214 }
3215
3216 int j = 0;
3217 for (int i = 0; i < m; i++)
3218 {
3219 const int end = external_ldofs[i];
3220 std::copy(xdata+j, xdata+end, ydata+j-i);
3221 j = end+1;
3222 }
3223 std::copy(xdata+j, xdata+Height(), ydata+j-m);
3224
3225 const int out_layout = 2; // 2 - output is an array on all ltdofs
3226 if (!local)
3227 {
3228 gc.ReduceEnd<double>(ydata, out_layout, GroupCommunicator::Sum);
3229 }
3230 }
3231
DeviceConformingProlongationOperator(const GroupCommunicator & gc_,const SparseMatrix * R,bool local_)3232 DeviceConformingProlongationOperator::DeviceConformingProlongationOperator(
3233 const GroupCommunicator &gc_, const SparseMatrix *R, bool local_)
3234 : ConformingProlongationOperator(R->Width(), gc_, local_),
3235 mpi_gpu_aware(Device::GetGPUAwareMPI())
3236 {
3237 MFEM_ASSERT(R->Finalized(), "");
3238 const int tdofs = R->Height();
3239 MFEM_ASSERT(tdofs == R->HostReadI()[tdofs], "");
3240 ltdof_ldof = Array<int>(const_cast<int*>(R->HostReadJ()), tdofs);
3241 ltdof_ldof.UseDevice();
3242 {
3243 Table nbr_ltdof;
3244 gc.GetNeighborLTDofTable(nbr_ltdof);
3245 const int nb_connections = nbr_ltdof.Size_of_connections();
3246 shr_ltdof.SetSize(nb_connections);
3247 shr_ltdof.CopyFrom(nbr_ltdof.GetJ());
3248 shr_buf.SetSize(nb_connections);
3249 shr_buf.UseDevice(true);
3250 shr_buf_offsets = nbr_ltdof.GetIMemory();
3251 {
3252 Array<int> shr_ltdof(nbr_ltdof.GetJ(), nb_connections);
3253 Array<int> unique_ltdof(shr_ltdof);
3254 unique_ltdof.Sort();
3255 unique_ltdof.Unique();
3256 // Note: the next loop modifies the J array of nbr_ltdof
3257 for (int i = 0; i < shr_ltdof.Size(); i++)
3258 {
3259 shr_ltdof[i] = unique_ltdof.FindSorted(shr_ltdof[i]);
3260 MFEM_ASSERT(shr_ltdof[i] != -1, "internal error");
3261 }
3262 Table unique_shr;
3263 Transpose(shr_ltdof, unique_shr, unique_ltdof.Size());
3264 unq_ltdof = Array<int>(unique_ltdof, unique_ltdof.Size());
3265 unq_shr_i = Array<int>(unique_shr.GetI(), unique_shr.Size()+1);
3266 unq_shr_j = Array<int>(unique_shr.GetJ(), unique_shr.Size_of_connections());
3267 }
3268 nbr_ltdof.GetJMemory().Delete();
3269 nbr_ltdof.LoseData();
3270 }
3271 {
3272 Table nbr_ldof;
3273 gc.GetNeighborLDofTable(nbr_ldof);
3274 const int nb_connections = nbr_ldof.Size_of_connections();
3275 ext_ldof.SetSize(nb_connections);
3276 ext_ldof.CopyFrom(nbr_ldof.GetJ());
3277 ext_ldof.GetMemory().UseDevice(true);
3278 ext_buf.SetSize(nb_connections);
3279 ext_buf.UseDevice(true);
3280 ext_buf_offsets = nbr_ldof.GetIMemory();
3281 nbr_ldof.GetJMemory().Delete();
3282 nbr_ldof.LoseData();
3283 }
3284 const GroupTopology >opo = gc.GetGroupTopology();
3285 int req_counter = 0;
3286 for (int nbr = 1; nbr < gtopo.GetNumNeighbors(); nbr++)
3287 {
3288 const int send_offset = shr_buf_offsets[nbr];
3289 const int send_size = shr_buf_offsets[nbr+1] - send_offset;
3290 if (send_size > 0) { req_counter++; }
3291
3292 const int recv_offset = ext_buf_offsets[nbr];
3293 const int recv_size = ext_buf_offsets[nbr+1] - recv_offset;
3294 if (recv_size > 0) { req_counter++; }
3295 }
3296 requests = new MPI_Request[req_counter];
3297 }
3298
DeviceConformingProlongationOperator(const ParFiniteElementSpace & pfes,bool local_)3299 DeviceConformingProlongationOperator::DeviceConformingProlongationOperator(
3300 const ParFiniteElementSpace &pfes, bool local_)
3301 : DeviceConformingProlongationOperator(pfes.GroupComm(),
3302 pfes.GetRestrictionMatrix(),
3303 local_)
3304 {
3305 MFEM_ASSERT(pfes.Conforming(), "internal error");
3306 MFEM_ASSERT(pfes.GetRestrictionMatrix()->Height() == pfes.GetTrueVSize(), "");
3307 }
3308
ExtractSubVector(const Array<int> & indices,const Vector & in,Vector & out)3309 static void ExtractSubVector(const Array<int> &indices,
3310 const Vector &in, Vector &out)
3311 {
3312 MFEM_ASSERT(indices.Size() == out.Size(), "incompatible sizes!");
3313 auto y = out.Write();
3314 const auto x = in.Read();
3315 const auto I = indices.Read();
3316 MFEM_FORALL(i, indices.Size(), y[i] = x[I[i]];); // indices can be repeated
3317 }
3318
BcastBeginCopy(const Vector & x) const3319 void DeviceConformingProlongationOperator::BcastBeginCopy(
3320 const Vector &x) const
3321 {
3322 // shr_buf[i] = src[shr_ltdof[i]]
3323 if (shr_ltdof.Size() == 0) { return; }
3324 ExtractSubVector(shr_ltdof, x, shr_buf);
3325 // If the above kernel is executed asynchronously, we should wait for it to
3326 // complete
3327 if (mpi_gpu_aware) { MFEM_STREAM_SYNC; }
3328 }
3329
SetSubVector(const Array<int> & indices,const Vector & in,Vector & out)3330 static void SetSubVector(const Array<int> &indices,
3331 const Vector &in, Vector &out)
3332 {
3333 MFEM_ASSERT(indices.Size() == in.Size(), "incompatible sizes!");
3334 // Use ReadWrite() since we modify only a subset of the indices:
3335 auto y = out.ReadWrite();
3336 const auto x = in.Read();
3337 const auto I = indices.Read();
3338 MFEM_FORALL(i, indices.Size(), y[I[i]] = x[i];);
3339 }
3340
BcastLocalCopy(const Vector & x,Vector & y) const3341 void DeviceConformingProlongationOperator::BcastLocalCopy(
3342 const Vector &x, Vector &y) const
3343 {
3344 // dst[ltdof_ldof[i]] = src[i]
3345 if (ltdof_ldof.Size() == 0) { return; }
3346 SetSubVector(ltdof_ldof, x, y);
3347 }
3348
BcastEndCopy(Vector & y) const3349 void DeviceConformingProlongationOperator::BcastEndCopy(
3350 Vector &y) const
3351 {
3352 // dst[ext_ldof[i]] = ext_buf[i]
3353 if (ext_ldof.Size() == 0) { return; }
3354 SetSubVector(ext_ldof, ext_buf, y);
3355 }
3356
Mult(const Vector & x,Vector & y) const3357 void DeviceConformingProlongationOperator::Mult(const Vector &x,
3358 Vector &y) const
3359 {
3360 const GroupTopology >opo = gc.GetGroupTopology();
3361 int req_counter = 0;
3362 // Make sure 'y' is marked as valid on device and for use on device.
3363 // This ensures that there is no unnecessary host to device copy when the
3364 // input 'y' is valid on host (in 'y.SetSubVector(ext_ldof, 0.0)' when local
3365 // is true) or BcastLocalCopy (when local is false).
3366 y.Write();
3367 if (local)
3368 {
3369 // done on device since we've marked ext_ldof for use on device:
3370 y.SetSubVector(ext_ldof, 0.0);
3371 }
3372 else
3373 {
3374 BcastBeginCopy(x); // copy to 'shr_buf'
3375 for (int nbr = 1; nbr < gtopo.GetNumNeighbors(); nbr++)
3376 {
3377 const int send_offset = shr_buf_offsets[nbr];
3378 const int send_size = shr_buf_offsets[nbr+1] - send_offset;
3379 if (send_size > 0)
3380 {
3381 auto send_buf = mpi_gpu_aware ? shr_buf.Read() : shr_buf.HostRead();
3382 MPI_Isend(send_buf + send_offset, send_size, MPI_DOUBLE,
3383 gtopo.GetNeighborRank(nbr), 41822,
3384 gtopo.GetComm(), &requests[req_counter++]);
3385 }
3386 const int recv_offset = ext_buf_offsets[nbr];
3387 const int recv_size = ext_buf_offsets[nbr+1] - recv_offset;
3388 if (recv_size > 0)
3389 {
3390 auto recv_buf = mpi_gpu_aware ? ext_buf.Write() : ext_buf.HostWrite();
3391 MPI_Irecv(recv_buf + recv_offset, recv_size, MPI_DOUBLE,
3392 gtopo.GetNeighborRank(nbr), 41822,
3393 gtopo.GetComm(), &requests[req_counter++]);
3394 }
3395 }
3396 }
3397 BcastLocalCopy(x, y);
3398 if (!local)
3399 {
3400 MPI_Waitall(req_counter, requests, MPI_STATUSES_IGNORE);
3401 BcastEndCopy(y); // copy from 'ext_buf'
3402 }
3403 }
3404
~DeviceConformingProlongationOperator()3405 DeviceConformingProlongationOperator::~DeviceConformingProlongationOperator()
3406 {
3407 delete [] requests;
3408 ext_buf_offsets.Delete();
3409 shr_buf_offsets.Delete();
3410 }
3411
ReduceBeginCopy(const Vector & x) const3412 void DeviceConformingProlongationOperator::ReduceBeginCopy(
3413 const Vector &x) const
3414 {
3415 // ext_buf[i] = src[ext_ldof[i]]
3416 if (ext_ldof.Size() == 0) { return; }
3417 ExtractSubVector(ext_ldof, x, ext_buf);
3418 // If the above kernel is executed asynchronously, we should wait for it to
3419 // complete
3420 if (mpi_gpu_aware) { MFEM_STREAM_SYNC; }
3421 }
3422
ReduceLocalCopy(const Vector & x,Vector & y) const3423 void DeviceConformingProlongationOperator::ReduceLocalCopy(
3424 const Vector &x, Vector &y) const
3425 {
3426 // dst[i] = src[ltdof_ldof[i]]
3427 if (ltdof_ldof.Size() == 0) { return; }
3428 ExtractSubVector(ltdof_ldof, x, y);
3429 }
3430
AddSubVector(const Array<int> & unique_dst_indices,const Array<int> & unique_to_src_offsets,const Array<int> & unique_to_src_indices,const Vector & src,Vector & dst)3431 static void AddSubVector(const Array<int> &unique_dst_indices,
3432 const Array<int> &unique_to_src_offsets,
3433 const Array<int> &unique_to_src_indices,
3434 const Vector &src,
3435 Vector &dst)
3436 {
3437 auto y = dst.ReadWrite();
3438 const auto x = src.Read();
3439 const auto DST_I = unique_dst_indices.Read();
3440 const auto SRC_O = unique_to_src_offsets.Read();
3441 const auto SRC_I = unique_to_src_indices.Read();
3442 MFEM_FORALL(i, unique_dst_indices.Size(),
3443 {
3444 const int dst_idx = DST_I[i];
3445 double sum = y[dst_idx];
3446 const int end = SRC_O[i+1];
3447 for (int j = SRC_O[i]; j != end; ++j) { sum += x[SRC_I[j]]; }
3448 y[dst_idx] = sum;
3449 });
3450 }
3451
ReduceEndAssemble(Vector & y) const3452 void DeviceConformingProlongationOperator::ReduceEndAssemble(Vector &y) const
3453 {
3454 // dst[shr_ltdof[i]] += shr_buf[i]
3455 if (unq_ltdof.Size() == 0) { return; }
3456 AddSubVector(unq_ltdof, unq_shr_i, unq_shr_j, shr_buf, y);
3457 }
3458
MultTranspose(const Vector & x,Vector & y) const3459 void DeviceConformingProlongationOperator::MultTranspose(const Vector &x,
3460 Vector &y) const
3461 {
3462 const GroupTopology >opo = gc.GetGroupTopology();
3463 int req_counter = 0;
3464 if (!local)
3465 {
3466 ReduceBeginCopy(x); // copy to 'ext_buf'
3467 for (int nbr = 1; nbr < gtopo.GetNumNeighbors(); nbr++)
3468 {
3469 const int send_offset = ext_buf_offsets[nbr];
3470 const int send_size = ext_buf_offsets[nbr+1] - send_offset;
3471 if (send_size > 0)
3472 {
3473 auto send_buf = mpi_gpu_aware ? ext_buf.Read() : ext_buf.HostRead();
3474 MPI_Isend(send_buf + send_offset, send_size, MPI_DOUBLE,
3475 gtopo.GetNeighborRank(nbr), 41823,
3476 gtopo.GetComm(), &requests[req_counter++]);
3477 }
3478 const int recv_offset = shr_buf_offsets[nbr];
3479 const int recv_size = shr_buf_offsets[nbr+1] - recv_offset;
3480 if (recv_size > 0)
3481 {
3482 auto recv_buf = mpi_gpu_aware ? shr_buf.Write() : shr_buf.HostWrite();
3483 MPI_Irecv(recv_buf + recv_offset, recv_size, MPI_DOUBLE,
3484 gtopo.GetNeighborRank(nbr), 41823,
3485 gtopo.GetComm(), &requests[req_counter++]);
3486 }
3487 }
3488 }
3489 ReduceLocalCopy(x, y);
3490 if (!local)
3491 {
3492 MPI_Waitall(req_counter, requests, MPI_STATUSES_IGNORE);
3493 ReduceEndAssemble(y); // assemble from 'shr_buf'
3494 }
3495 }
3496
3497 } // namespace mfem
3498
3499 #endif
3500