1 // ---------------------------------------------------------------------
2 //
3 // Copyright (C) 2018 - 2020 by the deal.II authors
4 //
5 // This file is part of the deal.II library.
6 //
7 // The deal.II library is free software; you can use it, redistribute
8 // it, and/or modify it under the terms of the GNU Lesser General
9 // Public License as published by the Free Software Foundation; either
10 // version 2.1 of the License, or (at your option) any later version.
11 // The full text of the license can be found in the file LICENSE.md at
12 // the top level directory of deal.II.
13 //
14 // ---------------------------------------------------------------------
15 
16 
17 #ifndef dealii_face_setup_internal_h
18 #define dealii_face_setup_internal_h
19 
20 #include <deal.II/base/config.h>
21 
22 #include <deal.II/base/memory_consumption.h>
23 #include <deal.II/base/utilities.h>
24 
25 #include <deal.II/distributed/tria_base.h>
26 
27 #include <deal.II/grid/tria.h>
28 #include <deal.II/grid/tria_accessor.h>
29 
30 #include <deal.II/matrix_free/face_info.h>
31 #include <deal.II/matrix_free/task_info.h>
32 
33 #include <fstream>
34 
35 
36 DEAL_II_NAMESPACE_OPEN
37 
38 
39 namespace internal
40 {
41   namespace MatrixFreeFunctions
42   {
43     /**
44      * A struct that is used to represent a collection of faces of a process
45      * with one of its neighbor within the setup done in struct FaceInfo.
46      */
47     struct FaceIdentifier
48     {
FaceIdentifierFaceIdentifier49       FaceIdentifier()
50         : n_hanging_faces_smaller_subdomain(0)
51         , n_hanging_faces_larger_subdomain(0)
52       {}
53 
54       std::vector<std::pair<CellId, CellId>> shared_faces;
55       unsigned int                           n_hanging_faces_smaller_subdomain;
56       unsigned int                           n_hanging_faces_larger_subdomain;
57     };
58 
59 
60 
61     /**
62      * A struct that extracts the faces relevant to a given set of cells,
63      * including the assignment of which of the two neighboring processors at
64      * a subdomain boundary with MPI should do the integration (from both
65      * sides). This data structure is used for the setup of the connectivity
66      * between faces and cells and for identification of the dof indices to be
67      * used for face integrals.
68      */
69     template <int dim>
70     struct FaceSetup
71     {
72       FaceSetup();
73 
74       /**
75        * Perform the initial detection of faces before reading the indices on
76        * the cells. This does not add the faces yet but only decides on
77        * whether some of the faces should be considered for processing
78        * locally.
79        */
80       void
81       initialize(
82         const dealii::Triangulation<dim> &triangulation,
83         const unsigned int                mg_level,
84         const bool                        hold_all_faces_to_owned_cells,
85         std::vector<std::pair<unsigned int, unsigned int>> &cell_levels);
86 
87       /**
88        * Upon completion of the dof indices, this function extracts the
89        * information relevant for FaceToCellTopology and categorizes the faces
90        * into interior faces, boundary faces, and ghost faces (not processed
91        * locally but adjacent to some of the cells present locally).
92        */
93       void
94       generate_faces(
95         const dealii::Triangulation<dim> &                        triangulation,
96         const std::vector<std::pair<unsigned int, unsigned int>> &cell_levels,
97         TaskInfo &                                                task_info);
98 
99       /**
100        * Fills the information about the cell, the face number, and numbers
101        * within the plain array representation in MatrixFree into
102        * FaceToCellTopology (without vectorization, which is something applied
103        * later).
104        */
105       FaceToCellTopology<1>
106       create_face(
107         const unsigned int                                        face_no,
108         const typename dealii::Triangulation<dim>::cell_iterator &cell,
109         const unsigned int number_cell_interior,
110         const typename dealii::Triangulation<dim>::cell_iterator &neighbor,
111         const unsigned int number_cell_exterior);
112 
113       bool use_active_cells;
114 
115       /**
116        * A type that categorizes faces in the first initialize() function such
117        * that we can later get their correct value in generate_faces().
118        */
119       enum class FaceCategory : char
120       {
121         locally_active_at_boundary,
122         locally_active_done_here,
123         locally_active_done_elsewhere,
124         ghosted,
125         multigrid_refinement_edge
126       };
127 
128       std::vector<FaceCategory>          face_is_owned;
129       std::vector<bool>                  at_processor_boundary;
130       std::vector<FaceToCellTopology<1>> inner_faces;
131       std::vector<FaceToCellTopology<1>> boundary_faces;
132       std::vector<FaceToCellTopology<1>> inner_ghost_faces;
133       std::vector<FaceToCellTopology<1>> refinement_edge_faces;
134     };
135 
136 
137 
138     /**
139      * Actually form the batches for vectorized execution of face integrals.
140      */
141     template <int vectorization_width>
142     void
143     collect_faces_vectorization(
144       const std::vector<FaceToCellTopology<1>> &faces_in,
145       const std::vector<bool> &                 hard_vectorization_boundary,
146       std::vector<unsigned int> &               face_partition_data,
147       std::vector<FaceToCellTopology<vectorization_width>> &faces_out);
148 
149 
150 
151     /* -------------------------------------------------------------------- */
152 
153 #ifndef DOXYGEN
154 
155     template <int dim>
FaceSetup()156     FaceSetup<dim>::FaceSetup()
157       : use_active_cells(true)
158     {}
159 
160 
161 
162     template <int dim>
163     void
initialize(const dealii::Triangulation<dim> & triangulation,const unsigned int mg_level,const bool hold_all_faces_to_owned_cells,std::vector<std::pair<unsigned int,unsigned int>> & cell_levels)164     FaceSetup<dim>::initialize(
165       const dealii::Triangulation<dim> &triangulation,
166       const unsigned int                mg_level,
167       const bool                        hold_all_faces_to_owned_cells,
168       std::vector<std::pair<unsigned int, unsigned int>> &cell_levels)
169     {
170       use_active_cells = mg_level == numbers::invalid_unsigned_int;
171 
172 #  ifdef DEBUG
173       // safety check
174       if (use_active_cells)
175         for (const auto &cell_level : cell_levels)
176           {
177             typename dealii::Triangulation<dim>::cell_iterator dcell(
178               &triangulation, cell_level.first, cell_level.second);
179             Assert(dcell->is_active(), ExcInternalError());
180           }
181 #  endif
182 
183       // step 1: add ghost cells for those cells that we identify as
184       // interesting
185 
186       at_processor_boundary.resize(cell_levels.size(), false);
187       face_is_owned.resize(dim > 1 ? triangulation.n_raw_faces() :
188                                      triangulation.n_vertices(),
189                            FaceCategory::locally_active_done_elsewhere);
190 
191       // go through the mesh and divide the faces on the processor
192       // boundaries as evenly as possible between the processors
193       std::map<types::subdomain_id, FaceIdentifier>
194         inner_faces_at_proc_boundary;
195       if (triangulation.locally_owned_subdomain() !=
196           numbers::invalid_subdomain_id)
197         {
198           const types::subdomain_id my_domain =
199             triangulation.locally_owned_subdomain();
200           for (unsigned int i = 0; i < cell_levels.size(); ++i)
201             {
202               if (i > 0 && cell_levels[i] == cell_levels[i - 1])
203                 continue;
204               typename dealii::Triangulation<dim>::cell_iterator dcell(
205                 &triangulation, cell_levels[i].first, cell_levels[i].second);
206               for (const unsigned int f : dcell->face_indices())
207                 {
208                   if (dcell->at_boundary(f) && !dcell->has_periodic_neighbor(f))
209                     continue;
210                   typename dealii::Triangulation<dim>::cell_iterator neighbor =
211                     dcell->neighbor_or_periodic_neighbor(f);
212 
213                   // faces at hanging nodes are always treated by the processor
214                   // who owns the element on the fine side. but we need to count
215                   // the number of inner faces in order to balance the remaining
216                   // faces properly
217                   const CellId id_mine = dcell->id();
218                   if (use_active_cells && neighbor->has_children())
219                     for (unsigned int c = 0;
220                          c < (dcell->has_periodic_neighbor(f) ?
221                                 dcell->periodic_neighbor(f)
222                                   ->face(dcell->periodic_neighbor_face_no(f))
223                                   ->n_children() :
224                                 dcell->face(f)->n_children());
225                          ++c)
226                       {
227                         typename dealii::Triangulation<dim>::cell_iterator
228                           neighbor_c =
229                             dcell->at_boundary(f) ?
230                               dcell->periodic_neighbor_child_on_subface(f, c) :
231                               dcell->neighbor_child_on_subface(f, c);
232                         const types::subdomain_id neigh_domain =
233                           neighbor_c->subdomain_id();
234                         if (my_domain < neigh_domain)
235                           inner_faces_at_proc_boundary[neigh_domain]
236                             .n_hanging_faces_larger_subdomain++;
237                         else if (my_domain > neigh_domain)
238                           inner_faces_at_proc_boundary[neigh_domain]
239                             .n_hanging_faces_smaller_subdomain++;
240                       }
241                   else
242                     {
243                       const types::subdomain_id neigh_domain =
244                         use_active_cells ? neighbor->subdomain_id() :
245                                            neighbor->level_subdomain_id();
246                       if (neighbor->level() < dcell->level() &&
247                           use_active_cells)
248                         {
249                           if (my_domain < neigh_domain)
250                             inner_faces_at_proc_boundary[neigh_domain]
251                               .n_hanging_faces_smaller_subdomain++;
252                           else if (my_domain > neigh_domain)
253                             inner_faces_at_proc_boundary[neigh_domain]
254                               .n_hanging_faces_larger_subdomain++;
255                         }
256                       else if (neighbor->level() == dcell->level() &&
257                                my_domain != neigh_domain)
258                         {
259                           // always list the cell whose owner has the lower
260                           // subdomain id first. this applies to both processors
261                           // involved, so both processors will generate the same
262                           // list that we will later order
263                           const CellId id_neigh = neighbor->id();
264                           if (my_domain < neigh_domain)
265                             inner_faces_at_proc_boundary[neigh_domain]
266                               .shared_faces.emplace_back(id_mine, id_neigh);
267                           else
268                             inner_faces_at_proc_boundary[neigh_domain]
269                               .shared_faces.emplace_back(id_neigh, id_mine);
270                         }
271                     }
272                 }
273             }
274 
275           // sort the cell ids related to each neighboring processor. This
276           // algorithm is symmetric so every processor combination should
277           // arrive here and no deadlock should be possible
278           for (auto &inner_face : inner_faces_at_proc_boundary)
279             {
280               Assert(inner_face.first != my_domain,
281                      ExcInternalError("Should not send info to myself"));
282               std::sort(inner_face.second.shared_faces.begin(),
283                         inner_face.second.shared_faces.end());
284               inner_face.second.shared_faces.erase(
285                 std::unique(inner_face.second.shared_faces.begin(),
286                             inner_face.second.shared_faces.end()),
287                 inner_face.second.shared_faces.end());
288 
289               // safety check: both involved processors should see the same list
290               // because the pattern of ghosting is symmetric. We test this by
291               // looking at the length of the lists of faces
292 #  if defined(DEAL_II_WITH_MPI) && defined(DEBUG)
293               MPI_Comm comm = MPI_COMM_SELF;
294               if (const dealii::parallel::TriangulationBase<dim> *ptria =
295                     dynamic_cast<const dealii::parallel::TriangulationBase<dim>
296                                    *>(&triangulation))
297                 comm = ptria->get_communicator();
298 
299               MPI_Status   status;
300               unsigned int mysize    = inner_face.second.shared_faces.size();
301               unsigned int othersize = numbers::invalid_unsigned_int;
302               MPI_Sendrecv(&mysize,
303                            1,
304                            MPI_UNSIGNED,
305                            inner_face.first,
306                            600 + my_domain,
307                            &othersize,
308                            1,
309                            MPI_UNSIGNED,
310                            inner_face.first,
311                            600 + inner_face.first,
312                            comm,
313                            &status);
314               AssertDimension(mysize, othersize);
315               mysize = inner_face.second.n_hanging_faces_smaller_subdomain;
316               MPI_Sendrecv(&mysize,
317                            1,
318                            MPI_UNSIGNED,
319                            inner_face.first,
320                            700 + my_domain,
321                            &othersize,
322                            1,
323                            MPI_UNSIGNED,
324                            inner_face.first,
325                            700 + inner_face.first,
326                            comm,
327                            &status);
328               AssertDimension(mysize, othersize);
329               mysize = inner_face.second.n_hanging_faces_larger_subdomain;
330               MPI_Sendrecv(&mysize,
331                            1,
332                            MPI_UNSIGNED,
333                            inner_face.first,
334                            800 + my_domain,
335                            &othersize,
336                            1,
337                            MPI_UNSIGNED,
338                            inner_face.first,
339                            800 + inner_face.first,
340                            comm,
341                            &status);
342               AssertDimension(mysize, othersize);
343 #  endif
344 
345               // Arrange the face "ownership" such that cells that are access
346               // by more than one face (think of a cell in a corner) get
347               // ghosted. This arrangement has the advantage that we need to
348               // send less data because the same data is used twice. The
349               // strategy applied here is to ensure the same order of face
350               // pairs on both processors that share some faces, and make the
351               // same decision on both sides.
352 
353               // Create a vector with cell ids sorted over the processor with
354               // the larger rank. In the code below we need to be able to
355               // identify the same cell once for the processor with higher
356               // rank and once for the processor with the lower rank. The
357               // format for the processor with the higher rank is already
358               // contained in `shared_faces`, whereas we need a copy that we
359               // sort differently for the other way around.
360               std::vector<std::tuple<CellId, CellId, unsigned int>> other_range(
361                 inner_face.second.shared_faces.size());
362               for (unsigned int i = 0; i < other_range.size(); ++i)
363                 other_range[i] =
364                   std::make_tuple(inner_face.second.shared_faces[i].second,
365                                   inner_face.second.shared_faces[i].first,
366                                   i);
367               std::sort(other_range.begin(), other_range.end());
368 
369               // the vector 'assignment' sets whether a particular cell
370               // appears more often and acts as a pre-selection of the rank. A
371               // value of 1 means that the process with the higher rank gets
372               // those faces, a value -1 means that the process with the lower
373               // rank gets it, whereas a value 0 means that the decision can
374               // be made in an arbitrary way.
375               unsigned int      n_faces_lower_proc = 0, n_faces_higher_proc = 0;
376               std::vector<char> assignment(other_range.size(), 0);
377               if (inner_face.second.shared_faces.size() > 0)
378                 {
379                   // identify faces that go to the processor with the higher
380                   // rank
381                   unsigned int count = 0;
382                   for (unsigned int i = 1;
383                        i < inner_face.second.shared_faces.size();
384                        ++i)
385                     if (inner_face.second.shared_faces[i].first ==
386                         inner_face.second.shared_faces[i - 1 - count].first)
387                       ++count;
388                     else
389                       {
390                         AssertThrow(count < 2 * dim, ExcInternalError());
391                         if (count > 0)
392                           {
393                             for (unsigned int k = 0; k <= count; ++k)
394                               assignment[i - 1 - k] = 1;
395                             n_faces_higher_proc += count + 1;
396                           }
397                         count = 0;
398                       }
399 
400                   // identify faces that definitely go to the processor with
401                   // the lower rank - this must use the sorting of CellId
402                   // variables from the processor with the higher rank, i.e.,
403                   // other_range rather than `shared_faces`.
404                   count = 0;
405                   for (unsigned int i = 1; i < other_range.size(); ++i)
406                     if (std::get<0>(other_range[i]) ==
407                         std::get<0>(other_range[i - 1 - count]))
408                       ++count;
409                     else
410                       {
411                         AssertThrow(count < 2 * dim, ExcInternalError());
412                         if (count > 0)
413                           {
414                             for (unsigned int k = 0; k <= count; ++k)
415                               {
416                                 Assert(inner_face.second
417                                            .shared_faces[std::get<2>(
418                                              other_range[i - 1])]
419                                            .second ==
420                                          inner_face.second
421                                            .shared_faces[std::get<2>(
422                                              other_range[i - 1 - k])]
423                                            .second,
424                                        ExcInternalError());
425                                 // only assign to -1 if higher rank was not
426                                 // yet set
427                                 if (assignment[std::get<2>(
428                                       other_range[i - 1 - k])] == 0)
429                                   {
430                                     assignment[std::get<2>(
431                                       other_range[i - 1 - k])] = -1;
432                                     ++n_faces_lower_proc;
433                                   }
434                               }
435                           }
436                         count = 0;
437                       }
438                 }
439 
440 
441               // divide the faces evenly between the two processors. the
442               // processor with small rank takes the first half, the processor
443               // with larger rank the second half. Adjust for the hanging
444               // faces that always get assigned to one side, and the faces we
445               // have already assigned due to the criterion above
446               n_faces_lower_proc +=
447                 inner_face.second.n_hanging_faces_smaller_subdomain;
448               n_faces_higher_proc +=
449                 inner_face.second.n_hanging_faces_larger_subdomain;
450               const unsigned int n_total_faces_at_proc_boundary =
451                 (inner_face.second.shared_faces.size() +
452                  inner_face.second.n_hanging_faces_smaller_subdomain +
453                  inner_face.second.n_hanging_faces_larger_subdomain);
454               unsigned int split_index = n_total_faces_at_proc_boundary / 2;
455               if (split_index < n_faces_lower_proc)
456                 split_index = 0;
457               else if (split_index <
458                        n_total_faces_at_proc_boundary - n_faces_higher_proc)
459                 split_index -= n_faces_lower_proc;
460               else
461                 split_index = n_total_faces_at_proc_boundary -
462                               n_faces_higher_proc - n_faces_lower_proc;
463 
464                 // make sure the splitting is consistent between both sides
465 #  if defined(DEAL_II_WITH_MPI) && defined(DEBUG)
466               MPI_Sendrecv(&split_index,
467                            1,
468                            MPI_UNSIGNED,
469                            inner_face.first,
470                            900 + my_domain,
471                            &othersize,
472                            1,
473                            MPI_UNSIGNED,
474                            inner_face.first,
475                            900 + inner_face.first,
476                            comm,
477                            &status);
478               AssertDimension(split_index, othersize);
479               MPI_Sendrecv(&n_faces_lower_proc,
480                            1,
481                            MPI_UNSIGNED,
482                            inner_face.first,
483                            1000 + my_domain,
484                            &othersize,
485                            1,
486                            MPI_UNSIGNED,
487                            inner_face.first,
488                            1000 + inner_face.first,
489                            comm,
490                            &status);
491               AssertDimension(n_faces_lower_proc, othersize);
492               MPI_Sendrecv(&n_faces_higher_proc,
493                            1,
494                            MPI_UNSIGNED,
495                            inner_face.first,
496                            1100 + my_domain,
497                            &othersize,
498                            1,
499                            MPI_UNSIGNED,
500                            inner_face.first,
501                            1100 + inner_face.first,
502                            comm,
503                            &status);
504               AssertDimension(n_faces_higher_proc, othersize);
505 #  endif
506 
507               // collect the faces on both sides
508               std::vector<std::pair<CellId, CellId>> owned_faces_lower,
509                 owned_faces_higher;
510               for (unsigned int i = 0; i < assignment.size(); ++i)
511                 if (assignment[i] < 0)
512                   owned_faces_lower.push_back(
513                     inner_face.second.shared_faces[i]);
514                 else if (assignment[i] > 0)
515                   owned_faces_higher.push_back(
516                     inner_face.second.shared_faces[i]);
517               AssertIndexRange(split_index,
518                                inner_face.second.shared_faces.size() + 1 -
519                                  owned_faces_lower.size() -
520                                  owned_faces_higher.size());
521 
522               unsigned int i = 0, c = 0;
523               for (; i < assignment.size() && c < split_index; ++i)
524                 if (assignment[i] == 0)
525                   {
526                     owned_faces_lower.push_back(
527                       inner_face.second.shared_faces[i]);
528                     ++c;
529                   }
530               for (; i < assignment.size(); ++i)
531                 if (assignment[i] == 0)
532                   {
533                     owned_faces_higher.push_back(
534                       inner_face.second.shared_faces[i]);
535                   }
536 
537 #  ifdef DEBUG
538               // check consistency of faces on both sides
539               std::vector<std::pair<CellId, CellId>> check_faces;
540               check_faces.insert(check_faces.end(),
541                                  owned_faces_lower.begin(),
542                                  owned_faces_lower.end());
543               check_faces.insert(check_faces.end(),
544                                  owned_faces_higher.begin(),
545                                  owned_faces_higher.end());
546               std::sort(check_faces.begin(), check_faces.end());
547               AssertDimension(check_faces.size(),
548                               inner_face.second.shared_faces.size());
549               for (unsigned int i = 0; i < check_faces.size(); ++i)
550                 Assert(check_faces[i] == inner_face.second.shared_faces[i],
551                        ExcInternalError());
552 #  endif
553 
554               // now only set half of the faces as the ones to keep
555               if (my_domain < inner_face.first)
556                 inner_face.second.shared_faces.swap(owned_faces_lower);
557               else
558                 inner_face.second.shared_faces.swap(owned_faces_higher);
559 
560               std::sort(inner_face.second.shared_faces.begin(),
561                         inner_face.second.shared_faces.end());
562             }
563         }
564 
565       // fill in the additional cells that we need access to via ghosting to
566       // cell_levels
567       std::set<std::pair<unsigned int, unsigned int>> ghost_cells;
568       for (unsigned int i = 0; i < cell_levels.size(); ++i)
569         {
570           typename dealii::Triangulation<dim>::cell_iterator dcell(
571             &triangulation, cell_levels[i].first, cell_levels[i].second);
572           if (use_active_cells)
573             Assert(dcell->is_active(), ExcNotImplemented());
574           for (const auto f : dcell->face_indices())
575             {
576               if (dcell->at_boundary(f) && !dcell->has_periodic_neighbor(f))
577                 face_is_owned[dcell->face(f)->index()] =
578                   FaceCategory::locally_active_at_boundary;
579 
580               // treat boundaries of cells of different refinement level
581               // inside the domain in case of multigrid separately
582               else if ((dcell->at_boundary(f) == false ||
583                         dcell->has_periodic_neighbor(f)) &&
584                        mg_level != numbers::invalid_unsigned_int &&
585                        dcell->neighbor_or_periodic_neighbor(f)->level() <
586                          dcell->level())
587                 {
588                   face_is_owned[dcell->face(f)->index()] =
589                     FaceCategory::multigrid_refinement_edge;
590                 }
591               else
592                 {
593                   typename dealii::Triangulation<dim>::cell_iterator neighbor =
594                     dcell->neighbor_or_periodic_neighbor(f);
595 
596                   // neighbor is refined -> face will be treated by neighbor
597                   if (use_active_cells && neighbor->has_children() &&
598                       hold_all_faces_to_owned_cells == false)
599                     continue;
600 
601                   bool add_to_ghost = false;
602                   const types::subdomain_id
603                     id1 = use_active_cells ? dcell->subdomain_id() :
604                                              dcell->level_subdomain_id(),
605                     id2 = use_active_cells ?
606                             (neighbor->has_children() ?
607                                dcell->neighbor_child_on_subface(f, 0)
608                                  ->subdomain_id() :
609                                neighbor->subdomain_id()) :
610                             neighbor->level_subdomain_id();
611 
612                   // Check whether the current face should be processed
613                   // locally (instead of being processed from the other
614                   // side). We process a face locally when we are more refined
615                   // (in the active cell case) or when the face is listed in
616                   // the `shared_faces` data structure that we built above.
617                   if ((id1 == id2 &&
618                        (use_active_cells == false || neighbor->is_active())) ||
619                       dcell->level() > neighbor->level() ||
620                       std::binary_search(
621                         inner_faces_at_proc_boundary[id2].shared_faces.begin(),
622                         inner_faces_at_proc_boundary[id2].shared_faces.end(),
623                         std::make_pair(id1 < id2 ? dcell->id() : neighbor->id(),
624                                        id1 < id2 ? neighbor->id() :
625                                                    dcell->id())))
626                     {
627                       face_is_owned[dcell->face(f)->index()] =
628                         FaceCategory::locally_active_done_here;
629                       if (dcell->level() == neighbor->level())
630                         face_is_owned
631                           [neighbor
632                              ->face(dcell->has_periodic_neighbor(f) ?
633                                       dcell->periodic_neighbor_face_no(f) :
634                                       dcell->neighbor_face_no(f))
635                              ->index()] =
636                             FaceCategory::locally_active_done_here;
637 
638                       // If neighbor is a ghost element (i.e.
639                       // dcell->subdomain_id !
640                       // dcell->neighbor(f)->subdomain_id()), we need to add its
641                       // index into cell level list.
642                       if (use_active_cells)
643                         add_to_ghost =
644                           (dcell->subdomain_id() != neighbor->subdomain_id());
645                       else
646                         add_to_ghost = (dcell->level_subdomain_id() !=
647                                         neighbor->level_subdomain_id());
648                     }
649                   else if (hold_all_faces_to_owned_cells == true)
650                     {
651                       // add all cells to ghost layer...
652                       face_is_owned[dcell->face(f)->index()] =
653                         FaceCategory::ghosted;
654                       if (use_active_cells)
655                         {
656                           if (neighbor->has_children())
657                             for (unsigned int s = 0;
658                                  s < dcell->face(f)->n_children();
659                                  ++s)
660                               if (dcell->at_boundary(f))
661                                 {
662                                   if (dcell
663                                         ->periodic_neighbor_child_on_subface(f,
664                                                                              s)
665                                         ->subdomain_id() !=
666                                       dcell->subdomain_id())
667                                     add_to_ghost = true;
668                                 }
669                               else
670                                 {
671                                   if (dcell->neighbor_child_on_subface(f, s)
672                                         ->subdomain_id() !=
673                                       dcell->subdomain_id())
674                                     add_to_ghost = true;
675                                 }
676                           else
677                             add_to_ghost = (dcell->subdomain_id() !=
678                                             neighbor->subdomain_id());
679                         }
680                       else
681                         add_to_ghost = (dcell->level_subdomain_id() !=
682                                         neighbor->level_subdomain_id());
683                     }
684 
685                   if (add_to_ghost)
686                     {
687                       if (use_active_cells && neighbor->has_children())
688                         for (unsigned int s = 0;
689                              s < dcell->face(f)->n_children();
690                              ++s)
691                           {
692                             typename dealii::Triangulation<dim>::cell_iterator
693                               neighbor_child =
694                                 dcell->at_boundary(f) ?
695                                   dcell->periodic_neighbor_child_on_subface(f,
696                                                                             s) :
697                                   dcell->neighbor_child_on_subface(f, s);
698                             if (neighbor_child->subdomain_id() !=
699                                 dcell->subdomain_id())
700                               ghost_cells.insert(
701                                 std::pair<unsigned int, unsigned int>(
702                                   neighbor_child->level(),
703                                   neighbor_child->index()));
704                           }
705                       else
706                         ghost_cells.insert(
707                           std::pair<unsigned int, unsigned int>(
708                             neighbor->level(), neighbor->index()));
709                       at_processor_boundary[i] = true;
710                     }
711                 }
712             }
713         }
714 
715       // step 2: append the ghost cells at the end of the locally owned
716       // cells
717       for (const auto &ghost_cell : ghost_cells)
718         cell_levels.push_back(ghost_cell);
719     }
720 
721 
722 
723     template <int dim>
724     void
generate_faces(const dealii::Triangulation<dim> & triangulation,const std::vector<std::pair<unsigned int,unsigned int>> & cell_levels,TaskInfo & task_info)725     FaceSetup<dim>::generate_faces(
726       const dealii::Triangulation<dim> &                        triangulation,
727       const std::vector<std::pair<unsigned int, unsigned int>> &cell_levels,
728       TaskInfo &                                                task_info)
729     {
730       // step 1: create the inverse map between cell iterators and the
731       // cell_level_index field
732       std::map<std::pair<unsigned int, unsigned int>, unsigned int>
733         map_to_vectorized;
734       for (unsigned int cell = 0; cell < cell_levels.size(); ++cell)
735         if (cell == 0 || cell_levels[cell] != cell_levels[cell - 1])
736           {
737             typename dealii::Triangulation<dim>::cell_iterator dcell(
738               &triangulation,
739               cell_levels[cell].first,
740               cell_levels[cell].second);
741             std::pair<unsigned int, unsigned int> level_index(dcell->level(),
742                                                               dcell->index());
743             map_to_vectorized[level_index] = cell;
744           }
745 
746       // step 2: fill the information about inner faces and boundary faces
747       const unsigned int vectorization_length = task_info.vectorization_length;
748       task_info.face_partition_data.resize(
749         task_info.cell_partition_data.size() - 1, 0);
750       task_info.boundary_partition_data.resize(
751         task_info.cell_partition_data.size() - 1, 0);
752       std::vector<unsigned char> face_visited(face_is_owned.size(), 0);
753       for (unsigned int partition = 0;
754            partition < task_info.cell_partition_data.size() - 2;
755            ++partition)
756         {
757           unsigned int boundary_counter = 0;
758           unsigned int inner_counter    = 0;
759           for (unsigned int cell = task_info.cell_partition_data[partition] *
760                                    vectorization_length;
761                cell < task_info.cell_partition_data[partition + 1] *
762                         vectorization_length;
763                ++cell)
764             if (cell == 0 || cell_levels[cell] != cell_levels[cell - 1])
765               {
766                 typename dealii::Triangulation<dim>::cell_iterator dcell(
767                   &triangulation,
768                   cell_levels[cell].first,
769                   cell_levels[cell].second);
770                 for (const auto f : dcell->face_indices())
771                   {
772                     // boundary face
773                     if (face_is_owned[dcell->face(f)->index()] ==
774                         FaceCategory::locally_active_at_boundary)
775                       {
776                         Assert(dcell->at_boundary(f), ExcInternalError());
777                         ++boundary_counter;
778                         FaceToCellTopology<1> info;
779                         info.cells_interior[0] = cell;
780                         info.cells_exterior[0] = numbers::invalid_unsigned_int;
781                         info.interior_face_no  = f;
782                         info.exterior_face_no  = dcell->face(f)->boundary_id();
783                         info.subface_index =
784                           GeometryInfo<dim>::max_children_per_cell;
785                         info.face_orientation = 0;
786                         boundary_faces.push_back(info);
787 
788                         face_visited[dcell->face(f)->index()]++;
789                       }
790                     // interior face, including faces over periodic boundaries
791                     else
792                       {
793                         typename dealii::Triangulation<dim>::cell_iterator
794                           neighbor = dcell->neighbor_or_periodic_neighbor(f);
795                         if (use_active_cells && neighbor->has_children())
796                           {
797                             for (unsigned int c = 0;
798                                  c < dcell->face(f)->n_children();
799                                  ++c)
800                               {
801                                 typename dealii::Triangulation<
802                                   dim>::cell_iterator neighbor_c =
803                                   dcell->at_boundary(f) ?
804                                     dcell->periodic_neighbor_child_on_subface(
805                                       f, c) :
806                                     dcell->neighbor_child_on_subface(f, c);
807                                 const types::subdomain_id neigh_domain =
808                                   neighbor_c->subdomain_id();
809                                 const unsigned int neighbor_face_no =
810                                   dcell->has_periodic_neighbor(f) ?
811                                     dcell->periodic_neighbor_face_no(f) :
812                                     dcell->neighbor_face_no(f);
813                                 if (neigh_domain != dcell->subdomain_id() ||
814                                     face_visited
815                                         [dcell->face(f)->child(c)->index()] ==
816                                       1)
817                                   {
818                                     std::pair<unsigned int, unsigned int>
819                                       level_index(neighbor_c->level(),
820                                                   neighbor_c->index());
821                                     if (face_is_owned
822                                           [dcell->face(f)->child(c)->index()] ==
823                                         FaceCategory::locally_active_done_here)
824                                       {
825                                         ++inner_counter;
826                                         inner_faces.push_back(create_face(
827                                           neighbor_face_no,
828                                           neighbor_c,
829                                           map_to_vectorized[level_index],
830                                           dcell,
831                                           cell));
832                                       }
833                                     else if (face_is_owned[dcell->face(f)
834                                                              ->child(c)
835                                                              ->index()] ==
836                                              FaceCategory::ghosted)
837                                       {
838                                         inner_ghost_faces.push_back(create_face(
839                                           neighbor_face_no,
840                                           neighbor_c,
841                                           map_to_vectorized[level_index],
842                                           dcell,
843                                           cell));
844                                       }
845                                     else
846                                       Assert(
847                                         face_is_owned[dcell->face(f)
848                                                         ->index()] ==
849                                             FaceCategory::
850                                               locally_active_done_elsewhere ||
851                                           face_is_owned[dcell->face(f)
852                                                           ->index()] ==
853                                             FaceCategory::ghosted,
854                                         ExcInternalError());
855                                   }
856                                 else
857                                   {
858                                     face_visited
859                                       [dcell->face(f)->child(c)->index()] = 1;
860                                   }
861                               }
862                           }
863                         else
864                           {
865                             const types::subdomain_id my_domain =
866                               use_active_cells ? dcell->subdomain_id() :
867                                                  dcell->level_subdomain_id();
868                             const types::subdomain_id neigh_domain =
869                               use_active_cells ? neighbor->subdomain_id() :
870                                                  neighbor->level_subdomain_id();
871                             if (neigh_domain != my_domain ||
872                                 face_visited[dcell->face(f)->index()] == 1)
873                               {
874                                 std::pair<unsigned int, unsigned int>
875                                   level_index(neighbor->level(),
876                                               neighbor->index());
877                                 if (face_is_owned[dcell->face(f)->index()] ==
878                                     FaceCategory::locally_active_done_here)
879                                   {
880                                     Assert(use_active_cells ||
881                                              dcell->level() ==
882                                                neighbor->level(),
883                                            ExcInternalError());
884                                     ++inner_counter;
885                                     inner_faces.push_back(create_face(
886                                       f,
887                                       dcell,
888                                       cell,
889                                       neighbor,
890                                       map_to_vectorized[level_index]));
891                                   }
892                                 else if (face_is_owned[dcell->face(f)
893                                                          ->index()] ==
894                                          FaceCategory::ghosted)
895                                   {
896                                     inner_ghost_faces.push_back(create_face(
897                                       f,
898                                       dcell,
899                                       cell,
900                                       neighbor,
901                                       map_to_vectorized[level_index]));
902                                   }
903                               }
904                             else
905                               {
906                                 face_visited[dcell->face(f)->index()] = 1;
907                                 if (dcell->has_periodic_neighbor(f))
908                                   face_visited
909                                     [neighbor
910                                        ->face(
911                                          dcell->periodic_neighbor_face_no(f))
912                                        ->index()] = 1;
913                               }
914                             if (face_is_owned[dcell->face(f)->index()] ==
915                                 FaceCategory::multigrid_refinement_edge)
916                               {
917                                 refinement_edge_faces.push_back(
918                                   create_face(f,
919                                               dcell,
920                                               cell,
921                                               neighbor,
922                                               refinement_edge_faces.size()));
923                               }
924                           }
925                       }
926                   }
927               }
928           task_info.face_partition_data[partition + 1] =
929             task_info.face_partition_data[partition] + inner_counter;
930           task_info.boundary_partition_data[partition + 1] =
931             task_info.boundary_partition_data[partition] + boundary_counter;
932         }
933       task_info.ghost_face_partition_data.resize(2);
934       task_info.ghost_face_partition_data[0] = 0;
935       task_info.ghost_face_partition_data[1] = inner_ghost_faces.size();
936       task_info.refinement_edge_face_partition_data.resize(2);
937       task_info.refinement_edge_face_partition_data[0] = 0;
938       task_info.refinement_edge_face_partition_data[1] =
939         refinement_edge_faces.size();
940     }
941 
942 
943 
944     template <int dim>
945     FaceToCellTopology<1>
create_face(const unsigned int face_no,const typename dealii::Triangulation<dim>::cell_iterator & cell,const unsigned int number_cell_interior,const typename dealii::Triangulation<dim>::cell_iterator & neighbor,const unsigned int number_cell_exterior)946     FaceSetup<dim>::create_face(
947       const unsigned int                                        face_no,
948       const typename dealii::Triangulation<dim>::cell_iterator &cell,
949       const unsigned int number_cell_interior,
950       const typename dealii::Triangulation<dim>::cell_iterator &neighbor,
951       const unsigned int number_cell_exterior)
952     {
953       FaceToCellTopology<1> info;
954       info.cells_interior[0] = number_cell_interior;
955       info.cells_exterior[0] = number_cell_exterior;
956       info.interior_face_no  = face_no;
957       if (cell->has_periodic_neighbor(face_no))
958         info.exterior_face_no = cell->periodic_neighbor_face_no(face_no);
959       else
960         info.exterior_face_no = cell->neighbor_face_no(face_no);
961 
962       info.subface_index = GeometryInfo<dim>::max_children_per_cell;
963       Assert(neighbor->level() <= cell->level(), ExcInternalError());
964       if (cell->level() > neighbor->level())
965         {
966           if (cell->has_periodic_neighbor(face_no))
967             info.subface_index =
968               cell->periodic_neighbor_of_coarser_periodic_neighbor(face_no)
969                 .second;
970           else
971             info.subface_index =
972               cell->neighbor_of_coarser_neighbor(face_no).second;
973         }
974 
975       // special treatment of periodic boundaries
976       if (dim == 3 && cell->has_periodic_neighbor(face_no))
977         {
978           const unsigned int exterior_face_orientation =
979             !cell->get_triangulation()
980                .get_periodic_face_map()
981                .at({cell, face_no})
982                .second[0] +
983             2 * cell->get_triangulation()
984                   .get_periodic_face_map()
985                   .at({cell, face_no})
986                   .second[1] +
987             4 * cell->get_triangulation()
988                   .get_periodic_face_map()
989                   .at({cell, face_no})
990                   .second[2];
991 
992           info.face_orientation = exterior_face_orientation;
993 
994           return info;
995         }
996 
997       info.face_orientation = 0;
998       const unsigned int interior_face_orientation =
999         !cell->face_orientation(face_no) + 2 * cell->face_flip(face_no) +
1000         4 * cell->face_rotation(face_no);
1001       const unsigned int exterior_face_orientation =
1002         !neighbor->face_orientation(info.exterior_face_no) +
1003         2 * neighbor->face_flip(info.exterior_face_no) +
1004         4 * neighbor->face_rotation(info.exterior_face_no);
1005       if (interior_face_orientation != 0)
1006         {
1007           info.face_orientation = 8 + interior_face_orientation;
1008           Assert(exterior_face_orientation == 0,
1009                  ExcMessage(
1010                    "Face seems to be wrongly oriented from both sides"));
1011         }
1012       else
1013         info.face_orientation = exterior_face_orientation;
1014       return info;
1015     }
1016 
1017 
1018 
1019     /**
1020      * This simple comparison for collect_faces_vectorization() identifies
1021      * faces of the same type, i.e., where all of the interior and exterior
1022      * face number, subface index and orientation are the same. This is used
1023      * to batch similar faces together for vectorization.
1024      */
1025     inline bool
compare_faces_for_vectorization(const FaceToCellTopology<1> & face1,const FaceToCellTopology<1> & face2)1026     compare_faces_for_vectorization(const FaceToCellTopology<1> &face1,
1027                                     const FaceToCellTopology<1> &face2)
1028     {
1029       if (face1.interior_face_no != face2.interior_face_no)
1030         return false;
1031       if (face1.exterior_face_no != face2.exterior_face_no)
1032         return false;
1033       if (face1.subface_index != face2.subface_index)
1034         return false;
1035       if (face1.face_orientation != face2.face_orientation)
1036         return false;
1037       return true;
1038     }
1039 
1040 
1041 
1042     /**
1043      * This comparator is used within collect_faces_vectorization() to create
1044      * a sorting of FaceToCellTopology objects based on their
1045      * identifiers. This is used to obtain a good data locality when
1046      * processing the face integrals.
1047      */
1048     template <int length>
1049     struct FaceComparator
1050     {
1051       bool
operatorFaceComparator1052       operator()(const FaceToCellTopology<length> &face1,
1053                  const FaceToCellTopology<length> &face2) const
1054       {
1055         for (unsigned int i = 0; i < length; ++i)
1056           if (face1.cells_interior[i] < face2.cells_interior[i])
1057             return true;
1058           else if (face1.cells_interior[i] > face2.cells_interior[i])
1059             return false;
1060         for (unsigned int i = 0; i < length; ++i)
1061           if (face1.cells_exterior[i] < face2.cells_exterior[i])
1062             return true;
1063           else if (face1.cells_exterior[i] > face2.cells_exterior[i])
1064             return false;
1065         if (face1.interior_face_no < face2.interior_face_no)
1066           return true;
1067         else if (face1.interior_face_no > face2.interior_face_no)
1068           return false;
1069         if (face1.exterior_face_no < face2.exterior_face_no)
1070           return true;
1071         else if (face1.exterior_face_no > face2.exterior_face_no)
1072           return false;
1073 
1074         // we do not need to check for subface_index and orientation because
1075         // those cannot be different if when all the other values are the
1076         // same.
1077         AssertDimension(face1.subface_index, face2.subface_index);
1078         AssertDimension(face1.face_orientation, face2.face_orientation);
1079 
1080         return false;
1081       }
1082     };
1083 
1084 
1085 
1086     template <int vectorization_width>
1087     void
collect_faces_vectorization(const std::vector<FaceToCellTopology<1>> & faces_in,const std::vector<bool> & hard_vectorization_boundary,std::vector<unsigned int> & face_partition_data,std::vector<FaceToCellTopology<vectorization_width>> & faces_out)1088     collect_faces_vectorization(
1089       const std::vector<FaceToCellTopology<1>> &faces_in,
1090       const std::vector<bool> &                 hard_vectorization_boundary,
1091       std::vector<unsigned int> &               face_partition_data,
1092       std::vector<FaceToCellTopology<vectorization_width>> &faces_out)
1093     {
1094       FaceToCellTopology<vectorization_width> macro_face;
1095       std::vector<std::vector<unsigned int>>  faces_type;
1096 
1097       unsigned int face_start = face_partition_data[0],
1098                    face_end   = face_partition_data[0];
1099 
1100       face_partition_data[0] = faces_out.size();
1101       for (unsigned int partition = 0;
1102            partition < face_partition_data.size() - 1;
1103            ++partition)
1104         {
1105           std::vector<std::vector<unsigned int>> new_faces_type;
1106 
1107           // start with the end point for the last partition
1108           face_start = face_end;
1109           face_end   = face_partition_data[partition + 1];
1110 
1111           // set the partitioner to the new vectorized lengths
1112           face_partition_data[partition + 1] = face_partition_data[partition];
1113 
1114           // loop over the faces in the current partition and reorder according
1115           // to the face type
1116           for (unsigned int face = face_start; face < face_end; ++face)
1117             {
1118               for (auto &face_type : faces_type)
1119                 {
1120                   // Compare current face with first face of type type
1121                   if (compare_faces_for_vectorization(faces_in[face],
1122                                                       faces_in[face_type[0]]))
1123                     {
1124                       face_type.push_back(face);
1125                       goto face_found;
1126                     }
1127                 }
1128               faces_type.emplace_back(1, face);
1129             face_found:
1130               {}
1131             }
1132 
1133           // insert new faces in sorted list to get good data locality
1134           std::set<FaceToCellTopology<vectorization_width>,
1135                    FaceComparator<vectorization_width>>
1136             new_faces;
1137           for (const auto &face_type : faces_type)
1138             {
1139               macro_face.interior_face_no =
1140                 faces_in[face_type[0]].interior_face_no;
1141               macro_face.exterior_face_no =
1142                 faces_in[face_type[0]].exterior_face_no;
1143               macro_face.subface_index = faces_in[face_type[0]].subface_index;
1144               macro_face.face_orientation =
1145                 faces_in[face_type[0]].face_orientation;
1146               unsigned int               no_faces = face_type.size();
1147               std::vector<unsigned char> touched(no_faces, 0);
1148 
1149               // do two passes through the data. The first is to identify
1150               // similar faces within the same index range as the cells which
1151               // will allow for vectorized read operations, the second picks up
1152               // all the rest
1153               unsigned int n_vectorized = 0;
1154               for (unsigned int f = 0; f < no_faces; ++f)
1155                 if (faces_in[face_type[f]].cells_interior[0] %
1156                       vectorization_width ==
1157                     0)
1158                   {
1159                     bool is_contiguous = true;
1160                     if (f + vectorization_width > no_faces)
1161                       is_contiguous = false;
1162                     else
1163                       for (unsigned int v = 1; v < vectorization_width; ++v)
1164                         if (faces_in[face_type[f + v]].cells_interior[0] !=
1165                             faces_in[face_type[f]].cells_interior[0] + v)
1166                           is_contiguous = false;
1167                     if (is_contiguous)
1168                       {
1169                         AssertIndexRange(f,
1170                                          face_type.size() -
1171                                            vectorization_width + 1);
1172                         for (unsigned int v = 0; v < vectorization_width; ++v)
1173                           {
1174                             macro_face.cells_interior[v] =
1175                               faces_in[face_type[f + v]].cells_interior[0];
1176                             macro_face.cells_exterior[v] =
1177                               faces_in[face_type[f + v]].cells_exterior[0];
1178                             touched[f + v] = 1;
1179                           }
1180                         new_faces.insert(macro_face);
1181                         f += vectorization_width - 1;
1182                         n_vectorized += vectorization_width;
1183                       }
1184                   }
1185 
1186               std::vector<unsigned int> untouched;
1187               untouched.reserve(no_faces - n_vectorized);
1188               for (unsigned int f = 0; f < no_faces; ++f)
1189                 if (touched[f] == 0)
1190                   untouched.push_back(f);
1191               unsigned int v = 0;
1192               for (const auto f : untouched)
1193                 {
1194                   macro_face.cells_interior[v] =
1195                     faces_in[face_type[f]].cells_interior[0];
1196                   macro_face.cells_exterior[v] =
1197                     faces_in[face_type[f]].cells_exterior[0];
1198                   ++v;
1199                   if (v == vectorization_width)
1200                     {
1201                       new_faces.insert(macro_face);
1202                       v = 0;
1203                     }
1204                 }
1205               if (v > 0 && v < vectorization_width)
1206                 {
1207                   // must add non-filled face
1208                   if (hard_vectorization_boundary[partition + 1] ||
1209                       partition == face_partition_data.size() - 2)
1210                     {
1211                       for (; v < vectorization_width; ++v)
1212                         {
1213                           // Dummy cell, not used
1214                           macro_face.cells_interior[v] =
1215                             numbers::invalid_unsigned_int;
1216                           macro_face.cells_exterior[v] =
1217                             numbers::invalid_unsigned_int;
1218                         }
1219                       new_faces.insert(macro_face);
1220                     }
1221                   else
1222                     {
1223                       // postpone to the next partition
1224                       std::vector<unsigned int> untreated(v);
1225                       for (unsigned int f = 0; f < v; ++f)
1226                         untreated[f] = face_type[*(untouched.end() - 1 - f)];
1227                       new_faces_type.push_back(untreated);
1228                     }
1229                 }
1230             }
1231 
1232           // insert sorted list to vector of faces
1233           for (auto it = new_faces.begin(); it != new_faces.end(); ++it)
1234             faces_out.push_back(*it);
1235           face_partition_data[partition + 1] += new_faces.size();
1236 
1237           // set the faces that were left over to faces_type for the next round
1238           faces_type = std::move(new_faces_type);
1239         }
1240 
1241 #  ifdef DEBUG
1242       // final safety checks
1243       for (const auto &face_type : faces_type)
1244         AssertDimension(face_type.size(), 0U);
1245 
1246       AssertDimension(faces_out.size(), face_partition_data.back());
1247       unsigned int nfaces = 0;
1248       for (unsigned int i = face_partition_data[0];
1249            i < face_partition_data.back();
1250            ++i)
1251         for (unsigned int v = 0; v < vectorization_width; ++v)
1252           nfaces +=
1253             (faces_out[i].cells_interior[v] != numbers::invalid_unsigned_int);
1254       AssertDimension(nfaces, faces_in.size());
1255 
1256       std::vector<std::pair<unsigned int, unsigned int>> in_faces, out_faces;
1257       for (const auto &face_in : faces_in)
1258         in_faces.emplace_back(face_in.cells_interior[0],
1259                               face_in.cells_exterior[0]);
1260       for (unsigned int i = face_partition_data[0];
1261            i < face_partition_data.back();
1262            ++i)
1263         for (unsigned int v = 0;
1264              v < vectorization_width &&
1265              faces_out[i].cells_interior[v] != numbers::invalid_unsigned_int;
1266              ++v)
1267           out_faces.emplace_back(faces_out[i].cells_interior[v],
1268                                  faces_out[i].cells_exterior[v]);
1269       std::sort(in_faces.begin(), in_faces.end());
1270       std::sort(out_faces.begin(), out_faces.end());
1271       AssertDimension(in_faces.size(), out_faces.size());
1272       for (unsigned int i = 0; i < in_faces.size(); ++i)
1273         {
1274           AssertDimension(in_faces[i].first, out_faces[i].first);
1275           AssertDimension(in_faces[i].second, out_faces[i].second);
1276         }
1277 #  endif
1278     }
1279 
1280 #endif // ifndef DOXYGEN
1281 
1282   } // namespace MatrixFreeFunctions
1283 } // namespace internal
1284 
1285 
1286 DEAL_II_NAMESPACE_CLOSE
1287 
1288 #endif
1289