1 // --------------------------------------------------------------------- 2 // 3 // Copyright (C) 2018 - 2020 by the deal.II authors 4 // 5 // This file is part of the deal.II library. 6 // 7 // The deal.II library is free software; you can use it, redistribute 8 // it, and/or modify it under the terms of the GNU Lesser General 9 // Public License as published by the Free Software Foundation; either 10 // version 2.1 of the License, or (at your option) any later version. 11 // The full text of the license can be found in the file LICENSE.md at 12 // the top level directory of deal.II. 13 // 14 // --------------------------------------------------------------------- 15 16 17 #ifndef dealii_face_setup_internal_h 18 #define dealii_face_setup_internal_h 19 20 #include <deal.II/base/config.h> 21 22 #include <deal.II/base/memory_consumption.h> 23 #include <deal.II/base/utilities.h> 24 25 #include <deal.II/distributed/tria_base.h> 26 27 #include <deal.II/grid/tria.h> 28 #include <deal.II/grid/tria_accessor.h> 29 30 #include <deal.II/matrix_free/face_info.h> 31 #include <deal.II/matrix_free/task_info.h> 32 33 #include <fstream> 34 35 36 DEAL_II_NAMESPACE_OPEN 37 38 39 namespace internal 40 { 41 namespace MatrixFreeFunctions 42 { 43 /** 44 * A struct that is used to represent a collection of faces of a process 45 * with one of its neighbor within the setup done in struct FaceInfo. 46 */ 47 struct FaceIdentifier 48 { FaceIdentifierFaceIdentifier49 FaceIdentifier() 50 : n_hanging_faces_smaller_subdomain(0) 51 , n_hanging_faces_larger_subdomain(0) 52 {} 53 54 std::vector<std::pair<CellId, CellId>> shared_faces; 55 unsigned int n_hanging_faces_smaller_subdomain; 56 unsigned int n_hanging_faces_larger_subdomain; 57 }; 58 59 60 61 /** 62 * A struct that extracts the faces relevant to a given set of cells, 63 * including the assignment of which of the two neighboring processors at 64 * a subdomain boundary with MPI should do the integration (from both 65 * sides). This data structure is used for the setup of the connectivity 66 * between faces and cells and for identification of the dof indices to be 67 * used for face integrals. 68 */ 69 template <int dim> 70 struct FaceSetup 71 { 72 FaceSetup(); 73 74 /** 75 * Perform the initial detection of faces before reading the indices on 76 * the cells. This does not add the faces yet but only decides on 77 * whether some of the faces should be considered for processing 78 * locally. 79 */ 80 void 81 initialize( 82 const dealii::Triangulation<dim> &triangulation, 83 const unsigned int mg_level, 84 const bool hold_all_faces_to_owned_cells, 85 std::vector<std::pair<unsigned int, unsigned int>> &cell_levels); 86 87 /** 88 * Upon completion of the dof indices, this function extracts the 89 * information relevant for FaceToCellTopology and categorizes the faces 90 * into interior faces, boundary faces, and ghost faces (not processed 91 * locally but adjacent to some of the cells present locally). 92 */ 93 void 94 generate_faces( 95 const dealii::Triangulation<dim> & triangulation, 96 const std::vector<std::pair<unsigned int, unsigned int>> &cell_levels, 97 TaskInfo & task_info); 98 99 /** 100 * Fills the information about the cell, the face number, and numbers 101 * within the plain array representation in MatrixFree into 102 * FaceToCellTopology (without vectorization, which is something applied 103 * later). 104 */ 105 FaceToCellTopology<1> 106 create_face( 107 const unsigned int face_no, 108 const typename dealii::Triangulation<dim>::cell_iterator &cell, 109 const unsigned int number_cell_interior, 110 const typename dealii::Triangulation<dim>::cell_iterator &neighbor, 111 const unsigned int number_cell_exterior); 112 113 bool use_active_cells; 114 115 /** 116 * A type that categorizes faces in the first initialize() function such 117 * that we can later get their correct value in generate_faces(). 118 */ 119 enum class FaceCategory : char 120 { 121 locally_active_at_boundary, 122 locally_active_done_here, 123 locally_active_done_elsewhere, 124 ghosted, 125 multigrid_refinement_edge 126 }; 127 128 std::vector<FaceCategory> face_is_owned; 129 std::vector<bool> at_processor_boundary; 130 std::vector<FaceToCellTopology<1>> inner_faces; 131 std::vector<FaceToCellTopology<1>> boundary_faces; 132 std::vector<FaceToCellTopology<1>> inner_ghost_faces; 133 std::vector<FaceToCellTopology<1>> refinement_edge_faces; 134 }; 135 136 137 138 /** 139 * Actually form the batches for vectorized execution of face integrals. 140 */ 141 template <int vectorization_width> 142 void 143 collect_faces_vectorization( 144 const std::vector<FaceToCellTopology<1>> &faces_in, 145 const std::vector<bool> & hard_vectorization_boundary, 146 std::vector<unsigned int> & face_partition_data, 147 std::vector<FaceToCellTopology<vectorization_width>> &faces_out); 148 149 150 151 /* -------------------------------------------------------------------- */ 152 153 #ifndef DOXYGEN 154 155 template <int dim> FaceSetup()156 FaceSetup<dim>::FaceSetup() 157 : use_active_cells(true) 158 {} 159 160 161 162 template <int dim> 163 void initialize(const dealii::Triangulation<dim> & triangulation,const unsigned int mg_level,const bool hold_all_faces_to_owned_cells,std::vector<std::pair<unsigned int,unsigned int>> & cell_levels)164 FaceSetup<dim>::initialize( 165 const dealii::Triangulation<dim> &triangulation, 166 const unsigned int mg_level, 167 const bool hold_all_faces_to_owned_cells, 168 std::vector<std::pair<unsigned int, unsigned int>> &cell_levels) 169 { 170 use_active_cells = mg_level == numbers::invalid_unsigned_int; 171 172 # ifdef DEBUG 173 // safety check 174 if (use_active_cells) 175 for (const auto &cell_level : cell_levels) 176 { 177 typename dealii::Triangulation<dim>::cell_iterator dcell( 178 &triangulation, cell_level.first, cell_level.second); 179 Assert(dcell->is_active(), ExcInternalError()); 180 } 181 # endif 182 183 // step 1: add ghost cells for those cells that we identify as 184 // interesting 185 186 at_processor_boundary.resize(cell_levels.size(), false); 187 face_is_owned.resize(dim > 1 ? triangulation.n_raw_faces() : 188 triangulation.n_vertices(), 189 FaceCategory::locally_active_done_elsewhere); 190 191 // go through the mesh and divide the faces on the processor 192 // boundaries as evenly as possible between the processors 193 std::map<types::subdomain_id, FaceIdentifier> 194 inner_faces_at_proc_boundary; 195 if (triangulation.locally_owned_subdomain() != 196 numbers::invalid_subdomain_id) 197 { 198 const types::subdomain_id my_domain = 199 triangulation.locally_owned_subdomain(); 200 for (unsigned int i = 0; i < cell_levels.size(); ++i) 201 { 202 if (i > 0 && cell_levels[i] == cell_levels[i - 1]) 203 continue; 204 typename dealii::Triangulation<dim>::cell_iterator dcell( 205 &triangulation, cell_levels[i].first, cell_levels[i].second); 206 for (const unsigned int f : dcell->face_indices()) 207 { 208 if (dcell->at_boundary(f) && !dcell->has_periodic_neighbor(f)) 209 continue; 210 typename dealii::Triangulation<dim>::cell_iterator neighbor = 211 dcell->neighbor_or_periodic_neighbor(f); 212 213 // faces at hanging nodes are always treated by the processor 214 // who owns the element on the fine side. but we need to count 215 // the number of inner faces in order to balance the remaining 216 // faces properly 217 const CellId id_mine = dcell->id(); 218 if (use_active_cells && neighbor->has_children()) 219 for (unsigned int c = 0; 220 c < (dcell->has_periodic_neighbor(f) ? 221 dcell->periodic_neighbor(f) 222 ->face(dcell->periodic_neighbor_face_no(f)) 223 ->n_children() : 224 dcell->face(f)->n_children()); 225 ++c) 226 { 227 typename dealii::Triangulation<dim>::cell_iterator 228 neighbor_c = 229 dcell->at_boundary(f) ? 230 dcell->periodic_neighbor_child_on_subface(f, c) : 231 dcell->neighbor_child_on_subface(f, c); 232 const types::subdomain_id neigh_domain = 233 neighbor_c->subdomain_id(); 234 if (my_domain < neigh_domain) 235 inner_faces_at_proc_boundary[neigh_domain] 236 .n_hanging_faces_larger_subdomain++; 237 else if (my_domain > neigh_domain) 238 inner_faces_at_proc_boundary[neigh_domain] 239 .n_hanging_faces_smaller_subdomain++; 240 } 241 else 242 { 243 const types::subdomain_id neigh_domain = 244 use_active_cells ? neighbor->subdomain_id() : 245 neighbor->level_subdomain_id(); 246 if (neighbor->level() < dcell->level() && 247 use_active_cells) 248 { 249 if (my_domain < neigh_domain) 250 inner_faces_at_proc_boundary[neigh_domain] 251 .n_hanging_faces_smaller_subdomain++; 252 else if (my_domain > neigh_domain) 253 inner_faces_at_proc_boundary[neigh_domain] 254 .n_hanging_faces_larger_subdomain++; 255 } 256 else if (neighbor->level() == dcell->level() && 257 my_domain != neigh_domain) 258 { 259 // always list the cell whose owner has the lower 260 // subdomain id first. this applies to both processors 261 // involved, so both processors will generate the same 262 // list that we will later order 263 const CellId id_neigh = neighbor->id(); 264 if (my_domain < neigh_domain) 265 inner_faces_at_proc_boundary[neigh_domain] 266 .shared_faces.emplace_back(id_mine, id_neigh); 267 else 268 inner_faces_at_proc_boundary[neigh_domain] 269 .shared_faces.emplace_back(id_neigh, id_mine); 270 } 271 } 272 } 273 } 274 275 // sort the cell ids related to each neighboring processor. This 276 // algorithm is symmetric so every processor combination should 277 // arrive here and no deadlock should be possible 278 for (auto &inner_face : inner_faces_at_proc_boundary) 279 { 280 Assert(inner_face.first != my_domain, 281 ExcInternalError("Should not send info to myself")); 282 std::sort(inner_face.second.shared_faces.begin(), 283 inner_face.second.shared_faces.end()); 284 inner_face.second.shared_faces.erase( 285 std::unique(inner_face.second.shared_faces.begin(), 286 inner_face.second.shared_faces.end()), 287 inner_face.second.shared_faces.end()); 288 289 // safety check: both involved processors should see the same list 290 // because the pattern of ghosting is symmetric. We test this by 291 // looking at the length of the lists of faces 292 # if defined(DEAL_II_WITH_MPI) && defined(DEBUG) 293 MPI_Comm comm = MPI_COMM_SELF; 294 if (const dealii::parallel::TriangulationBase<dim> *ptria = 295 dynamic_cast<const dealii::parallel::TriangulationBase<dim> 296 *>(&triangulation)) 297 comm = ptria->get_communicator(); 298 299 MPI_Status status; 300 unsigned int mysize = inner_face.second.shared_faces.size(); 301 unsigned int othersize = numbers::invalid_unsigned_int; 302 MPI_Sendrecv(&mysize, 303 1, 304 MPI_UNSIGNED, 305 inner_face.first, 306 600 + my_domain, 307 &othersize, 308 1, 309 MPI_UNSIGNED, 310 inner_face.first, 311 600 + inner_face.first, 312 comm, 313 &status); 314 AssertDimension(mysize, othersize); 315 mysize = inner_face.second.n_hanging_faces_smaller_subdomain; 316 MPI_Sendrecv(&mysize, 317 1, 318 MPI_UNSIGNED, 319 inner_face.first, 320 700 + my_domain, 321 &othersize, 322 1, 323 MPI_UNSIGNED, 324 inner_face.first, 325 700 + inner_face.first, 326 comm, 327 &status); 328 AssertDimension(mysize, othersize); 329 mysize = inner_face.second.n_hanging_faces_larger_subdomain; 330 MPI_Sendrecv(&mysize, 331 1, 332 MPI_UNSIGNED, 333 inner_face.first, 334 800 + my_domain, 335 &othersize, 336 1, 337 MPI_UNSIGNED, 338 inner_face.first, 339 800 + inner_face.first, 340 comm, 341 &status); 342 AssertDimension(mysize, othersize); 343 # endif 344 345 // Arrange the face "ownership" such that cells that are access 346 // by more than one face (think of a cell in a corner) get 347 // ghosted. This arrangement has the advantage that we need to 348 // send less data because the same data is used twice. The 349 // strategy applied here is to ensure the same order of face 350 // pairs on both processors that share some faces, and make the 351 // same decision on both sides. 352 353 // Create a vector with cell ids sorted over the processor with 354 // the larger rank. In the code below we need to be able to 355 // identify the same cell once for the processor with higher 356 // rank and once for the processor with the lower rank. The 357 // format for the processor with the higher rank is already 358 // contained in `shared_faces`, whereas we need a copy that we 359 // sort differently for the other way around. 360 std::vector<std::tuple<CellId, CellId, unsigned int>> other_range( 361 inner_face.second.shared_faces.size()); 362 for (unsigned int i = 0; i < other_range.size(); ++i) 363 other_range[i] = 364 std::make_tuple(inner_face.second.shared_faces[i].second, 365 inner_face.second.shared_faces[i].first, 366 i); 367 std::sort(other_range.begin(), other_range.end()); 368 369 // the vector 'assignment' sets whether a particular cell 370 // appears more often and acts as a pre-selection of the rank. A 371 // value of 1 means that the process with the higher rank gets 372 // those faces, a value -1 means that the process with the lower 373 // rank gets it, whereas a value 0 means that the decision can 374 // be made in an arbitrary way. 375 unsigned int n_faces_lower_proc = 0, n_faces_higher_proc = 0; 376 std::vector<char> assignment(other_range.size(), 0); 377 if (inner_face.second.shared_faces.size() > 0) 378 { 379 // identify faces that go to the processor with the higher 380 // rank 381 unsigned int count = 0; 382 for (unsigned int i = 1; 383 i < inner_face.second.shared_faces.size(); 384 ++i) 385 if (inner_face.second.shared_faces[i].first == 386 inner_face.second.shared_faces[i - 1 - count].first) 387 ++count; 388 else 389 { 390 AssertThrow(count < 2 * dim, ExcInternalError()); 391 if (count > 0) 392 { 393 for (unsigned int k = 0; k <= count; ++k) 394 assignment[i - 1 - k] = 1; 395 n_faces_higher_proc += count + 1; 396 } 397 count = 0; 398 } 399 400 // identify faces that definitely go to the processor with 401 // the lower rank - this must use the sorting of CellId 402 // variables from the processor with the higher rank, i.e., 403 // other_range rather than `shared_faces`. 404 count = 0; 405 for (unsigned int i = 1; i < other_range.size(); ++i) 406 if (std::get<0>(other_range[i]) == 407 std::get<0>(other_range[i - 1 - count])) 408 ++count; 409 else 410 { 411 AssertThrow(count < 2 * dim, ExcInternalError()); 412 if (count > 0) 413 { 414 for (unsigned int k = 0; k <= count; ++k) 415 { 416 Assert(inner_face.second 417 .shared_faces[std::get<2>( 418 other_range[i - 1])] 419 .second == 420 inner_face.second 421 .shared_faces[std::get<2>( 422 other_range[i - 1 - k])] 423 .second, 424 ExcInternalError()); 425 // only assign to -1 if higher rank was not 426 // yet set 427 if (assignment[std::get<2>( 428 other_range[i - 1 - k])] == 0) 429 { 430 assignment[std::get<2>( 431 other_range[i - 1 - k])] = -1; 432 ++n_faces_lower_proc; 433 } 434 } 435 } 436 count = 0; 437 } 438 } 439 440 441 // divide the faces evenly between the two processors. the 442 // processor with small rank takes the first half, the processor 443 // with larger rank the second half. Adjust for the hanging 444 // faces that always get assigned to one side, and the faces we 445 // have already assigned due to the criterion above 446 n_faces_lower_proc += 447 inner_face.second.n_hanging_faces_smaller_subdomain; 448 n_faces_higher_proc += 449 inner_face.second.n_hanging_faces_larger_subdomain; 450 const unsigned int n_total_faces_at_proc_boundary = 451 (inner_face.second.shared_faces.size() + 452 inner_face.second.n_hanging_faces_smaller_subdomain + 453 inner_face.second.n_hanging_faces_larger_subdomain); 454 unsigned int split_index = n_total_faces_at_proc_boundary / 2; 455 if (split_index < n_faces_lower_proc) 456 split_index = 0; 457 else if (split_index < 458 n_total_faces_at_proc_boundary - n_faces_higher_proc) 459 split_index -= n_faces_lower_proc; 460 else 461 split_index = n_total_faces_at_proc_boundary - 462 n_faces_higher_proc - n_faces_lower_proc; 463 464 // make sure the splitting is consistent between both sides 465 # if defined(DEAL_II_WITH_MPI) && defined(DEBUG) 466 MPI_Sendrecv(&split_index, 467 1, 468 MPI_UNSIGNED, 469 inner_face.first, 470 900 + my_domain, 471 &othersize, 472 1, 473 MPI_UNSIGNED, 474 inner_face.first, 475 900 + inner_face.first, 476 comm, 477 &status); 478 AssertDimension(split_index, othersize); 479 MPI_Sendrecv(&n_faces_lower_proc, 480 1, 481 MPI_UNSIGNED, 482 inner_face.first, 483 1000 + my_domain, 484 &othersize, 485 1, 486 MPI_UNSIGNED, 487 inner_face.first, 488 1000 + inner_face.first, 489 comm, 490 &status); 491 AssertDimension(n_faces_lower_proc, othersize); 492 MPI_Sendrecv(&n_faces_higher_proc, 493 1, 494 MPI_UNSIGNED, 495 inner_face.first, 496 1100 + my_domain, 497 &othersize, 498 1, 499 MPI_UNSIGNED, 500 inner_face.first, 501 1100 + inner_face.first, 502 comm, 503 &status); 504 AssertDimension(n_faces_higher_proc, othersize); 505 # endif 506 507 // collect the faces on both sides 508 std::vector<std::pair<CellId, CellId>> owned_faces_lower, 509 owned_faces_higher; 510 for (unsigned int i = 0; i < assignment.size(); ++i) 511 if (assignment[i] < 0) 512 owned_faces_lower.push_back( 513 inner_face.second.shared_faces[i]); 514 else if (assignment[i] > 0) 515 owned_faces_higher.push_back( 516 inner_face.second.shared_faces[i]); 517 AssertIndexRange(split_index, 518 inner_face.second.shared_faces.size() + 1 - 519 owned_faces_lower.size() - 520 owned_faces_higher.size()); 521 522 unsigned int i = 0, c = 0; 523 for (; i < assignment.size() && c < split_index; ++i) 524 if (assignment[i] == 0) 525 { 526 owned_faces_lower.push_back( 527 inner_face.second.shared_faces[i]); 528 ++c; 529 } 530 for (; i < assignment.size(); ++i) 531 if (assignment[i] == 0) 532 { 533 owned_faces_higher.push_back( 534 inner_face.second.shared_faces[i]); 535 } 536 537 # ifdef DEBUG 538 // check consistency of faces on both sides 539 std::vector<std::pair<CellId, CellId>> check_faces; 540 check_faces.insert(check_faces.end(), 541 owned_faces_lower.begin(), 542 owned_faces_lower.end()); 543 check_faces.insert(check_faces.end(), 544 owned_faces_higher.begin(), 545 owned_faces_higher.end()); 546 std::sort(check_faces.begin(), check_faces.end()); 547 AssertDimension(check_faces.size(), 548 inner_face.second.shared_faces.size()); 549 for (unsigned int i = 0; i < check_faces.size(); ++i) 550 Assert(check_faces[i] == inner_face.second.shared_faces[i], 551 ExcInternalError()); 552 # endif 553 554 // now only set half of the faces as the ones to keep 555 if (my_domain < inner_face.first) 556 inner_face.second.shared_faces.swap(owned_faces_lower); 557 else 558 inner_face.second.shared_faces.swap(owned_faces_higher); 559 560 std::sort(inner_face.second.shared_faces.begin(), 561 inner_face.second.shared_faces.end()); 562 } 563 } 564 565 // fill in the additional cells that we need access to via ghosting to 566 // cell_levels 567 std::set<std::pair<unsigned int, unsigned int>> ghost_cells; 568 for (unsigned int i = 0; i < cell_levels.size(); ++i) 569 { 570 typename dealii::Triangulation<dim>::cell_iterator dcell( 571 &triangulation, cell_levels[i].first, cell_levels[i].second); 572 if (use_active_cells) 573 Assert(dcell->is_active(), ExcNotImplemented()); 574 for (const auto f : dcell->face_indices()) 575 { 576 if (dcell->at_boundary(f) && !dcell->has_periodic_neighbor(f)) 577 face_is_owned[dcell->face(f)->index()] = 578 FaceCategory::locally_active_at_boundary; 579 580 // treat boundaries of cells of different refinement level 581 // inside the domain in case of multigrid separately 582 else if ((dcell->at_boundary(f) == false || 583 dcell->has_periodic_neighbor(f)) && 584 mg_level != numbers::invalid_unsigned_int && 585 dcell->neighbor_or_periodic_neighbor(f)->level() < 586 dcell->level()) 587 { 588 face_is_owned[dcell->face(f)->index()] = 589 FaceCategory::multigrid_refinement_edge; 590 } 591 else 592 { 593 typename dealii::Triangulation<dim>::cell_iterator neighbor = 594 dcell->neighbor_or_periodic_neighbor(f); 595 596 // neighbor is refined -> face will be treated by neighbor 597 if (use_active_cells && neighbor->has_children() && 598 hold_all_faces_to_owned_cells == false) 599 continue; 600 601 bool add_to_ghost = false; 602 const types::subdomain_id 603 id1 = use_active_cells ? dcell->subdomain_id() : 604 dcell->level_subdomain_id(), 605 id2 = use_active_cells ? 606 (neighbor->has_children() ? 607 dcell->neighbor_child_on_subface(f, 0) 608 ->subdomain_id() : 609 neighbor->subdomain_id()) : 610 neighbor->level_subdomain_id(); 611 612 // Check whether the current face should be processed 613 // locally (instead of being processed from the other 614 // side). We process a face locally when we are more refined 615 // (in the active cell case) or when the face is listed in 616 // the `shared_faces` data structure that we built above. 617 if ((id1 == id2 && 618 (use_active_cells == false || neighbor->is_active())) || 619 dcell->level() > neighbor->level() || 620 std::binary_search( 621 inner_faces_at_proc_boundary[id2].shared_faces.begin(), 622 inner_faces_at_proc_boundary[id2].shared_faces.end(), 623 std::make_pair(id1 < id2 ? dcell->id() : neighbor->id(), 624 id1 < id2 ? neighbor->id() : 625 dcell->id()))) 626 { 627 face_is_owned[dcell->face(f)->index()] = 628 FaceCategory::locally_active_done_here; 629 if (dcell->level() == neighbor->level()) 630 face_is_owned 631 [neighbor 632 ->face(dcell->has_periodic_neighbor(f) ? 633 dcell->periodic_neighbor_face_no(f) : 634 dcell->neighbor_face_no(f)) 635 ->index()] = 636 FaceCategory::locally_active_done_here; 637 638 // If neighbor is a ghost element (i.e. 639 // dcell->subdomain_id ! 640 // dcell->neighbor(f)->subdomain_id()), we need to add its 641 // index into cell level list. 642 if (use_active_cells) 643 add_to_ghost = 644 (dcell->subdomain_id() != neighbor->subdomain_id()); 645 else 646 add_to_ghost = (dcell->level_subdomain_id() != 647 neighbor->level_subdomain_id()); 648 } 649 else if (hold_all_faces_to_owned_cells == true) 650 { 651 // add all cells to ghost layer... 652 face_is_owned[dcell->face(f)->index()] = 653 FaceCategory::ghosted; 654 if (use_active_cells) 655 { 656 if (neighbor->has_children()) 657 for (unsigned int s = 0; 658 s < dcell->face(f)->n_children(); 659 ++s) 660 if (dcell->at_boundary(f)) 661 { 662 if (dcell 663 ->periodic_neighbor_child_on_subface(f, 664 s) 665 ->subdomain_id() != 666 dcell->subdomain_id()) 667 add_to_ghost = true; 668 } 669 else 670 { 671 if (dcell->neighbor_child_on_subface(f, s) 672 ->subdomain_id() != 673 dcell->subdomain_id()) 674 add_to_ghost = true; 675 } 676 else 677 add_to_ghost = (dcell->subdomain_id() != 678 neighbor->subdomain_id()); 679 } 680 else 681 add_to_ghost = (dcell->level_subdomain_id() != 682 neighbor->level_subdomain_id()); 683 } 684 685 if (add_to_ghost) 686 { 687 if (use_active_cells && neighbor->has_children()) 688 for (unsigned int s = 0; 689 s < dcell->face(f)->n_children(); 690 ++s) 691 { 692 typename dealii::Triangulation<dim>::cell_iterator 693 neighbor_child = 694 dcell->at_boundary(f) ? 695 dcell->periodic_neighbor_child_on_subface(f, 696 s) : 697 dcell->neighbor_child_on_subface(f, s); 698 if (neighbor_child->subdomain_id() != 699 dcell->subdomain_id()) 700 ghost_cells.insert( 701 std::pair<unsigned int, unsigned int>( 702 neighbor_child->level(), 703 neighbor_child->index())); 704 } 705 else 706 ghost_cells.insert( 707 std::pair<unsigned int, unsigned int>( 708 neighbor->level(), neighbor->index())); 709 at_processor_boundary[i] = true; 710 } 711 } 712 } 713 } 714 715 // step 2: append the ghost cells at the end of the locally owned 716 // cells 717 for (const auto &ghost_cell : ghost_cells) 718 cell_levels.push_back(ghost_cell); 719 } 720 721 722 723 template <int dim> 724 void generate_faces(const dealii::Triangulation<dim> & triangulation,const std::vector<std::pair<unsigned int,unsigned int>> & cell_levels,TaskInfo & task_info)725 FaceSetup<dim>::generate_faces( 726 const dealii::Triangulation<dim> & triangulation, 727 const std::vector<std::pair<unsigned int, unsigned int>> &cell_levels, 728 TaskInfo & task_info) 729 { 730 // step 1: create the inverse map between cell iterators and the 731 // cell_level_index field 732 std::map<std::pair<unsigned int, unsigned int>, unsigned int> 733 map_to_vectorized; 734 for (unsigned int cell = 0; cell < cell_levels.size(); ++cell) 735 if (cell == 0 || cell_levels[cell] != cell_levels[cell - 1]) 736 { 737 typename dealii::Triangulation<dim>::cell_iterator dcell( 738 &triangulation, 739 cell_levels[cell].first, 740 cell_levels[cell].second); 741 std::pair<unsigned int, unsigned int> level_index(dcell->level(), 742 dcell->index()); 743 map_to_vectorized[level_index] = cell; 744 } 745 746 // step 2: fill the information about inner faces and boundary faces 747 const unsigned int vectorization_length = task_info.vectorization_length; 748 task_info.face_partition_data.resize( 749 task_info.cell_partition_data.size() - 1, 0); 750 task_info.boundary_partition_data.resize( 751 task_info.cell_partition_data.size() - 1, 0); 752 std::vector<unsigned char> face_visited(face_is_owned.size(), 0); 753 for (unsigned int partition = 0; 754 partition < task_info.cell_partition_data.size() - 2; 755 ++partition) 756 { 757 unsigned int boundary_counter = 0; 758 unsigned int inner_counter = 0; 759 for (unsigned int cell = task_info.cell_partition_data[partition] * 760 vectorization_length; 761 cell < task_info.cell_partition_data[partition + 1] * 762 vectorization_length; 763 ++cell) 764 if (cell == 0 || cell_levels[cell] != cell_levels[cell - 1]) 765 { 766 typename dealii::Triangulation<dim>::cell_iterator dcell( 767 &triangulation, 768 cell_levels[cell].first, 769 cell_levels[cell].second); 770 for (const auto f : dcell->face_indices()) 771 { 772 // boundary face 773 if (face_is_owned[dcell->face(f)->index()] == 774 FaceCategory::locally_active_at_boundary) 775 { 776 Assert(dcell->at_boundary(f), ExcInternalError()); 777 ++boundary_counter; 778 FaceToCellTopology<1> info; 779 info.cells_interior[0] = cell; 780 info.cells_exterior[0] = numbers::invalid_unsigned_int; 781 info.interior_face_no = f; 782 info.exterior_face_no = dcell->face(f)->boundary_id(); 783 info.subface_index = 784 GeometryInfo<dim>::max_children_per_cell; 785 info.face_orientation = 0; 786 boundary_faces.push_back(info); 787 788 face_visited[dcell->face(f)->index()]++; 789 } 790 // interior face, including faces over periodic boundaries 791 else 792 { 793 typename dealii::Triangulation<dim>::cell_iterator 794 neighbor = dcell->neighbor_or_periodic_neighbor(f); 795 if (use_active_cells && neighbor->has_children()) 796 { 797 for (unsigned int c = 0; 798 c < dcell->face(f)->n_children(); 799 ++c) 800 { 801 typename dealii::Triangulation< 802 dim>::cell_iterator neighbor_c = 803 dcell->at_boundary(f) ? 804 dcell->periodic_neighbor_child_on_subface( 805 f, c) : 806 dcell->neighbor_child_on_subface(f, c); 807 const types::subdomain_id neigh_domain = 808 neighbor_c->subdomain_id(); 809 const unsigned int neighbor_face_no = 810 dcell->has_periodic_neighbor(f) ? 811 dcell->periodic_neighbor_face_no(f) : 812 dcell->neighbor_face_no(f); 813 if (neigh_domain != dcell->subdomain_id() || 814 face_visited 815 [dcell->face(f)->child(c)->index()] == 816 1) 817 { 818 std::pair<unsigned int, unsigned int> 819 level_index(neighbor_c->level(), 820 neighbor_c->index()); 821 if (face_is_owned 822 [dcell->face(f)->child(c)->index()] == 823 FaceCategory::locally_active_done_here) 824 { 825 ++inner_counter; 826 inner_faces.push_back(create_face( 827 neighbor_face_no, 828 neighbor_c, 829 map_to_vectorized[level_index], 830 dcell, 831 cell)); 832 } 833 else if (face_is_owned[dcell->face(f) 834 ->child(c) 835 ->index()] == 836 FaceCategory::ghosted) 837 { 838 inner_ghost_faces.push_back(create_face( 839 neighbor_face_no, 840 neighbor_c, 841 map_to_vectorized[level_index], 842 dcell, 843 cell)); 844 } 845 else 846 Assert( 847 face_is_owned[dcell->face(f) 848 ->index()] == 849 FaceCategory:: 850 locally_active_done_elsewhere || 851 face_is_owned[dcell->face(f) 852 ->index()] == 853 FaceCategory::ghosted, 854 ExcInternalError()); 855 } 856 else 857 { 858 face_visited 859 [dcell->face(f)->child(c)->index()] = 1; 860 } 861 } 862 } 863 else 864 { 865 const types::subdomain_id my_domain = 866 use_active_cells ? dcell->subdomain_id() : 867 dcell->level_subdomain_id(); 868 const types::subdomain_id neigh_domain = 869 use_active_cells ? neighbor->subdomain_id() : 870 neighbor->level_subdomain_id(); 871 if (neigh_domain != my_domain || 872 face_visited[dcell->face(f)->index()] == 1) 873 { 874 std::pair<unsigned int, unsigned int> 875 level_index(neighbor->level(), 876 neighbor->index()); 877 if (face_is_owned[dcell->face(f)->index()] == 878 FaceCategory::locally_active_done_here) 879 { 880 Assert(use_active_cells || 881 dcell->level() == 882 neighbor->level(), 883 ExcInternalError()); 884 ++inner_counter; 885 inner_faces.push_back(create_face( 886 f, 887 dcell, 888 cell, 889 neighbor, 890 map_to_vectorized[level_index])); 891 } 892 else if (face_is_owned[dcell->face(f) 893 ->index()] == 894 FaceCategory::ghosted) 895 { 896 inner_ghost_faces.push_back(create_face( 897 f, 898 dcell, 899 cell, 900 neighbor, 901 map_to_vectorized[level_index])); 902 } 903 } 904 else 905 { 906 face_visited[dcell->face(f)->index()] = 1; 907 if (dcell->has_periodic_neighbor(f)) 908 face_visited 909 [neighbor 910 ->face( 911 dcell->periodic_neighbor_face_no(f)) 912 ->index()] = 1; 913 } 914 if (face_is_owned[dcell->face(f)->index()] == 915 FaceCategory::multigrid_refinement_edge) 916 { 917 refinement_edge_faces.push_back( 918 create_face(f, 919 dcell, 920 cell, 921 neighbor, 922 refinement_edge_faces.size())); 923 } 924 } 925 } 926 } 927 } 928 task_info.face_partition_data[partition + 1] = 929 task_info.face_partition_data[partition] + inner_counter; 930 task_info.boundary_partition_data[partition + 1] = 931 task_info.boundary_partition_data[partition] + boundary_counter; 932 } 933 task_info.ghost_face_partition_data.resize(2); 934 task_info.ghost_face_partition_data[0] = 0; 935 task_info.ghost_face_partition_data[1] = inner_ghost_faces.size(); 936 task_info.refinement_edge_face_partition_data.resize(2); 937 task_info.refinement_edge_face_partition_data[0] = 0; 938 task_info.refinement_edge_face_partition_data[1] = 939 refinement_edge_faces.size(); 940 } 941 942 943 944 template <int dim> 945 FaceToCellTopology<1> create_face(const unsigned int face_no,const typename dealii::Triangulation<dim>::cell_iterator & cell,const unsigned int number_cell_interior,const typename dealii::Triangulation<dim>::cell_iterator & neighbor,const unsigned int number_cell_exterior)946 FaceSetup<dim>::create_face( 947 const unsigned int face_no, 948 const typename dealii::Triangulation<dim>::cell_iterator &cell, 949 const unsigned int number_cell_interior, 950 const typename dealii::Triangulation<dim>::cell_iterator &neighbor, 951 const unsigned int number_cell_exterior) 952 { 953 FaceToCellTopology<1> info; 954 info.cells_interior[0] = number_cell_interior; 955 info.cells_exterior[0] = number_cell_exterior; 956 info.interior_face_no = face_no; 957 if (cell->has_periodic_neighbor(face_no)) 958 info.exterior_face_no = cell->periodic_neighbor_face_no(face_no); 959 else 960 info.exterior_face_no = cell->neighbor_face_no(face_no); 961 962 info.subface_index = GeometryInfo<dim>::max_children_per_cell; 963 Assert(neighbor->level() <= cell->level(), ExcInternalError()); 964 if (cell->level() > neighbor->level()) 965 { 966 if (cell->has_periodic_neighbor(face_no)) 967 info.subface_index = 968 cell->periodic_neighbor_of_coarser_periodic_neighbor(face_no) 969 .second; 970 else 971 info.subface_index = 972 cell->neighbor_of_coarser_neighbor(face_no).second; 973 } 974 975 // special treatment of periodic boundaries 976 if (dim == 3 && cell->has_periodic_neighbor(face_no)) 977 { 978 const unsigned int exterior_face_orientation = 979 !cell->get_triangulation() 980 .get_periodic_face_map() 981 .at({cell, face_no}) 982 .second[0] + 983 2 * cell->get_triangulation() 984 .get_periodic_face_map() 985 .at({cell, face_no}) 986 .second[1] + 987 4 * cell->get_triangulation() 988 .get_periodic_face_map() 989 .at({cell, face_no}) 990 .second[2]; 991 992 info.face_orientation = exterior_face_orientation; 993 994 return info; 995 } 996 997 info.face_orientation = 0; 998 const unsigned int interior_face_orientation = 999 !cell->face_orientation(face_no) + 2 * cell->face_flip(face_no) + 1000 4 * cell->face_rotation(face_no); 1001 const unsigned int exterior_face_orientation = 1002 !neighbor->face_orientation(info.exterior_face_no) + 1003 2 * neighbor->face_flip(info.exterior_face_no) + 1004 4 * neighbor->face_rotation(info.exterior_face_no); 1005 if (interior_face_orientation != 0) 1006 { 1007 info.face_orientation = 8 + interior_face_orientation; 1008 Assert(exterior_face_orientation == 0, 1009 ExcMessage( 1010 "Face seems to be wrongly oriented from both sides")); 1011 } 1012 else 1013 info.face_orientation = exterior_face_orientation; 1014 return info; 1015 } 1016 1017 1018 1019 /** 1020 * This simple comparison for collect_faces_vectorization() identifies 1021 * faces of the same type, i.e., where all of the interior and exterior 1022 * face number, subface index and orientation are the same. This is used 1023 * to batch similar faces together for vectorization. 1024 */ 1025 inline bool compare_faces_for_vectorization(const FaceToCellTopology<1> & face1,const FaceToCellTopology<1> & face2)1026 compare_faces_for_vectorization(const FaceToCellTopology<1> &face1, 1027 const FaceToCellTopology<1> &face2) 1028 { 1029 if (face1.interior_face_no != face2.interior_face_no) 1030 return false; 1031 if (face1.exterior_face_no != face2.exterior_face_no) 1032 return false; 1033 if (face1.subface_index != face2.subface_index) 1034 return false; 1035 if (face1.face_orientation != face2.face_orientation) 1036 return false; 1037 return true; 1038 } 1039 1040 1041 1042 /** 1043 * This comparator is used within collect_faces_vectorization() to create 1044 * a sorting of FaceToCellTopology objects based on their 1045 * identifiers. This is used to obtain a good data locality when 1046 * processing the face integrals. 1047 */ 1048 template <int length> 1049 struct FaceComparator 1050 { 1051 bool operatorFaceComparator1052 operator()(const FaceToCellTopology<length> &face1, 1053 const FaceToCellTopology<length> &face2) const 1054 { 1055 for (unsigned int i = 0; i < length; ++i) 1056 if (face1.cells_interior[i] < face2.cells_interior[i]) 1057 return true; 1058 else if (face1.cells_interior[i] > face2.cells_interior[i]) 1059 return false; 1060 for (unsigned int i = 0; i < length; ++i) 1061 if (face1.cells_exterior[i] < face2.cells_exterior[i]) 1062 return true; 1063 else if (face1.cells_exterior[i] > face2.cells_exterior[i]) 1064 return false; 1065 if (face1.interior_face_no < face2.interior_face_no) 1066 return true; 1067 else if (face1.interior_face_no > face2.interior_face_no) 1068 return false; 1069 if (face1.exterior_face_no < face2.exterior_face_no) 1070 return true; 1071 else if (face1.exterior_face_no > face2.exterior_face_no) 1072 return false; 1073 1074 // we do not need to check for subface_index and orientation because 1075 // those cannot be different if when all the other values are the 1076 // same. 1077 AssertDimension(face1.subface_index, face2.subface_index); 1078 AssertDimension(face1.face_orientation, face2.face_orientation); 1079 1080 return false; 1081 } 1082 }; 1083 1084 1085 1086 template <int vectorization_width> 1087 void collect_faces_vectorization(const std::vector<FaceToCellTopology<1>> & faces_in,const std::vector<bool> & hard_vectorization_boundary,std::vector<unsigned int> & face_partition_data,std::vector<FaceToCellTopology<vectorization_width>> & faces_out)1088 collect_faces_vectorization( 1089 const std::vector<FaceToCellTopology<1>> &faces_in, 1090 const std::vector<bool> & hard_vectorization_boundary, 1091 std::vector<unsigned int> & face_partition_data, 1092 std::vector<FaceToCellTopology<vectorization_width>> &faces_out) 1093 { 1094 FaceToCellTopology<vectorization_width> macro_face; 1095 std::vector<std::vector<unsigned int>> faces_type; 1096 1097 unsigned int face_start = face_partition_data[0], 1098 face_end = face_partition_data[0]; 1099 1100 face_partition_data[0] = faces_out.size(); 1101 for (unsigned int partition = 0; 1102 partition < face_partition_data.size() - 1; 1103 ++partition) 1104 { 1105 std::vector<std::vector<unsigned int>> new_faces_type; 1106 1107 // start with the end point for the last partition 1108 face_start = face_end; 1109 face_end = face_partition_data[partition + 1]; 1110 1111 // set the partitioner to the new vectorized lengths 1112 face_partition_data[partition + 1] = face_partition_data[partition]; 1113 1114 // loop over the faces in the current partition and reorder according 1115 // to the face type 1116 for (unsigned int face = face_start; face < face_end; ++face) 1117 { 1118 for (auto &face_type : faces_type) 1119 { 1120 // Compare current face with first face of type type 1121 if (compare_faces_for_vectorization(faces_in[face], 1122 faces_in[face_type[0]])) 1123 { 1124 face_type.push_back(face); 1125 goto face_found; 1126 } 1127 } 1128 faces_type.emplace_back(1, face); 1129 face_found: 1130 {} 1131 } 1132 1133 // insert new faces in sorted list to get good data locality 1134 std::set<FaceToCellTopology<vectorization_width>, 1135 FaceComparator<vectorization_width>> 1136 new_faces; 1137 for (const auto &face_type : faces_type) 1138 { 1139 macro_face.interior_face_no = 1140 faces_in[face_type[0]].interior_face_no; 1141 macro_face.exterior_face_no = 1142 faces_in[face_type[0]].exterior_face_no; 1143 macro_face.subface_index = faces_in[face_type[0]].subface_index; 1144 macro_face.face_orientation = 1145 faces_in[face_type[0]].face_orientation; 1146 unsigned int no_faces = face_type.size(); 1147 std::vector<unsigned char> touched(no_faces, 0); 1148 1149 // do two passes through the data. The first is to identify 1150 // similar faces within the same index range as the cells which 1151 // will allow for vectorized read operations, the second picks up 1152 // all the rest 1153 unsigned int n_vectorized = 0; 1154 for (unsigned int f = 0; f < no_faces; ++f) 1155 if (faces_in[face_type[f]].cells_interior[0] % 1156 vectorization_width == 1157 0) 1158 { 1159 bool is_contiguous = true; 1160 if (f + vectorization_width > no_faces) 1161 is_contiguous = false; 1162 else 1163 for (unsigned int v = 1; v < vectorization_width; ++v) 1164 if (faces_in[face_type[f + v]].cells_interior[0] != 1165 faces_in[face_type[f]].cells_interior[0] + v) 1166 is_contiguous = false; 1167 if (is_contiguous) 1168 { 1169 AssertIndexRange(f, 1170 face_type.size() - 1171 vectorization_width + 1); 1172 for (unsigned int v = 0; v < vectorization_width; ++v) 1173 { 1174 macro_face.cells_interior[v] = 1175 faces_in[face_type[f + v]].cells_interior[0]; 1176 macro_face.cells_exterior[v] = 1177 faces_in[face_type[f + v]].cells_exterior[0]; 1178 touched[f + v] = 1; 1179 } 1180 new_faces.insert(macro_face); 1181 f += vectorization_width - 1; 1182 n_vectorized += vectorization_width; 1183 } 1184 } 1185 1186 std::vector<unsigned int> untouched; 1187 untouched.reserve(no_faces - n_vectorized); 1188 for (unsigned int f = 0; f < no_faces; ++f) 1189 if (touched[f] == 0) 1190 untouched.push_back(f); 1191 unsigned int v = 0; 1192 for (const auto f : untouched) 1193 { 1194 macro_face.cells_interior[v] = 1195 faces_in[face_type[f]].cells_interior[0]; 1196 macro_face.cells_exterior[v] = 1197 faces_in[face_type[f]].cells_exterior[0]; 1198 ++v; 1199 if (v == vectorization_width) 1200 { 1201 new_faces.insert(macro_face); 1202 v = 0; 1203 } 1204 } 1205 if (v > 0 && v < vectorization_width) 1206 { 1207 // must add non-filled face 1208 if (hard_vectorization_boundary[partition + 1] || 1209 partition == face_partition_data.size() - 2) 1210 { 1211 for (; v < vectorization_width; ++v) 1212 { 1213 // Dummy cell, not used 1214 macro_face.cells_interior[v] = 1215 numbers::invalid_unsigned_int; 1216 macro_face.cells_exterior[v] = 1217 numbers::invalid_unsigned_int; 1218 } 1219 new_faces.insert(macro_face); 1220 } 1221 else 1222 { 1223 // postpone to the next partition 1224 std::vector<unsigned int> untreated(v); 1225 for (unsigned int f = 0; f < v; ++f) 1226 untreated[f] = face_type[*(untouched.end() - 1 - f)]; 1227 new_faces_type.push_back(untreated); 1228 } 1229 } 1230 } 1231 1232 // insert sorted list to vector of faces 1233 for (auto it = new_faces.begin(); it != new_faces.end(); ++it) 1234 faces_out.push_back(*it); 1235 face_partition_data[partition + 1] += new_faces.size(); 1236 1237 // set the faces that were left over to faces_type for the next round 1238 faces_type = std::move(new_faces_type); 1239 } 1240 1241 # ifdef DEBUG 1242 // final safety checks 1243 for (const auto &face_type : faces_type) 1244 AssertDimension(face_type.size(), 0U); 1245 1246 AssertDimension(faces_out.size(), face_partition_data.back()); 1247 unsigned int nfaces = 0; 1248 for (unsigned int i = face_partition_data[0]; 1249 i < face_partition_data.back(); 1250 ++i) 1251 for (unsigned int v = 0; v < vectorization_width; ++v) 1252 nfaces += 1253 (faces_out[i].cells_interior[v] != numbers::invalid_unsigned_int); 1254 AssertDimension(nfaces, faces_in.size()); 1255 1256 std::vector<std::pair<unsigned int, unsigned int>> in_faces, out_faces; 1257 for (const auto &face_in : faces_in) 1258 in_faces.emplace_back(face_in.cells_interior[0], 1259 face_in.cells_exterior[0]); 1260 for (unsigned int i = face_partition_data[0]; 1261 i < face_partition_data.back(); 1262 ++i) 1263 for (unsigned int v = 0; 1264 v < vectorization_width && 1265 faces_out[i].cells_interior[v] != numbers::invalid_unsigned_int; 1266 ++v) 1267 out_faces.emplace_back(faces_out[i].cells_interior[v], 1268 faces_out[i].cells_exterior[v]); 1269 std::sort(in_faces.begin(), in_faces.end()); 1270 std::sort(out_faces.begin(), out_faces.end()); 1271 AssertDimension(in_faces.size(), out_faces.size()); 1272 for (unsigned int i = 0; i < in_faces.size(); ++i) 1273 { 1274 AssertDimension(in_faces[i].first, out_faces[i].first); 1275 AssertDimension(in_faces[i].second, out_faces[i].second); 1276 } 1277 # endif 1278 } 1279 1280 #endif // ifndef DOXYGEN 1281 1282 } // namespace MatrixFreeFunctions 1283 } // namespace internal 1284 1285 1286 DEAL_II_NAMESPACE_CLOSE 1287 1288 #endif 1289