1 // The libMesh Finite Element Library.
2 // Copyright (C) 2002-2020 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
3
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License as published by the Free Software Foundation; either
7 // version 2.1 of the License, or (at your option) any later version.
8
9 // This library is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 // Lesser General Public License for more details.
13
14 // You should have received a copy of the GNU Lesser General Public
15 // License along with this library; if not, write to the Free Software
16 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
18
19
20 // Local includes
21 #include "libmesh/partitioner.h"
22
23 // libMesh includes
24 #include "libmesh/elem.h"
25 #include "libmesh/int_range.h"
26 #include "libmesh/libmesh_logging.h"
27 #include "libmesh/mesh_base.h"
28 #include "libmesh/mesh_tools.h"
29 #include "libmesh/mesh_communication.h"
30 #include "libmesh/parallel_ghost_sync.h"
31
32 // TIMPI includes
33 #include "timpi/parallel_implementation.h"
34 #include "timpi/parallel_sync.h"
35
36 // C/C++ includes
37 #ifdef LIBMESH_HAVE_PETSC
38 #include "libmesh/ignore_warnings.h"
39 #include "petscmat.h"
40 #include "libmesh/restore_warnings.h"
41 #endif
42
43
44 namespace {
45
46 using namespace libMesh;
47
48 struct CorrectProcIds
49 {
50 typedef processor_id_type datum;
51
CorrectProcIdsCorrectProcIds52 CorrectProcIds(MeshBase & _mesh,
53 std::unordered_set<dof_id_type> & _bad_pids) :
54 mesh(_mesh), bad_pids(_bad_pids) {}
55
56 MeshBase & mesh;
57 std::unordered_set<dof_id_type> & bad_pids;
58
59 // ------------------------------------------------------------
gather_dataCorrectProcIds60 void gather_data (const std::vector<dof_id_type> & ids,
61 std::vector<datum> & data)
62 {
63 // Find the processor id of each requested node
64 data.resize(ids.size());
65
66 for (auto i : index_range(ids))
67 {
68 // Look for this point in the mesh and make sure we have a
69 // good pid for it before sending that on
70 if (ids[i] != DofObject::invalid_id &&
71 !bad_pids.count(ids[i]))
72 {
73 Node & node = mesh.node_ref(ids[i]);
74
75 // Return the node's correct processor id,
76 data[i] = node.processor_id();
77 }
78 else
79 data[i] = DofObject::invalid_processor_id;
80 }
81 }
82
83 // ------------------------------------------------------------
act_on_dataCorrectProcIds84 bool act_on_data (const std::vector<dof_id_type> & ids,
85 const std::vector<datum> proc_ids)
86 {
87 bool data_changed = false;
88
89 // Set the ghost node processor ids we've now been informed of
90 for (auto i : index_range(ids))
91 {
92 Node & node = mesh.node_ref(ids[i]);
93
94 processor_id_type & proc_id = node.processor_id();
95 const processor_id_type new_proc_id = proc_ids[i];
96
97 auto it = bad_pids.find(ids[i]);
98
99 if ((new_proc_id != proc_id ||
100 it != bad_pids.end()) &&
101 new_proc_id != DofObject::invalid_processor_id)
102 {
103 if (it != bad_pids.end())
104 {
105 proc_id = new_proc_id;
106 bad_pids.erase(it);
107 }
108 else
109 proc_id = node.choose_processor_id(proc_id, new_proc_id);
110
111 if (proc_id == new_proc_id)
112 data_changed = true;
113 }
114 }
115
116 return data_changed;
117 }
118 };
119
120 }
121
122 namespace libMesh
123 {
124
125
126
127 // ------------------------------------------------------------
128 // Partitioner static data
// Maximum number of element ids handled per chunk in the blocked
// parallel reductions (used by set_parent_processor_ids()); chunking
// keeps temporary buffer sizes bounded on very large meshes.
const dof_id_type Partitioner::communication_blocksize =
  dof_id_type(1000000);
131
132
133
134 // ------------------------------------------------------------
135 // Partitioner implementation
partition(MeshBase & mesh)136 void Partitioner::partition (MeshBase & mesh)
137 {
138 this->partition(mesh,mesh.n_processors());
139 }
140
141
142
// Partition the mesh into (at most) n pieces: assign temporary pids
// to any unpartitioned elements, run the concrete partitioner
// (_do_partition), then fix up parent/node processor ids and
// redistribute.  Collective: must be called on all processors.
void Partitioner::partition (MeshBase & mesh,
                             const unsigned int n)
{
  libmesh_parallel_only(mesh.comm());

  // BSK - temporary fix while redistribution is integrated 6/26/2008
  // Uncomment this to not repartition in parallel
  //   if (!mesh.is_serial())
  //     return;

  // we cannot partition into more pieces than we have
  // active elements!
  const unsigned int n_parts =
    static_cast<unsigned int>
    (std::min(mesh.n_active_elem(), static_cast<dof_id_type>(n)));

  // Set the number of partitions in the mesh
  mesh.set_n_partitions()=n_parts;

  if (n_parts == 1)
    {
      // Trivial case: everything goes to processor 0.
      this->single_partition (mesh);
      return;
    }

  // First assign a temporary partitioning to any unpartitioned elements
  Partitioner::partition_unpartitioned_elements(mesh, n_parts);

  // Call the partitioning function
  this->_do_partition(mesh,n_parts);

  // Set the parent's processor ids
  Partitioner::set_parent_processor_ids(mesh);

  // Redistribute elements if necessary, before setting node processor
  // ids, to make sure those will be set consistently
  mesh.redistribute();

#ifdef DEBUG
  MeshTools::libmesh_assert_valid_remote_elems(mesh);

  // Messed up elem processor_id()s can leave us without the child
  // elements we need to restrict vectors on a distributed mesh
  MeshTools::libmesh_assert_valid_procids<Elem>(mesh);
#endif

  // Set the node's processor ids
  Partitioner::set_node_processor_ids(mesh);

#ifdef DEBUG
  MeshTools::libmesh_assert_valid_procids<Elem>(mesh);
#endif

  // Give derived Mesh classes a chance to update any cached data to
  // reflect the new partitioning
  mesh.update_post_partitioning();
}
200
201
202
repartition(MeshBase & mesh)203 void Partitioner::repartition (MeshBase & mesh)
204 {
205 this->repartition(mesh,mesh.n_processors());
206 }
207
208
209
// Incrementally repartition the mesh into (at most) n pieces via
// _do_repartition().  Unlike partition(), this does not redistribute
// elements or run the DEBUG-mode consistency checks afterwards.
void Partitioner::repartition (MeshBase & mesh,
                               const unsigned int n)
{
  // we cannot partition into more pieces than we have
  // active elements!
  const unsigned int n_parts =
    static_cast<unsigned int>
    (std::min(mesh.n_active_elem(), static_cast<dof_id_type>(n)));

  // Set the number of partitions in the mesh
  mesh.set_n_partitions()=n_parts;

  if (n_parts == 1)
    {
      // Trivial case: everything goes to processor 0.
      this->single_partition (mesh);
      return;
    }

  // First assign a temporary partitioning to any unpartitioned elements
  Partitioner::partition_unpartitioned_elements(mesh, n_parts);

  // Call the partitioning function
  this->_do_repartition(mesh,n_parts);

  // Set the parent's processor ids
  Partitioner::set_parent_processor_ids(mesh);

  // Set the node's processor ids
  Partitioner::set_node_processor_ids(mesh);
}
240
241
242
243
244
single_partition(MeshBase & mesh)245 void Partitioner::single_partition (MeshBase & mesh)
246 {
247 this->single_partition_range(mesh.elements_begin(),
248 mesh.elements_end());
249
250 // Redistribute, in case someone (like our unit tests) is doing
251 // something silly (like moving a whole already-distributed mesh
252 // back onto rank 0).
253 mesh.redistribute();
254 }
255
256
257
single_partition_range(MeshBase::element_iterator it,MeshBase::element_iterator end)258 void Partitioner::single_partition_range (MeshBase::element_iterator it,
259 MeshBase::element_iterator end)
260 {
261 LOG_SCOPE("single_partition_range()", "Partitioner");
262
263 for (auto & elem : as_range(it, end))
264 {
265 elem->processor_id() = 0;
266
267 // Assign all this element's nodes to processor 0 as well.
268 for (Node & node : elem->node_ref_range())
269 node.processor_id() = 0;
270 }
271 }
272
partition_unpartitioned_elements(MeshBase & mesh)273 void Partitioner::partition_unpartitioned_elements (MeshBase & mesh)
274 {
275 Partitioner::partition_unpartitioned_elements(mesh, mesh.n_processors());
276 }
277
278
279
// Spread all currently-unpartitioned elements across n_subdomains
// pieces, as evenly as possible, using a globally consistent
// space-filling-curve index so every processor computes the same
// assignment.  Collective: unpartitioned elements exist on all
// processors or on none.
void Partitioner::partition_unpartitioned_elements (MeshBase & mesh,
                                                    const unsigned int n_subdomains)
{
  MeshBase::element_iterator it = mesh.unpartitioned_elements_begin();
  const MeshBase::element_iterator end = mesh.unpartitioned_elements_end();

  const dof_id_type n_unpartitioned_elements = MeshTools::n_elem (it, end);

  // the unpartitioned elements must exist on all processors. If the range is empty on one
  // it is empty on all, and we can quit right here.
  if (!n_unpartitioned_elements)
    return;

  // find the target subdomain sizes
  // subdomain_bounds[p] is the exclusive prefix-sum upper bound of
  // global indices owned by subdomain p.
  std::vector<dof_id_type> subdomain_bounds(mesh.n_processors());

  for (auto pid : make_range(mesh.n_processors()))
    {
      dof_id_type tgt_subdomain_size = 0;

      // watch out for the case that n_subdomains < n_processors
      if (pid < n_subdomains)
        {
          tgt_subdomain_size = n_unpartitioned_elements/n_subdomains;

          // The first (n % n_subdomains) subdomains get one extra
          // element each, so the remainder is distributed too.
          if (pid < n_unpartitioned_elements%n_subdomains)
            tgt_subdomain_size++;

        }

      //libMesh::out << "pid, #= " << pid << ", " << tgt_subdomain_size << std::endl;
      if (pid == 0)
        subdomain_bounds[0] = tgt_subdomain_size;
      else
        subdomain_bounds[pid] = subdomain_bounds[pid-1] + tgt_subdomain_size;
    }

  libmesh_assert_equal_to (subdomain_bounds.back(), n_unpartitioned_elements);

  // create the unique mapping for all unpartitioned elements independent of partitioning
  // determine the global indexing for all the unpartitioned elements
  std::vector<dof_id_type> global_indices;

  // Calling this on all processors a unique range in [0,n_unpartitioned_elements) is constructed.
  // Only the indices for the elements we pass in are returned in the array.
  MeshCommunication().find_global_indices (mesh.comm(),
                                           MeshTools::create_bounding_box(mesh), it, end,
                                           global_indices);

  dof_id_type cnt=0;
  for (auto & elem : as_range(it, end))
    {
      libmesh_assert_less (cnt, global_indices.size());
      const dof_id_type global_index =
        global_indices[cnt++];

      libmesh_assert_less (global_index, subdomain_bounds.back());
      libmesh_assert_less (global_index, n_unpartitioned_elements);

      // upper_bound on the prefix sums maps a global index to the
      // subdomain whose [lower, upper) range contains it.
      const processor_id_type subdomain_id =
        cast_int<processor_id_type>
        (std::distance(subdomain_bounds.begin(),
                       std::upper_bound(subdomain_bounds.begin(),
                                        subdomain_bounds.end(),
                                        global_index)));
      libmesh_assert_less (subdomain_id, n_subdomains);

      elem->processor_id() = subdomain_id;
      //libMesh::out << "assigning " << global_index << " to " << subdomain_id << std::endl;
    }
}
351
352
353
// Assign processor ids to non-active elements: each subactive element
// gets the pid of its active ancestor; each ancestor (parent) gets the
// minimum pid over its children.  No-op without AMR support.
void Partitioner::set_parent_processor_ids(MeshBase & mesh)
{
  // Ignore the parameter when !LIBMESH_ENABLE_AMR
  libmesh_ignore(mesh);

  LOG_SCOPE("set_parent_processor_ids()", "Partitioner");

#ifdef LIBMESH_ENABLE_AMR

  // If the mesh is serial we have access to all the elements,
  // in particular all the active ones. We can therefore set
  // the parent processor ids indirectly through their children, and
  // set the subactive processor ids while examining their active
  // ancestors.
  // By convention a parent is assigned to the minimum processor
  // of all its children, and a subactive is assigned to the processor
  // of its active ancestor.
  if (mesh.is_serial())
    {
      for (auto & elem : mesh.active_element_ptr_range())
        {
          // First set descendents
          std::vector<Elem *> subactive_family;
          elem->total_family_tree(subactive_family);
          for (const auto & f : subactive_family)
            f->processor_id() = elem->processor_id();

          // Then set ancestors
          Elem * parent = elem->parent();

          while (parent)
            {
              // invalidate the parent id, otherwise the min below
              // will not work if the current parent id is less
              // than all the children!
              parent->invalidate_processor_id();

              for (auto & child : parent->child_ref_range())
                {
                  libmesh_assert(!child.is_remote());
                  libmesh_assert_not_equal_to (child.processor_id(), DofObject::invalid_processor_id);
                  parent->processor_id() = std::min(parent->processor_id(),
                                                    child.processor_id());
                }
              parent = parent->parent();
            }
        }
    }

  // When the mesh is parallel we cannot guarantee that parents have access to
  // all their children.
  else
    {
      // Setting subactive processor ids is easy: we can guarantee
      // that children have access to all their parents.

      // Loop over all the active elements in the mesh
      for (auto & child : mesh.active_element_ptr_range())
        {
          std::vector<Elem *> subactive_family;
          child->total_family_tree(subactive_family);
          for (const auto & f : subactive_family)
            f->processor_id() = child->processor_id();
        }

      // When the mesh is parallel we cannot guarantee that parents have access to
      // all their children.

      // We will use a brute-force approach here. Each processor finds its parent
      // elements and sets the parent pid to the minimum of its
      // semilocal descendants.
      // A global reduction is then performed to make sure the true minimum is found.
      // As noted, this is required because we cannot guarantee that a parent has
      // access to all its children on any single processor.
      libmesh_parallel_only(mesh.comm());
      libmesh_assert(MeshTools::n_elem(mesh.unpartitioned_elements_begin(),
                                       mesh.unpartitioned_elements_end()) == 0);

      const dof_id_type max_elem_id = mesh.max_elem_id();

      // Work in fixed-size chunks of the element id space so the
      // reduction buffer never exceeds communication_blocksize.
      std::vector<processor_id_type>
        parent_processor_ids (std::min(communication_blocksize,
                                       max_elem_id));

      for (dof_id_type blk=0, last_elem_id=0; last_elem_id<max_elem_id; blk++)
        {
          last_elem_id =
            std::min(static_cast<dof_id_type>((blk+1)*communication_blocksize),
                     max_elem_id);
          const dof_id_type first_elem_id = blk*communication_blocksize;

          std::fill (parent_processor_ids.begin(),
                     parent_processor_ids.end(),
                     DofObject::invalid_processor_id);

          // first build up local contributions to parent_processor_ids
          bool have_parent_in_block = false;

          for (auto & parent : as_range(mesh.ancestor_elements_begin(),
                                        mesh.ancestor_elements_end()))
            {
              const dof_id_type parent_idx = parent->id();
              libmesh_assert_less (parent_idx, max_elem_id);

              if ((parent_idx >= first_elem_id) &&
                  (parent_idx < last_elem_id))
                {
                  have_parent_in_block = true;
                  processor_id_type parent_pid = DofObject::invalid_processor_id;

                  // Minimum over the semilocal active descendants;
                  // invalid_processor_id is the max value, so any
                  // real pid wins.
                  std::vector<const Elem *> active_family;
                  parent->active_family_tree(active_family);
                  for (const auto & f : active_family)
                    parent_pid = std::min (parent_pid, f->processor_id());

                  const dof_id_type packed_idx = parent_idx - first_elem_id;
                  libmesh_assert_less (packed_idx, parent_processor_ids.size());

                  parent_processor_ids[packed_idx] = parent_pid;
                }
            }

          // then find the global minimum
          mesh.comm().min (parent_processor_ids);

          // and assign the ids, if we have a parent in this block.
          if (have_parent_in_block)
            for (auto & parent : as_range(mesh.ancestor_elements_begin(),
                                          mesh.ancestor_elements_end()))
              {
                const dof_id_type parent_idx = parent->id();

                if ((parent_idx >= first_elem_id) &&
                    (parent_idx < last_elem_id))
                  {
                    const dof_id_type packed_idx = parent_idx - first_elem_id;
                    libmesh_assert_less (packed_idx, parent_processor_ids.size());

                    const processor_id_type parent_pid =
                      parent_processor_ids[packed_idx];

                    libmesh_assert_not_equal_to (parent_pid, DofObject::invalid_processor_id);

                    parent->processor_id() = parent_pid;
                  }
              }
        }
    }

#endif // LIBMESH_ENABLE_AMR
}
505
// Build a map from each (lower pid, higher pid) pair of neighboring
// partitions to the set of node ids shared across that interface.
// Only locally visible, non-remote neighbor pairs contribute.
// Collective: must be run on all processors at once.
void
Partitioner::processor_pairs_to_interface_nodes(MeshBase & mesh,
                                                std::map<std::pair<processor_id_type, processor_id_type>, std::set<dof_id_type>> & processor_pair_to_nodes)
{
  // This function must be run on all processors at once
  libmesh_parallel_only(mesh.comm());

  processor_pair_to_nodes.clear();

  // Scratch containers, reused across elements to avoid reallocating.
  std::set<dof_id_type> mynodes;
  std::set<dof_id_type> neighbor_nodes;
  std::vector<dof_id_type> common_nodes;

  // Loop over all the active elements
  for (auto & elem : mesh.active_element_ptr_range())
    {
      libmesh_assert(elem);

      libmesh_assert_not_equal_to (elem->processor_id(), DofObject::invalid_processor_id);

      auto n_nodes = elem->n_nodes();

      // prepare data for this element
      mynodes.clear();
      neighbor_nodes.clear();
      common_nodes.clear();

      for (unsigned int inode = 0; inode < n_nodes; inode++)
        mynodes.insert(elem->node_id(inode));

      for (auto i : elem->side_index_range())
        {
          auto neigh = elem->neighbor_ptr(i);
          // Only sides whose neighbor lives on a *different* partition
          // are interfaces.
          if (neigh && !neigh->is_remote() && neigh->processor_id() != elem->processor_id())
            {
              neighbor_nodes.clear();
              common_nodes.clear();
              auto neigh_n_nodes = neigh->n_nodes();
              for (unsigned int inode = 0; inode < neigh_n_nodes; inode++)
                neighbor_nodes.insert(neigh->node_id(inode));

              // Nodes common to both elements lie on the interface.
              // (Both inputs are std::sets, hence already sorted.)
              std::set_intersection(mynodes.begin(), mynodes.end(),
                                    neighbor_nodes.begin(), neighbor_nodes.end(),
                                    std::back_inserter(common_nodes));

              // Key by (min pid, max pid) so each pair is canonical.
              auto & map_set = processor_pair_to_nodes[std::make_pair(std::min(elem->processor_id(), neigh->processor_id()),
                                                                     std::max(elem->processor_id(), neigh->processor_id()))];
              for (auto global_node_id : common_nodes)
                map_set.insert(global_node_id);
            }
        }
    }
}
559
// Load-balance interface node ownership between each pair of
// neighboring partitions by simply splitting the (id-sorted) shared
// node set in half: the first half goes to the lower pid, the rest
// to the higher pid.
void Partitioner::set_interface_node_processor_ids_linear(MeshBase & mesh)
{
  // This function must be run on all processors at once
  libmesh_parallel_only(mesh.comm());

  std::map<std::pair<processor_id_type, processor_id_type>, std::set<dof_id_type>> processor_pair_to_nodes;

  processor_pairs_to_interface_nodes(mesh, processor_pair_to_nodes);

  for (auto & pmap : processor_pair_to_nodes)
    {
      std::size_t n_own_nodes = pmap.second.size()/2, i = 0;

      for (dof_id_type id : pmap.second)
        {
          auto & node = mesh.node_ref(id);
          // NOTE(review): "<=" hands the lower-ranked pid
          // n_own_nodes+1 nodes (one more than the higher pid when
          // the count is even).  Looks like a deliberate tie-break
          // toward the lower rank -- confirm before "fixing".
          if (i <= n_own_nodes)
            node.processor_id() = pmap.first.first;
          else
            node.processor_id() = pmap.first.second;
          i++;
        }
    }
}
584
// Load-balance interface node ownership between each pair of
// neighboring partitions using a breadth-first traversal of nodal
// connectivity: all shared nodes start on the higher pid, then a BFS
// claims roughly half of them (a spatially contiguous patch) for the
// lower pid.
void Partitioner::set_interface_node_processor_ids_BFS(MeshBase & mesh)
{
  // This function must be run on all processors at once
  libmesh_parallel_only(mesh.comm());

  std::map<std::pair<processor_id_type, processor_id_type>, std::set<dof_id_type>> processor_pair_to_nodes;

  processor_pairs_to_interface_nodes(mesh, processor_pair_to_nodes);

  std::unordered_map<dof_id_type, std::vector<const Elem *>> nodes_to_elem_map;

  MeshTools::build_nodes_to_elem_map(mesh, nodes_to_elem_map);

  // Scratch containers reused across partition pairs.
  std::vector<const Node *> neighbors;
  std::set<dof_id_type> neighbors_order;
  std::vector<dof_id_type> common_nodes;
  std::queue<dof_id_type> nodes_queue;
  std::set<dof_id_type> visted_nodes;   // (sic) nodes claimed so far for the lower pid

  for (auto & pmap : processor_pair_to_nodes)
    {
      // Target share for the lower-ranked pid: half the interface.
      std::size_t n_own_nodes = pmap.second.size()/2;

      // Initialize node assignment
      for (dof_id_type id : pmap.second)
        mesh.node_ref(id).processor_id() = pmap.first.second;

      visted_nodes.clear();
      // Outer loop restarts the BFS from a fresh seed if the
      // interface is disconnected and the queue drains early.
      for (dof_id_type id : pmap.second)
        {
          mesh.node_ref(id).processor_id() = pmap.first.second;

          if (visted_nodes.find(id) != visted_nodes.end())
            continue;
          else
            {
              nodes_queue.push(id);
              visted_nodes.insert(id);
              if (visted_nodes.size() >= n_own_nodes)
                break;
            }

          while (!nodes_queue.empty())
            {
              auto & node = mesh.node_ref(nodes_queue.front());
              nodes_queue.pop();

              neighbors.clear();
              MeshTools::find_nodal_neighbors(mesh, node, nodes_to_elem_map, neighbors);
              neighbors_order.clear();
              for (auto & neighbor : neighbors)
                neighbors_order.insert(neighbor->id());

              // Only expand to neighbors that are themselves on this
              // interface (both sets are sorted).
              common_nodes.clear();
              std::set_intersection(pmap.second.begin(), pmap.second.end(),
                                    neighbors_order.begin(), neighbors_order.end(),
                                    std::back_inserter(common_nodes));

              for (auto c_node : common_nodes)
                if (visted_nodes.find(c_node) == visted_nodes.end())
                  {
                    nodes_queue.push(c_node);
                    visted_nodes.insert(c_node);
                    // Quota reached: bail out of both loops.
                    if (visted_nodes.size() >= n_own_nodes)
                      goto queue_done;
                  }

              if (visted_nodes.size() >= n_own_nodes)
                goto queue_done;
            }
        }
    queue_done:
      // Everything the BFS claimed belongs to the lower-ranked pid.
      for (auto node : visted_nodes)
        mesh.node_ref(node).processor_id() = pmap.first.first;
    }
}
661
// Load-balance interface node ownership between each pair of
// neighboring partitions by building the nodal adjacency graph of
// each interface and letting a PETSc MatPartitioning split it into
// two parts (options prefix "balance_").  Requires PETSc.
void Partitioner::set_interface_node_processor_ids_petscpartitioner(MeshBase & mesh)
{
  libmesh_ignore(mesh); // Only used if LIBMESH_HAVE_PETSC

  // This function must be run on all processors at once
  libmesh_parallel_only(mesh.comm());

  // NOTE(review): "#if" here vs "#ifdef" at the top of the file --
  // works as long as LIBMESH_HAVE_PETSC is defined to a nonzero
  // value, but confirm and consider making them consistent.
#if LIBMESH_HAVE_PETSC
  std::map<std::pair<processor_id_type, processor_id_type>, std::set<dof_id_type>> processor_pair_to_nodes;

  processor_pairs_to_interface_nodes(mesh, processor_pair_to_nodes);

  std::vector<std::vector<const Elem *>> nodes_to_elem_map;

  MeshTools::build_nodes_to_elem_map(mesh, nodes_to_elem_map);

  // Scratch containers reused across partition pairs.
  std::vector<const Node *> neighbors;
  std::set<dof_id_type> neighbors_order;
  std::vector<dof_id_type> common_nodes;

  // CSR (compressed sparse row) arrays of the interface graph.
  std::vector<dof_id_type> rows;
  std::vector<dof_id_type> cols;

  // Interface node id -> contiguous local index [0, n) for the CSR.
  std::map<dof_id_type, dof_id_type> global_to_local;

  for (auto & pmap : processor_pair_to_nodes)
    {
      unsigned int i = 0;

      rows.clear();
      rows.resize(pmap.second.size()+1);
      cols.clear();
      for (dof_id_type id : pmap.second)
        global_to_local[id] = i++;

      i = 0;
      for (auto id : pmap.second)
        {
          auto & node = mesh.node_ref(id);
          neighbors.clear();
          MeshTools::find_nodal_neighbors(mesh, node, nodes_to_elem_map, neighbors);
          neighbors_order.clear();
          for (auto & neighbor : neighbors)
            neighbors_order.insert(neighbor->id());

          // Keep only graph edges to other nodes on this interface.
          common_nodes.clear();
          std::set_intersection(pmap.second.begin(), pmap.second.end(),
                                neighbors_order.begin(), neighbors_order.end(),
                                std::back_inserter(common_nodes));

          rows[i+1] = rows[i] + cast_int<dof_id_type>(common_nodes.size());

          for (auto c_node : common_nodes)
            cols.push_back(global_to_local[c_node]);

          i++;
        }

      // NOTE(review): PETSc return codes below are unchecked -- the
      // usual CHKERRQ / error-check wrapper pattern is absent here.
      Mat adj;
      MatPartitioning part;
      IS is;
      PetscInt local_size, rows_size, cols_size;
      PetscInt *adj_i, *adj_j;
      const PetscInt *indices;
      PetscCalloc1(rows.size(), &adj_i);
      PetscCalloc1(cols.size(), &adj_j);
      rows_size = cast_int<PetscInt>(rows.size());
      for (PetscInt ii=0; ii<rows_size; ii++)
        adj_i[ii] = rows[ii];

      cols_size = cast_int<PetscInt>(cols.size());
      for (PetscInt ii=0; ii<cols_size; ii++)
        adj_j[ii] = cols[ii];

      // MatCreateMPIAdj takes ownership of adj_i/adj_j; the graph is
      // local to this rank (PETSC_COMM_SELF), partitioned in 2.
      const PetscInt sz = cast_int<PetscInt>(pmap.second.size());
      MatCreateMPIAdj(PETSC_COMM_SELF, sz, sz, adj_i, adj_j,nullptr,&adj);
      MatPartitioningCreate(PETSC_COMM_SELF,&part);
      MatPartitioningSetAdjacency(part,adj);
      MatPartitioningSetNParts(part,2);
      PetscObjectSetOptionsPrefix((PetscObject)part, "balance_");
      MatPartitioningSetFromOptions(part);
      MatPartitioningApply(part,&is);

      MatDestroy(&adj);
      MatPartitioningDestroy(&part);

      // The index set holds, per node, which of the two parts it
      // landed in: 0 -> lower pid, nonzero -> higher pid.
      ISGetLocalSize(is, &local_size);
      ISGetIndices(is, &indices);
      i = 0;
      for (auto id : pmap.second)
        {
          auto & node = mesh.node_ref(id);
          if (indices[i])
            node.processor_id() = pmap.first.second;
          else
            node.processor_id() = pmap.first.first;

          i++;
        }
      ISRestoreIndices(is, &indices);
      ISDestroy(&is);
    }
#else
  libmesh_error_msg("PETSc is required");
#endif
}
768
769
// Assign a processor id to every node, consistently across all
// processors: first from active elements via choose_processor_id(),
// optionally rebalanced by a command-line-selected interface
// algorithm, then synchronized in parallel (with care taken not to
// propagate the "temporary" pids assigned from non-active elements).
void Partitioner::set_node_processor_ids(MeshBase & mesh)
{
  LOG_SCOPE("set_node_processor_ids()","Partitioner");

  // This function must be run on all processors at once
  libmesh_parallel_only(mesh.comm());

  // If we have any unpartitioned elements at this
  // stage there is a problem
  libmesh_assert (MeshTools::n_elem(mesh.unpartitioned_elements_begin(),
                                    mesh.unpartitioned_elements_end()) == 0);

  // Start from scratch here: nodes we used to own may not be
  // eligible for us to own any more.
  for (auto & node : mesh.node_ptr_range())
    {
      node->processor_id() = DofObject::invalid_processor_id;
    }

  // Loop over all the active elements
  for (auto & elem : mesh.active_element_ptr_range())
    {
      libmesh_assert(elem);

      libmesh_assert_not_equal_to (elem->processor_id(), DofObject::invalid_processor_id);

      // Consider updating the processor id on this element's nodes
      for (Node & node : elem->node_ref_range())
        {
          processor_id_type & pid = node.processor_id();
          pid = node.choose_processor_id(pid, elem->processor_id());
        }
    }

  // How we finish off the node partitioning depends on our command
  // line options.

  const bool load_balanced_nodes_linear =
    libMesh::on_command_line ("--load-balanced-nodes-linear");

  const bool load_balanced_nodes_bfs =
    libMesh::on_command_line ("--load-balanced-nodes-bfs");

  const bool load_balanced_nodes_petscpartition =
    libMesh::on_command_line ("--load-balanced-nodes-petscpartitioner");

  // The three balancing options are mutually exclusive.
  unsigned int n_load_balance_options = load_balanced_nodes_linear;
  n_load_balance_options += load_balanced_nodes_bfs;
  n_load_balance_options += load_balanced_nodes_petscpartition;
  libmesh_error_msg_if(n_load_balance_options > 1,
                       "Cannot perform more than one load balancing type at a time");

  if (load_balanced_nodes_linear)
    set_interface_node_processor_ids_linear(mesh);
  else if (load_balanced_nodes_bfs)
    set_interface_node_processor_ids_BFS(mesh);
  else if (load_balanced_nodes_petscpartition)
    set_interface_node_processor_ids_petscpartitioner(mesh);

  // Node balancing algorithm will response to assign owned nodes.
  // We still need to sync PIDs
  {
    // For inactive elements, we will have already gotten most of
    // these nodes, *except* for the case of a parent with a subset
    // of active descendants which are remote elements. In that
    // case some of the parent nodes will not have been properly
    // handled yet on our processor.
    //
    // We don't want to inadvertently give one of them an incorrect
    // processor id, but if we're not in serial then we have to
    // assign them temporary pids to make querying work, so we'll
    // save our *valid* pids before assigning temporaries.
    //
    // Even in serial we'll want to check and make sure we're not
    // overwriting valid active node pids with pids from subactive
    // elements.
    std::unordered_set<dof_id_type> bad_pids;

    for (auto & node : mesh.node_ptr_range())
      if (node->processor_id() == DofObject::invalid_processor_id)
        bad_pids.insert(node->id());

    // If we assign our temporary ids by looping from finer elements
    // to coarser elements, we'll always get an id from the finest
    // ghost element we can see, which will usually be "closer" to
    // the true processor we want to query and so will reduce query
    // cycles that don't reach that processor.

    // But we can still end up with a query cycle that dead-ends, so
    // we need to prepare a "push" communication step here.

    const bool is_serial = mesh.is_serial();
    std::unordered_map
      <processor_id_type,
       std::unordered_map<dof_id_type, processor_id_type>>
      potential_pids;

    // Walk levels finest-to-coarsest (see comment above).
    const unsigned int n_levels = MeshTools::n_levels(mesh);
    for (unsigned int level = n_levels; level > 0; --level)
      {
        for (auto & elem : as_range(mesh.level_elements_begin(level-1),
                                    mesh.level_elements_end(level-1)))
          {
            libmesh_assert_not_equal_to (elem->processor_id(),
                                         DofObject::invalid_processor_id);

            const processor_id_type elem_pid = elem->processor_id();

            // Consider updating the processor id on this element's nodes
            for (Node & node : elem->node_ref_range())
              {
                processor_id_type & pid = node.processor_id();
                if (bad_pids.count(node.id()))
                  pid = node.choose_processor_id(pid, elem_pid);
                else if (!is_serial)
                  potential_pids[elem_pid][node.id()] = pid;
              }
          }
      }

    if (!is_serial)
      {
        // Flatten the per-destination maps into vectors for the
        // push-communication API.
        std::unordered_map
          <processor_id_type,
           std::vector<std::pair<dof_id_type, processor_id_type>>>
          potential_pids_vecs;

        for (auto & pair : potential_pids)
          potential_pids_vecs[pair.first].assign(pair.second.begin(), pair.second.end());

        // On receipt: a pushed pid overrides a temporary outright,
        // but competes via choose_processor_id() with a trusted one.
        auto pids_action_functor =
          [& mesh, & bad_pids]
          (processor_id_type /* src_pid */,
           const std::vector<std::pair<dof_id_type, processor_id_type>> & data)
          {
            for (auto pair : data)
              {
                Node & node = mesh.node_ref(pair.first);
                processor_id_type & pid = node.processor_id();
                auto it = bad_pids.find(pair.first);
                if (it != bad_pids.end())
                  {
                    pid = pair.second;
                    bad_pids.erase(it);
                  }
                else
                  pid = node.choose_processor_id(pid, pair.second);
              }
          };

        Parallel::push_parallel_vector_data
          (mesh.comm(), potential_pids_vecs, pids_action_functor);

        // Using default libMesh options, we'll just need to sync
        // between processors now. The catch here is that we can't
        // initially trust Node::choose_processor_id() because some
        // of those node processor ids are the temporary ones.
        CorrectProcIds correct_pids(mesh, bad_pids);
        Parallel::sync_node_data_by_element_id
          (mesh, mesh.elements_begin(), mesh.elements_end(),
           Parallel::SyncEverything(), Parallel::SyncEverything(),
           correct_pids);

        // But once we've got all the non-temporary pids synced, we
        // may need to sync again to get any pids on nodes only
        // connected to subactive elements, for which *only*
        // "temporary" pids are possible.
        bad_pids.clear();
        Parallel::sync_node_data_by_element_id
          (mesh,
           mesh.elements_begin(), mesh.elements_end(),
           Parallel::SyncEverything(), Parallel::SyncEverything(),
           correct_pids);
      }
  }

  // We can't assert that all nodes are connected to elements, because
  // a DistributedMesh with NodeConstraints might have pulled in some
  // remote nodes solely for evaluating those constraints.
  // MeshTools::libmesh_assert_connected_nodes(mesh);

#ifdef DEBUG
  MeshTools::libmesh_assert_valid_procids<Node>(mesh);
  //MeshTools::libmesh_assert_canonical_node_procids(mesh);
#endif
}
956
957
958
959 struct SyncLocalIDs
960 {
961 typedef dof_id_type datum;
962
963 typedef std::unordered_map<dof_id_type, dof_id_type> map_type;
964
SyncLocalIDsSyncLocalIDs965 SyncLocalIDs(map_type & _id_map) : id_map(_id_map) {}
966
967 map_type & id_map;
968
gather_dataSyncLocalIDs969 void gather_data (const std::vector<dof_id_type> & ids,
970 std::vector<datum> & local_ids) const
971 {
972 local_ids.resize(ids.size());
973
974 for (auto i : index_range(ids))
975 local_ids[i] = id_map[ids[i]];
976 }
977
act_on_dataSyncLocalIDs978 void act_on_data (const std::vector<dof_id_type> & ids,
979 const std::vector<datum> & local_ids)
980 {
981 for (auto i : index_range(local_ids))
982 id_map[ids[i]] = local_ids[i];
983 }
984 };
985
// Build _global_index_by_pid_map: a globally consistent numbering of
// active elements that is contiguous by processor (all of pid 0's
// elements first, then pid 1's, ...).  Also fills
// _n_active_elem_on_proc.  Collective.
void Partitioner::_find_global_index_by_pid_map(const MeshBase & mesh)
{
  const dof_id_type n_active_local_elem = mesh.n_active_local_elem();

  // Find the number of active elements on each processor.  We cannot use
  // mesh.n_active_elem_on_proc(pid) since that only returns the number of
  // elements assigned to pid which are currently stored on the calling
  // processor. This will not in general be correct for parallel meshes
  // when (pid!=mesh.processor_id()).
  auto n_proc = mesh.n_processors();
  _n_active_elem_on_proc.resize(n_proc);
  mesh.comm().allgather(n_active_local_elem, _n_active_elem_on_proc);

  // Exclusive prefix sum: number of active elements owned by all
  // lower-ranked processors.
  std::vector<dof_id_type> n_active_elem_before_proc(mesh.n_processors());

  for (auto i : make_range(n_proc-1))
    n_active_elem_before_proc[i+1] =
      n_active_elem_before_proc[i] + _n_active_elem_on_proc[i];

  libMesh::BoundingBox bbox =
    MeshTools::create_bounding_box(mesh);

  _global_index_by_pid_map.clear();

  // create the mapping which is contiguous by processor
  MeshCommunication().find_local_indices (bbox,
                                          mesh.active_local_elements_begin(),
                                          mesh.active_local_elements_end(),
                                          _global_index_by_pid_map);

  // Pull indices for non-local active elements from their owners.
  SyncLocalIDs sync(_global_index_by_pid_map);

  Parallel::sync_dofobject_data_by_id
    (mesh.comm(), mesh.active_elements_begin(), mesh.active_elements_end(), sync);

  // Shift each local index by the owning processor's offset to make
  // the numbering globally contiguous-by-pid.
  for (const auto & elem : mesh.active_element_ptr_range())
    {
      const processor_id_type pid = elem->processor_id();
      libmesh_assert_less (_global_index_by_pid_map[elem->id()], _n_active_elem_on_proc[pid]);

      _global_index_by_pid_map[elem->id()] += n_active_elem_before_proc[pid];
    }
}
1029
// Assemble the distributed dual graph of the mesh's active elements:
// one vertex per active element (numbered by the processor-contiguous
// global index from _find_global_index_by_pid_map()), with edges to
// (a) active face neighbors, (b) with AMR, the active descendants of a
// coarser face neighbor, (c) interior neighbors of lower-dimensional
// boundary elements, and (d) boundary elements attached to an interior
// element.  Fills _dual_graph and _local_id_to_elem for the local rows.
void Partitioner::build_graph (const MeshBase & mesh)
{
  // NOTE(review): the log label presumably predates hoisting this code
  // out of ParmetisPartitioner into the Partitioner base class.
  LOG_SCOPE("build_graph()", "ParmetisPartitioner");

  const dof_id_type n_active_local_elem = mesh.n_active_local_elem();
  // If we have boundary elements in this mesh, we want to account for
  // the connectivity between them and interior elements.  We can find
  // interior elements from boundary elements, but we need to build up
  // a lookup map to do the reverse.
  typedef std::unordered_multimap<const Elem *, const Elem *> map_type;
  map_type interior_to_boundary_map;

  for (const auto & elem : mesh.active_element_ptr_range())
    {
      // If we don't have an interior_parent then there's nothing to look us
      // up.
      if ((elem->dim() >= LIBMESH_DIM) ||
          !elem->interior_parent())
        continue;

      // get all relevant interior elements
      std::set<const Elem *> neighbor_set;
      elem->find_interior_neighbors(neighbor_set);

      for (const auto & neighbor : neighbor_set)
        interior_to_boundary_map.emplace(neighbor, elem);
    }

#ifdef LIBMESH_ENABLE_AMR
  // Scratch vector reused for each coarser neighbor's active children.
  std::vector<const Elem *> neighbors_offspring;
#endif

  // This is costly, and we only need to do it if the mesh has
  // changed since we last partitioned... but the mesh probably has
  // changed since we last partitioned, and if it hasn't we don't
  // have a reliable way to be sure of that.
  _find_global_index_by_pid_map(mesh);

  // Offset of this processor's first row in the global numbering:
  // the sum of the active element counts of all lower-rank processors.
  dof_id_type first_local_elem = 0;
  for (auto pid : make_range(mesh.processor_id()))
    first_local_elem += _n_active_elem_on_proc[pid];

  _dual_graph.clear();
  _dual_graph.resize(n_active_local_elem);
  _local_id_to_elem.resize(n_active_local_elem);

  for (const auto & elem : mesh.active_local_element_ptr_range())
    {
      libmesh_assert (_global_index_by_pid_map.count(elem->id()));
      const dof_id_type global_index_by_pid =
        _global_index_by_pid_map[elem->id()];

      // Row number of this element in the local portion of the graph.
      const dof_id_type local_index =
        global_index_by_pid - first_local_elem;
      libmesh_assert_less (local_index, n_active_local_elem);

      std::vector<dof_id_type> & graph_row = _dual_graph[local_index];

      // Save this off to make it easy to index later
      _local_id_to_elem[local_index] = elem;

      // Loop over the element's neighbors.  An element
      // adjacency corresponds to a face neighbor
      for (auto neighbor : elem->neighbor_ptr_range())
        {
          if (neighbor != nullptr)
            {
              // If the neighbor is active treat it
              // as a connection
              if (neighbor->active())
                {
                  libmesh_assert(_global_index_by_pid_map.count(neighbor->id()));
                  const dof_id_type neighbor_global_index_by_pid =
                    _global_index_by_pid_map[neighbor->id()];

                  graph_row.push_back(neighbor_global_index_by_pid);
                }

#ifdef LIBMESH_ENABLE_AMR

              // Otherwise we need to find all of the
              // neighbor's children that are connected to
              // us and add them
              else
                {
                  // The side of the neighbor to which
                  // we are connected
                  const unsigned int ns =
                    neighbor->which_neighbor_am_i (elem);
                  libmesh_assert_less (ns, neighbor->n_neighbors());

                  // Get all the active children (& grandchildren, etc...)
                  // of the neighbor

                  // FIXME - this is the wrong thing, since we
                  // should be getting the active family tree on
                  // our side only.  But adding too many graph
                  // links may cause hanging nodes to tend to be
                  // on partition interiors, which would reduce
                  // communication overhead for constraint
                  // equations, so we'll leave it.

                  neighbor->active_family_tree (neighbors_offspring);

                  // Get all the neighbor's children that
                  // live on that side and are thus connected
                  // to us
                  for (const auto & child : neighbors_offspring)
                    {
                      // This does not assume a level-1 mesh.
                      // Note that since children have sides numbered
                      // coincident with the parent then this is a sufficient test.
                      if (child->neighbor_ptr(ns) == elem)
                        {
                          libmesh_assert (child->active());
                          libmesh_assert (_global_index_by_pid_map.count(child->id()));
                          const dof_id_type child_global_index_by_pid =
                            _global_index_by_pid_map[child->id()];

                          graph_row.push_back(child_global_index_by_pid);
                        }
                    }
                }

#endif /* ifdef LIBMESH_ENABLE_AMR */


            }
        }

      // If this is a lower-dimensional boundary element, also connect
      // it to the interior elements it sits on.
      if ((elem->dim() < LIBMESH_DIM) &&
          elem->interior_parent())
        {
          // get all relevant interior elements
          std::set<const Elem *> neighbor_set;
          elem->find_interior_neighbors(neighbor_set);

          for (const auto & neighbor : neighbor_set)
            {
              const dof_id_type neighbor_global_index_by_pid =
                _global_index_by_pid_map[neighbor->id()];

              graph_row.push_back(neighbor_global_index_by_pid);
            }
        }

      // Check for any boundary neighbors: the reverse direction of the
      // interior->boundary connectivity collected above.
      for (const auto & pr : as_range(interior_to_boundary_map.equal_range(elem)))
        {
          const Elem * neighbor = pr.second;

          const dof_id_type neighbor_global_index_by_pid =
            _global_index_by_pid_map[neighbor->id()];

          graph_row.push_back(neighbor_global_index_by_pid);
        }
    }

}
1189
// Apply a partitioning computed on the local rows of the dual graph.
// `parts[i]` is the new processor id for the local element whose
// processor-contiguous global index is first_local_elem + i.  Since an
// element's new pid is known only to its current owner, each processor
// pulls the new pids of all its active (local and ghost) elements from
// the current owners, then writes them into the elements.
void Partitioner::assign_partitioning (const MeshBase & mesh, const std::vector<dof_id_type> & parts)
{
  // NOTE(review): the log label presumably predates hoisting this code
  // out of ParmetisPartitioner into the Partitioner base class.
  LOG_SCOPE("assign_partitioning()", "ParmetisPartitioner");

  // This function must be run on all processors at once
  libmesh_parallel_only(mesh.comm());

  // Offset of this processor's first entry in the global (contiguous
  // by pid) element numbering.
  dof_id_type first_local_elem = 0;
  for (auto pid : make_range(mesh.processor_id()))
    first_local_elem += _n_active_elem_on_proc[pid];

#ifndef NDEBUG
  const dof_id_type n_active_local_elem = mesh.n_active_local_elem();
#endif

  // Element ids whose new pid we must request, keyed by current owner.
  std::map<processor_id_type, std::vector<dof_id_type>>
    requested_ids;

  // Results to gather from each processor - kept in a map so we
  // do only one loop over elements after all receives are done.
  std::map<processor_id_type, std::vector<processor_id_type>>
    filled_request;

  for (auto & elem : mesh.active_element_ptr_range())
    {
      // we need to get the index from the owning processor
      // (note we cannot assign it now -- we are iterating
      // over elements again and this will be bad!)
      requested_ids[elem->processor_id()].push_back(elem->id());
    }

  // Answer a pull request: translate each requested element id into
  // its new pid via our local slice of `parts`.
  auto gather_functor =
    [this,
     & parts,
#ifndef NDEBUG
     & mesh,
     n_active_local_elem,
#endif
     first_local_elem]
    (processor_id_type, const std::vector<dof_id_type> & ids,
     std::vector<processor_id_type> & data)
    {
      const std::size_t ids_size = ids.size();
      data.resize(ids.size());

      for (std::size_t i=0; i != ids_size; i++)
        {
          const dof_id_type requested_elem_index = ids[i];

          libmesh_assert(_global_index_by_pid_map.count(requested_elem_index));

          const dof_id_type global_index_by_pid =
            _global_index_by_pid_map[requested_elem_index];

          // Convert to a row index into our local `parts` slice.
          const dof_id_type local_index =
            global_index_by_pid - first_local_elem;

          libmesh_assert_less (local_index, parts.size());
          libmesh_assert_less (local_index, n_active_local_elem);

          const processor_id_type elem_procid =
            cast_int<processor_id_type>(parts[local_index]);

          libmesh_assert_less (elem_procid, mesh.n_partitions());

          data[i] = elem_procid;
        }
    };

  // Stash each processor's reply; applied in one pass below.
  auto action_functor =
    [&filled_request]
    (processor_id_type pid,
     const std::vector<dof_id_type> &,
     const std::vector<processor_id_type> & new_procids)
    {
      filled_request[pid] = new_procids;
    };

  // Trade requests with other processors
  const processor_id_type * ex = nullptr;
  Parallel::pull_parallel_vector_data
    (mesh.comm(), requested_ids, gather_functor, action_functor, ex);

  // and finally assign the partitioning.
  // note we are iterating in exactly the same order
  // used to build up the request, so we can expect the
  // required entries to be in the proper sequence.
  std::vector<unsigned int> counters(mesh.n_processors(), 0);
  for (auto & elem : mesh.active_element_ptr_range())
    {
      const processor_id_type current_pid = elem->processor_id();

      libmesh_assert_less (counters[current_pid], requested_ids[current_pid].size());

      // Consume the replies in the same order the requests were made.
      const processor_id_type elem_procid =
        filled_request[current_pid][counters[current_pid]++];

      libmesh_assert_less (elem_procid, mesh.n_partitions());
      elem->processor_id() = elem_procid;
    }
}
1291
1292
1293 } // namespace libMesh
1294