// The libMesh Finite Element Library.
// Copyright (C) 2002-2020 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner

// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.

// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA



// Local includes
#include "libmesh/partitioner.h"

// libMesh includes
#include "libmesh/elem.h"
#include "libmesh/int_range.h"
#include "libmesh/libmesh_logging.h"
#include "libmesh/mesh_base.h"
#include "libmesh/mesh_tools.h"
#include "libmesh/mesh_communication.h"
#include "libmesh/parallel_ghost_sync.h"

// TIMPI includes
#include "timpi/parallel_implementation.h"
#include "timpi/parallel_sync.h"

// C/C++ includes
#include <algorithm>
#include <queue>
#include <set>
#include <unordered_map>
#include <unordered_set>
#include <vector>

// PETSc includes
#ifdef LIBMESH_HAVE_PETSC
#include "libmesh/ignore_warnings.h"
#include "petscmat.h"
#include "libmesh/restore_warnings.h"
#endif


namespace {

using namespace libMesh;
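// Functor for Parallel::sync_node_data_by_element_id().  On the
// processor that can answer a query, gather_data() reports each
// requested node's processor id; on the processor that asked,
// act_on_data() applies the replies, treating nodes whose current
// pids are temporary placeholders (tracked in bad_pids) differently
// from nodes whose valid pids merely lose a choose_processor_id()
// tiebreak.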
struct CorrectProcIds
{
  typedef processor_id_type datum;

  CorrectProcIds(MeshBase & _mesh,
                 std::unordered_set<dof_id_type> & _bad_pids) :
    mesh(_mesh), bad_pids(_bad_pids) {}

  MeshBase & mesh;
  std::unordered_set<dof_id_type> & bad_pids;

  // ------------------------------------------------------------
  void gather_data (const std::vector<dof_id_type> & ids,
                    std::vector<datum> & data)
  {
    // Find the processor id of each requested node
    data.resize(ids.size());

    for (auto i : index_range(ids))
      {
        // Look for this point in the mesh and make sure we have a
        // good pid for it before sending that on
        if (ids[i] != DofObject::invalid_id &&
            !bad_pids.count(ids[i]))
          {
            Node & node = mesh.node_ref(ids[i]);

            // Return the node's correct processor id
            data[i] = node.processor_id();
          }
        else
          data[i] = DofObject::invalid_processor_id;
      }
  }

  // ------------------------------------------------------------
  bool act_on_data (const std::vector<dof_id_type> & ids,
                    const std::vector<datum> & proc_ids)
  {
    bool data_changed = false;

    // Set the ghost node processor ids we've now been informed of
    for (auto i : index_range(ids))
      {
        Node & node = mesh.node_ref(ids[i]);

        processor_id_type & proc_id = node.processor_id();
        const processor_id_type new_proc_id = proc_ids[i];

        auto it = bad_pids.find(ids[i]);

        if ((new_proc_id != proc_id ||
             it != bad_pids.end()) &&
            new_proc_id != DofObject::invalid_processor_id)
          {
            if (it != bad_pids.end())
              {
                proc_id = new_proc_id;
                bad_pids.erase(it);
              }
            else
              proc_id = node.choose_processor_id(proc_id, new_proc_id);

            if (proc_id == new_proc_id)
              data_changed = true;
          }
      }

    return data_changed;
  }
};

}

namespace libMesh
{



// ------------------------------------------------------------
// Partitioner static data
const dof_id_type Partitioner::communication_blocksize =
  dof_id_type(1000000);
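//
// Illustrative numbers only: with max_elem_id == 2,500,000 and the
// 1,000,000-entry blocksize above, set_parent_processor_ids() below
// sweeps three blocks of element ids -- [0,1M), [1M,2M), and
// [2M,2.5M) -- so no single mesh.comm().min() buffer exceeds one
// million entries.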



// ------------------------------------------------------------
// Partitioner implementation
void Partitioner::partition (MeshBase & mesh)
{
  this->partition(mesh,mesh.n_processors());
}



void Partitioner::partition (MeshBase & mesh,
                             const unsigned int n)
{
  libmesh_parallel_only(mesh.comm());

  // BSK - temporary fix while redistribution is integrated 6/26/2008
  // Uncomment this to not repartition in parallel
  //   if (!mesh.is_serial())
  //     return;

  // we cannot partition into more pieces than we have
  // active elements!
  const unsigned int n_parts =
    static_cast<unsigned int>
    (std::min(mesh.n_active_elem(), static_cast<dof_id_type>(n)));

  // Set the number of partitions in the mesh
  mesh.set_n_partitions()=n_parts;

  if (n_parts == 1)
    {
      this->single_partition (mesh);
      return;
    }

  // First assign a temporary partitioning to any unpartitioned elements
  Partitioner::partition_unpartitioned_elements(mesh, n_parts);

  // Call the partitioning function
  this->_do_partition(mesh,n_parts);

  // Set the parent's processor ids
  Partitioner::set_parent_processor_ids(mesh);

  // Redistribute elements if necessary, before setting node processor
  // ids, to make sure those will be set consistently
  mesh.redistribute();

#ifdef DEBUG
  MeshTools::libmesh_assert_valid_remote_elems(mesh);

  // Messed up elem processor_id()s can leave us without the child
  // elements we need to restrict vectors on a distributed mesh
  MeshTools::libmesh_assert_valid_procids<Elem>(mesh);
#endif

  // Set the node's processor ids
  Partitioner::set_node_processor_ids(mesh);

#ifdef DEBUG
  MeshTools::libmesh_assert_valid_procids<Elem>(mesh);
#endif

  // Give derived Mesh classes a chance to update any cached data to
  // reflect the new partitioning
  mesh.update_post_partitioning();
}
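
// Example usage (a sketch, with LinearPartitioner standing in for
// any concrete Partitioner subclass and `mesh` already prepared):
//
//   LinearPartitioner partitioner;
//   partitioner.partition(mesh);                 // one part per processor
//   partitioner.partition(mesh, /*n_parts=*/4);  // or an explicit count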



void Partitioner::repartition (MeshBase & mesh)
{
  this->repartition(mesh,mesh.n_processors());
}



void Partitioner::repartition (MeshBase & mesh,
                               const unsigned int n)
{
  // we cannot partition into more pieces than we have
  // active elements!
  const unsigned int n_parts =
    static_cast<unsigned int>
    (std::min(mesh.n_active_elem(), static_cast<dof_id_type>(n)));

  // Set the number of partitions in the mesh
  mesh.set_n_partitions()=n_parts;

  if (n_parts == 1)
    {
      this->single_partition (mesh);
      return;
    }

  // First assign a temporary partitioning to any unpartitioned elements
  Partitioner::partition_unpartitioned_elements(mesh, n_parts);

  // Call the partitioning function
  this->_do_repartition(mesh,n_parts);

  // Set the parent's processor ids
  Partitioner::set_parent_processor_ids(mesh);

  // Set the node's processor ids
  Partitioner::set_node_processor_ids(mesh);
}





void Partitioner::single_partition (MeshBase & mesh)
{
  this->single_partition_range(mesh.elements_begin(),
                               mesh.elements_end());

  // Redistribute, in case someone (like our unit tests) is doing
  // something silly (like moving a whole already-distributed mesh
  // back onto rank 0).
  mesh.redistribute();
}



void Partitioner::single_partition_range (MeshBase::element_iterator it,
                                          MeshBase::element_iterator end)
{
  LOG_SCOPE("single_partition_range()", "Partitioner");

  for (auto & elem : as_range(it, end))
    {
      elem->processor_id() = 0;

      // Assign all this element's nodes to processor 0 as well.
      for (Node & node : elem->node_ref_range())
        node.processor_id() = 0;
    }
}

void Partitioner::partition_unpartitioned_elements (MeshBase & mesh)
{
  Partitioner::partition_unpartitioned_elements(mesh, mesh.n_processors());
}



void Partitioner::partition_unpartitioned_elements (MeshBase & mesh,
                                                    const unsigned int n_subdomains)
{
  MeshBase::element_iterator       it  = mesh.unpartitioned_elements_begin();
  const MeshBase::element_iterator end = mesh.unpartitioned_elements_end();

  const dof_id_type n_unpartitioned_elements = MeshTools::n_elem (it, end);

  // The unpartitioned elements must exist on all processors; if the
  // range is empty on one processor it is empty on all, and we can
  // quit right here.
  if (!n_unpartitioned_elements)
    return;

  // find the target subdomain sizes
  std::vector<dof_id_type> subdomain_bounds(mesh.n_processors());

  for (auto pid : make_range(mesh.n_processors()))
    {
      dof_id_type tgt_subdomain_size = 0;

      // watch out for the case that n_subdomains < n_processors
      if (pid < n_subdomains)
        {
          tgt_subdomain_size = n_unpartitioned_elements/n_subdomains;

          if (pid < n_unpartitioned_elements%n_subdomains)
            tgt_subdomain_size++;

        }

      //libMesh::out << "pid, #= " << pid << ", " << tgt_subdomain_size << std::endl;
      if (pid == 0)
        subdomain_bounds[0] = tgt_subdomain_size;
      else
        subdomain_bounds[pid] = subdomain_bounds[pid-1] + tgt_subdomain_size;
    }

  libmesh_assert_equal_to (subdomain_bounds.back(), n_unpartitioned_elements);
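
  // Worked example (illustrative numbers only): with 10
  // unpartitioned elements and n_subdomains == 4 the target sizes
  // are 3,3,2,2, so subdomain_bounds == {3,6,8,10}.  A global index
  // of 6 then lands in subdomain 2, since std::upper_bound below
  // finds the first bound strictly greater than 6, the 8 at
  // position 2.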

  // create the unique mapping for all unpartitioned elements independent of partitioning
  // determine the global indexing for all the unpartitioned elements
  std::vector<dof_id_type> global_indices;

  // When called on all processors, this constructs a unique index
  // range in [0,n_unpartitioned_elements).  Only the indices for the
  // elements we pass in are returned in the array.
  MeshCommunication().find_global_indices (mesh.comm(),
                                           MeshTools::create_bounding_box(mesh), it, end,
                                           global_indices);

  dof_id_type cnt=0;
  for (auto & elem : as_range(it, end))
    {
      libmesh_assert_less (cnt, global_indices.size());
      const dof_id_type global_index =
        global_indices[cnt++];

      libmesh_assert_less (global_index, subdomain_bounds.back());
      libmesh_assert_less (global_index, n_unpartitioned_elements);

      const processor_id_type subdomain_id =
        cast_int<processor_id_type>
        (std::distance(subdomain_bounds.begin(),
                       std::upper_bound(subdomain_bounds.begin(),
                                        subdomain_bounds.end(),
                                        global_index)));
      libmesh_assert_less (subdomain_id, n_subdomains);

      elem->processor_id() = subdomain_id;
      //libMesh::out << "assigning " << global_index << " to " << subdomain_id << std::endl;
    }
}



void Partitioner::set_parent_processor_ids(MeshBase & mesh)
{
  // Ignore the parameter when !LIBMESH_ENABLE_AMR
  libmesh_ignore(mesh);

  LOG_SCOPE("set_parent_processor_ids()", "Partitioner");

#ifdef LIBMESH_ENABLE_AMR

  // If the mesh is serial we have access to all the elements,
  // in particular all the active ones.  We can therefore set
  // the parent processor ids indirectly through their children, and
  // set the subactive processor ids while examining their active
  // ancestors.
  // By convention a parent is assigned to the minimum processor
  // of all its children, and a subactive is assigned to the processor
  // of its active ancestor.
  if (mesh.is_serial())
    {
      for (auto & elem : mesh.active_element_ptr_range())
        {
          // First set descendants
          std::vector<Elem *> subactive_family;
          elem->total_family_tree(subactive_family);
          for (const auto & f : subactive_family)
            f->processor_id() = elem->processor_id();

          // Then set ancestors
          Elem * parent = elem->parent();

          while (parent)
            {
              // invalidate the parent id, otherwise the min below
              // will not work if the current parent id is less
              // than all the children!
              parent->invalidate_processor_id();

              for (auto & child : parent->child_ref_range())
                {
                  libmesh_assert(!child.is_remote());
                  libmesh_assert_not_equal_to (child.processor_id(), DofObject::invalid_processor_id);
                  parent->processor_id() = std::min(parent->processor_id(),
                                                    child.processor_id());
                }
              parent = parent->parent();
            }
        }
    }

  // When the mesh is parallel we cannot guarantee that parents have access to
  // all their children.
  else
    {
      // Setting subactive processor ids is easy: we can guarantee
      // that children have access to all their parents.

      // Loop over all the active elements in the mesh
      for (auto & child : mesh.active_element_ptr_range())
        {
          std::vector<Elem *> subactive_family;
          child->total_family_tree(subactive_family);
          for (const auto & f : subactive_family)
            f->processor_id() = child->processor_id();
        }

      // We will use a brute-force approach here.  Each processor finds its parent
      // elements and sets the parent pid to the minimum of its
      // semilocal descendants.
      // A global reduction is then performed to make sure the true minimum is found.
      // As noted, this is required because we cannot guarantee that a parent has
      // access to all its children on any single processor.
      libmesh_parallel_only(mesh.comm());
      libmesh_assert(MeshTools::n_elem(mesh.unpartitioned_elements_begin(),
                                       mesh.unpartitioned_elements_end()) == 0);

      const dof_id_type max_elem_id = mesh.max_elem_id();

      std::vector<processor_id_type>
        parent_processor_ids (std::min(communication_blocksize,
                                       max_elem_id));

      for (dof_id_type blk=0, last_elem_id=0; last_elem_id<max_elem_id; blk++)
        {
          last_elem_id =
            std::min(static_cast<dof_id_type>((blk+1)*communication_blocksize),
                     max_elem_id);
          const dof_id_type first_elem_id = blk*communication_blocksize;

          std::fill (parent_processor_ids.begin(),
                     parent_processor_ids.end(),
                     DofObject::invalid_processor_id);

          // first build up local contributions to parent_processor_ids
          bool have_parent_in_block = false;

          for (auto & parent : as_range(mesh.ancestor_elements_begin(),
                                        mesh.ancestor_elements_end()))
            {
              const dof_id_type parent_idx = parent->id();
              libmesh_assert_less (parent_idx, max_elem_id);

              if ((parent_idx >= first_elem_id) &&
                  (parent_idx <  last_elem_id))
                {
                  have_parent_in_block = true;
                  processor_id_type parent_pid = DofObject::invalid_processor_id;

                  std::vector<const Elem *> active_family;
                  parent->active_family_tree(active_family);
                  for (const auto & f : active_family)
                    parent_pid = std::min (parent_pid, f->processor_id());

                  const dof_id_type packed_idx = parent_idx - first_elem_id;
                  libmesh_assert_less (packed_idx, parent_processor_ids.size());

                  parent_processor_ids[packed_idx] = parent_pid;
                }
            }

          // then find the global minimum
          mesh.comm().min (parent_processor_ids);

          // and assign the ids, if we have a parent in this block.
          if (have_parent_in_block)
            for (auto & parent : as_range(mesh.ancestor_elements_begin(),
                                          mesh.ancestor_elements_end()))
              {
                const dof_id_type parent_idx = parent->id();

                if ((parent_idx >= first_elem_id) &&
                    (parent_idx <  last_elem_id))
                  {
                    const dof_id_type packed_idx = parent_idx - first_elem_id;
                    libmesh_assert_less (packed_idx, parent_processor_ids.size());

                    const processor_id_type parent_pid =
                      parent_processor_ids[packed_idx];

                    libmesh_assert_not_equal_to (parent_pid, DofObject::invalid_processor_id);

                    parent->processor_id() = parent_pid;
                  }
              }
        }
    }

#endif // LIBMESH_ENABLE_AMR
}

void
Partitioner::processor_pairs_to_interface_nodes(MeshBase & mesh,
                                                std::map<std::pair<processor_id_type, processor_id_type>, std::set<dof_id_type>> & processor_pair_to_nodes)
{
  // This function must be run on all processors at once
  libmesh_parallel_only(mesh.comm());

  processor_pair_to_nodes.clear();
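
  // The result maps each (lower pid, higher pid) pair to the set of
  // node ids shared by elements owned by those two processors, e.g.
  // (illustrative ids only) {(0,1) -> {5, 9, 12}, (1,3) -> {42}}.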

  std::set<dof_id_type> mynodes;
  std::set<dof_id_type> neighbor_nodes;
  std::vector<dof_id_type> common_nodes;

  // Loop over all the active elements
  for (auto & elem : mesh.active_element_ptr_range())
    {
      libmesh_assert(elem);

      libmesh_assert_not_equal_to (elem->processor_id(), DofObject::invalid_processor_id);

      auto n_nodes = elem->n_nodes();

      // prepare data for this element
      mynodes.clear();
      neighbor_nodes.clear();
      common_nodes.clear();

      for (unsigned int inode = 0; inode < n_nodes; inode++)
        mynodes.insert(elem->node_id(inode));

      for (auto i : elem->side_index_range())
        {
          auto neigh = elem->neighbor_ptr(i);
          if (neigh && !neigh->is_remote() && neigh->processor_id() != elem->processor_id())
            {
              neighbor_nodes.clear();
              common_nodes.clear();
              auto neigh_n_nodes = neigh->n_nodes();
              for (unsigned int inode = 0; inode < neigh_n_nodes; inode++)
                neighbor_nodes.insert(neigh->node_id(inode));

              std::set_intersection(mynodes.begin(), mynodes.end(),
                                    neighbor_nodes.begin(), neighbor_nodes.end(),
                                    std::back_inserter(common_nodes));

              auto & map_set = processor_pair_to_nodes[std::make_pair(std::min(elem->processor_id(), neigh->processor_id()),
                                                                      std::max(elem->processor_id(), neigh->processor_id()))];
              for (auto global_node_id : common_nodes)
                map_set.insert(global_node_id);
            }
        }
    }
}

void Partitioner::set_interface_node_processor_ids_linear(MeshBase & mesh)
{
  // This function must be run on all processors at once
  libmesh_parallel_only(mesh.comm());

  std::map<std::pair<processor_id_type, processor_id_type>, std::set<dof_id_type>> processor_pair_to_nodes;

  processor_pairs_to_interface_nodes(mesh, processor_pair_to_nodes);

  for (auto & pmap : processor_pair_to_nodes)
    {
      std::size_t n_own_nodes = pmap.second.size()/2, i = 0;
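
      // Walk the shared nodes in id order: roughly the first half is
      // assigned to the lower processor id, the remainder to the
      // higher one.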

      for (dof_id_type id : pmap.second)
        {
          auto & node = mesh.node_ref(id);
          if (i <= n_own_nodes)
            node.processor_id() = pmap.first.first;
          else
            node.processor_id() = pmap.first.second;
          i++;
        }
    }
}

void Partitioner::set_interface_node_processor_ids_BFS(MeshBase & mesh)
{
  // This function must be run on all processors at once
  libmesh_parallel_only(mesh.comm());

  std::map<std::pair<processor_id_type, processor_id_type>, std::set<dof_id_type>> processor_pair_to_nodes;

  processor_pairs_to_interface_nodes(mesh, processor_pair_to_nodes);

  std::unordered_map<dof_id_type, std::vector<const Elem *>> nodes_to_elem_map;

  MeshTools::build_nodes_to_elem_map(mesh, nodes_to_elem_map);

  std::vector<const Node *>  neighbors;
  std::set<dof_id_type> neighbors_order;
  std::vector<dof_id_type> common_nodes;
  std::queue<dof_id_type> nodes_queue;
  std::set<dof_id_type> visited_nodes;

  for (auto & pmap : processor_pair_to_nodes)
    {
      std::size_t n_own_nodes = pmap.second.size()/2;

      // Initialize node assignment
      for (dof_id_type id : pmap.second)
        mesh.node_ref(id).processor_id() = pmap.first.second;

      visited_nodes.clear();
      for (dof_id_type id : pmap.second)
        {
          mesh.node_ref(id).processor_id() = pmap.first.second;

          if (visited_nodes.find(id) != visited_nodes.end())
            continue;
          else
            {
              nodes_queue.push(id);
              visited_nodes.insert(id);
              if (visited_nodes.size() >= n_own_nodes)
                break;
            }

          while (!nodes_queue.empty())
            {
              auto & node = mesh.node_ref(nodes_queue.front());
              nodes_queue.pop();

              neighbors.clear();
              MeshTools::find_nodal_neighbors(mesh, node, nodes_to_elem_map, neighbors);
              neighbors_order.clear();
              for (auto & neighbor : neighbors)
                neighbors_order.insert(neighbor->id());

              common_nodes.clear();
              std::set_intersection(pmap.second.begin(), pmap.second.end(),
                                    neighbors_order.begin(), neighbors_order.end(),
                                    std::back_inserter(common_nodes));

              for (auto c_node : common_nodes)
                if (visited_nodes.find(c_node) == visited_nodes.end())
                  {
                    nodes_queue.push(c_node);
                    visited_nodes.insert(c_node);
                    if (visited_nodes.size() >= n_own_nodes)
                      goto queue_done;
                  }

              if (visited_nodes.size() >= n_own_nodes)
                goto queue_done;
            }
        }
    queue_done:
      for (auto node : visited_nodes)
        mesh.node_ref(node).processor_id() = pmap.first.first;
    }
}

void Partitioner::set_interface_node_processor_ids_petscpartitioner(MeshBase & mesh)
{
  libmesh_ignore(mesh); // Only used if LIBMESH_HAVE_PETSC

  // This function must be run on all processors at once
  libmesh_parallel_only(mesh.comm());

#ifdef LIBMESH_HAVE_PETSC
  std::map<std::pair<processor_id_type, processor_id_type>, std::set<dof_id_type>> processor_pair_to_nodes;

  processor_pairs_to_interface_nodes(mesh, processor_pair_to_nodes);

  std::vector<std::vector<const Elem *>> nodes_to_elem_map;

  MeshTools::build_nodes_to_elem_map(mesh, nodes_to_elem_map);

  std::vector<const Node *>  neighbors;
  std::set<dof_id_type> neighbors_order;
  std::vector<dof_id_type> common_nodes;

  std::vector<dof_id_type> rows;
  std::vector<dof_id_type> cols;

  std::map<dof_id_type, dof_id_type> global_to_local;

  for (auto & pmap : processor_pair_to_nodes)
    {
      unsigned int i = 0;

      rows.clear();
      rows.resize(pmap.second.size()+1);
      cols.clear();
      for (dof_id_type id : pmap.second)
        global_to_local[id] = i++;

      i = 0;
      for (auto id : pmap.second)
        {
          auto & node = mesh.node_ref(id);
          neighbors.clear();
          MeshTools::find_nodal_neighbors(mesh, node, nodes_to_elem_map, neighbors);
          neighbors_order.clear();
          for (auto & neighbor : neighbors)
            neighbors_order.insert(neighbor->id());

          common_nodes.clear();
          std::set_intersection(pmap.second.begin(), pmap.second.end(),
                                neighbors_order.begin(), neighbors_order.end(),
                                std::back_inserter(common_nodes));

          rows[i+1] = rows[i] + cast_int<dof_id_type>(common_nodes.size());

          for (auto c_node : common_nodes)
            cols.push_back(global_to_local[c_node]);

          i++;
        }
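
      // rows/cols now hold this interface's node-adjacency graph in
      // CSR form, using local indices.  E.g. (illustrative): for 3
      // nodes where node 0 touches nodes 1 and 2, and nodes 1 and 2
      // each touch only node 0, rows == {0,2,3,4} and
      // cols == {1,2, 0, 0}.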

      Mat adj;
      MatPartitioning part;
      IS is;
      PetscInt local_size, rows_size, cols_size;
      PetscInt *adj_i, *adj_j;
      const PetscInt *indices;
      PetscCalloc1(rows.size(), &adj_i);
      PetscCalloc1(cols.size(), &adj_j);
      rows_size = cast_int<PetscInt>(rows.size());
      for (PetscInt ii=0; ii<rows_size; ii++)
        adj_i[ii] = rows[ii];

      cols_size = cast_int<PetscInt>(cols.size());
      for (PetscInt ii=0; ii<cols_size; ii++)
        adj_j[ii] = cols[ii];

      const PetscInt sz = cast_int<PetscInt>(pmap.second.size());
      MatCreateMPIAdj(PETSC_COMM_SELF, sz, sz, adj_i, adj_j, nullptr, &adj);
      MatPartitioningCreate(PETSC_COMM_SELF, &part);
      MatPartitioningSetAdjacency(part, adj);
      MatPartitioningSetNParts(part, 2);
      PetscObjectSetOptionsPrefix((PetscObject)part, "balance_");
      MatPartitioningSetFromOptions(part);
      MatPartitioningApply(part, &is);

      MatDestroy(&adj);
      MatPartitioningDestroy(&part);

      ISGetLocalSize(is, &local_size);
      ISGetIndices(is, &indices);
      i = 0;
      for (auto id : pmap.second)
        {
          auto & node = mesh.node_ref(id);
          if (indices[i])
            node.processor_id() = pmap.first.second;
          else
            node.processor_id() = pmap.first.first;

          i++;
        }
      ISRestoreIndices(is, &indices);
      ISDestroy(&is);
    }
#else
  libmesh_error_msg("PETSc is required");
#endif
}


void Partitioner::set_node_processor_ids(MeshBase & mesh)
{
  LOG_SCOPE("set_node_processor_ids()","Partitioner");

  // This function must be run on all processors at once
  libmesh_parallel_only(mesh.comm());

  // If we have any unpartitioned elements at this
  // stage there is a problem
  libmesh_assert (MeshTools::n_elem(mesh.unpartitioned_elements_begin(),
                                    mesh.unpartitioned_elements_end()) == 0);

  // Start from scratch here: nodes we used to own may not be
  // eligible for us to own any more.
  for (auto & node : mesh.node_ptr_range())
    {
      node->processor_id() = DofObject::invalid_processor_id;
    }

  // Loop over all the active elements
  for (auto & elem : mesh.active_element_ptr_range())
    {
      libmesh_assert(elem);

      libmesh_assert_not_equal_to (elem->processor_id(), DofObject::invalid_processor_id);

      // Consider updating the processor id on this element's nodes
      for (Node & node : elem->node_ref_range())
        {
          processor_id_type & pid = node.processor_id();
          pid = node.choose_processor_id(pid, elem->processor_id());
        }
    }

  // How we finish off the node partitioning depends on our command
  // line options.

  const bool load_balanced_nodes_linear =
      libMesh::on_command_line ("--load-balanced-nodes-linear");

  const bool load_balanced_nodes_bfs =
       libMesh::on_command_line ("--load-balanced-nodes-bfs");

  const bool load_balanced_nodes_petscpartition =
      libMesh::on_command_line ("--load-balanced-nodes-petscpartitioner");

  unsigned int n_load_balance_options = load_balanced_nodes_linear;
  n_load_balance_options += load_balanced_nodes_bfs;
  n_load_balance_options += load_balanced_nodes_petscpartition;
  libmesh_error_msg_if(n_load_balance_options > 1,
                       "Cannot perform more than one load balancing type at a time");

  if (load_balanced_nodes_linear)
    set_interface_node_processor_ids_linear(mesh);
  else if (load_balanced_nodes_bfs)
    set_interface_node_processor_ids_BFS(mesh);
  else if (load_balanced_nodes_petscpartition)
    set_interface_node_processor_ids_petscpartitioner(mesh);

  // The node balancing algorithms above are responsible for
  // assigning owned nodes; we still need to synchronize processor
  // ids between processors.
  {
    // For inactive elements, we will have already gotten most of
    // these nodes, *except* for the case of a parent with a subset
    // of active descendants which are remote elements.  In that
    // case some of the parent nodes will not have been properly
    // handled yet on our processor.
    //
    // We don't want to inadvertently give one of them an incorrect
    // processor id, but if we're not in serial then we have to
    // assign them temporary pids to make querying work, so we'll
    // save our *valid* pids before assigning temporaries.
    //
    // Even in serial we'll want to check and make sure we're not
    // overwriting valid active node pids with pids from subactive
    // elements.
    std::unordered_set<dof_id_type> bad_pids;

    for (auto & node : mesh.node_ptr_range())
      if (node->processor_id() == DofObject::invalid_processor_id)
        bad_pids.insert(node->id());

    // If we assign our temporary ids by looping from finer elements
    // to coarser elements, we'll always get an id from the finest
    // ghost element we can see, which will usually be "closer" to
    // the true processor we want to query and so will reduce query
    // cycles that don't reach that processor.

    // But we can still end up with a query cycle that dead-ends, so
    // we need to prepare a "push" communication step here.

    const bool is_serial = mesh.is_serial();
    std::unordered_map
      <processor_id_type,
       std::unordered_map<dof_id_type, processor_id_type>>
      potential_pids;

    const unsigned int n_levels = MeshTools::n_levels(mesh);
    for (unsigned int level = n_levels; level > 0; --level)
      {
        for (auto & elem : as_range(mesh.level_elements_begin(level-1),
                                    mesh.level_elements_end(level-1)))
          {
            libmesh_assert_not_equal_to (elem->processor_id(),
                                         DofObject::invalid_processor_id);

            const processor_id_type elem_pid = elem->processor_id();

            // Consider updating the processor id on this element's nodes
            for (Node & node : elem->node_ref_range())
              {
                processor_id_type & pid = node.processor_id();
                if (bad_pids.count(node.id()))
                  pid = node.choose_processor_id(pid, elem_pid);
                else if (!is_serial)
                  potential_pids[elem_pid][node.id()] = pid;
              }
          }
      }

    if (!is_serial)
      {
        std::unordered_map
          <processor_id_type,
           std::vector<std::pair<dof_id_type, processor_id_type>>>
          potential_pids_vecs;

        for (auto & pair : potential_pids)
          potential_pids_vecs[pair.first].assign(pair.second.begin(), pair.second.end());

        auto pids_action_functor =
          [& mesh, & bad_pids]
          (processor_id_type /* src_pid */,
           const std::vector<std::pair<dof_id_type, processor_id_type>> & data)
          {
            for (auto pair : data)
              {
                Node & node = mesh.node_ref(pair.first);
                processor_id_type & pid = node.processor_id();
                auto it = bad_pids.find(pair.first);
                if (it != bad_pids.end())
                  {
                    pid = pair.second;
                    bad_pids.erase(it);
                  }
                else
                  pid = node.choose_processor_id(pid, pair.second);
              }
          };

        Parallel::push_parallel_vector_data
          (mesh.comm(), potential_pids_vecs, pids_action_functor);

        // Using default libMesh options, we'll just need to sync
        // between processors now.  The catch here is that we can't
        // initially trust Node::choose_processor_id() because some
        // of those node processor ids are the temporary ones.
        CorrectProcIds correct_pids(mesh, bad_pids);
        Parallel::sync_node_data_by_element_id
          (mesh, mesh.elements_begin(), mesh.elements_end(),
           Parallel::SyncEverything(), Parallel::SyncEverything(),
           correct_pids);

        // But once we've got all the non-temporary pids synced, we
        // may need to sync again to get any pids on nodes only
        // connected to subactive elements, for which *only*
        // "temporary" pids are possible.
        bad_pids.clear();
        Parallel::sync_node_data_by_element_id
          (mesh,
           mesh.elements_begin(), mesh.elements_end(),
           Parallel::SyncEverything(), Parallel::SyncEverything(),
           correct_pids);
      }
  }

  // We can't assert that all nodes are connected to elements, because
  // a DistributedMesh with NodeConstraints might have pulled in some
  // remote nodes solely for evaluating those constraints.
  // MeshTools::libmesh_assert_connected_nodes(mesh);

#ifdef DEBUG
  MeshTools::libmesh_assert_valid_procids<Node>(mesh);
  //MeshTools::libmesh_assert_canonical_node_procids(mesh);
#endif
}


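// Functor for Parallel::sync_dofobject_data_by_id(): gather_data()
// serves the processor-local element indices already stored in
// id_map, and act_on_data() records the replies, so every processor
// ends up agreeing with the owners about each active element's
// local index.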
struct SyncLocalIDs
{
  typedef dof_id_type datum;

  typedef std::unordered_map<dof_id_type, dof_id_type> map_type;

  SyncLocalIDs(map_type & _id_map) : id_map(_id_map) {}

  map_type & id_map;

  void gather_data (const std::vector<dof_id_type> & ids,
                    std::vector<datum> & local_ids) const
  {
    local_ids.resize(ids.size());

    for (auto i : index_range(ids))
      local_ids[i] = id_map[ids[i]];
  }

  void act_on_data (const std::vector<dof_id_type> & ids,
                    const std::vector<datum> & local_ids)
  {
    for (auto i : index_range(local_ids))
      id_map[ids[i]] = local_ids[i];
  }
};

void Partitioner::_find_global_index_by_pid_map(const MeshBase & mesh)
{
  const dof_id_type n_active_local_elem = mesh.n_active_local_elem();

  // Find the number of active elements on each processor.  We cannot use
  // mesh.n_active_elem_on_proc(pid) since that only returns the number of
  // elements assigned to pid which are currently stored on the calling
  // processor. This will not in general be correct for parallel meshes
  // when (pid!=mesh.processor_id()).
  auto n_proc = mesh.n_processors();
  _n_active_elem_on_proc.resize(n_proc);
  mesh.comm().allgather(n_active_local_elem, _n_active_elem_on_proc);

  std::vector<dof_id_type> n_active_elem_before_proc(mesh.n_processors());

  for (auto i : make_range(n_proc-1))
    n_active_elem_before_proc[i+1] =
      n_active_elem_before_proc[i] + _n_active_elem_on_proc[i];
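
  // E.g. (illustrative counts): if _n_active_elem_on_proc == {4,3,5}
  // then n_active_elem_before_proc == {0,4,7}, so processor p's
  // active elements occupy the contiguous global-by-pid index range
  // [n_active_elem_before_proc[p],
  //  n_active_elem_before_proc[p] + _n_active_elem_on_proc[p]).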

  libMesh::BoundingBox bbox =
    MeshTools::create_bounding_box(mesh);

  _global_index_by_pid_map.clear();

  // create the mapping which is contiguous by processor
  MeshCommunication().find_local_indices (bbox,
                                          mesh.active_local_elements_begin(),
                                          mesh.active_local_elements_end(),
                                          _global_index_by_pid_map);

  SyncLocalIDs sync(_global_index_by_pid_map);

  Parallel::sync_dofobject_data_by_id
      (mesh.comm(), mesh.active_elements_begin(), mesh.active_elements_end(), sync);

  for (const auto & elem : mesh.active_element_ptr_range())
    {
      const processor_id_type pid = elem->processor_id();
      libmesh_assert_less (_global_index_by_pid_map[elem->id()], _n_active_elem_on_proc[pid]);

      _global_index_by_pid_map[elem->id()] += n_active_elem_before_proc[pid];
    }
}

void Partitioner::build_graph (const MeshBase & mesh)
{
  LOG_SCOPE("build_graph()", "ParmetisPartitioner");

  const dof_id_type n_active_local_elem  = mesh.n_active_local_elem();

  // If we have boundary elements in this mesh, we want to account for
  // the connectivity between them and interior elements.  We can find
  // interior elements from boundary elements, but we need to build up
  // a lookup map to do the reverse.
  typedef std::unordered_multimap<const Elem *, const Elem *> map_type;
  map_type interior_to_boundary_map;

  for (const auto & elem : mesh.active_element_ptr_range())
    {
      // If we don't have an interior_parent then there's nothing to look us
      // up.
      if ((elem->dim() >= LIBMESH_DIM) ||
          !elem->interior_parent())
        continue;

      // get all relevant interior elements
      std::set<const Elem *> neighbor_set;
      elem->find_interior_neighbors(neighbor_set);

      for (const auto & neighbor : neighbor_set)
        interior_to_boundary_map.emplace(neighbor, elem);
    }

#ifdef LIBMESH_ENABLE_AMR
  std::vector<const Elem *> neighbors_offspring;
#endif

  // This is costly, and we only need to do it if the mesh has
  // changed since we last partitioned... but the mesh probably has
  // changed since we last partitioned, and if it hasn't we don't
  // have a reliable way to be sure of that.
  _find_global_index_by_pid_map(mesh);

  dof_id_type first_local_elem = 0;
  for (auto pid : make_range(mesh.processor_id()))
     first_local_elem += _n_active_elem_on_proc[pid];

  _dual_graph.clear();
  _dual_graph.resize(n_active_local_elem);
  _local_id_to_elem.resize(n_active_local_elem);
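
  // _dual_graph[i] will hold, for the local element whose
  // global-by-pid index is first_local_elem + i, the global-by-pid
  // indices of every element it connects to -- one adjacency list
  // per local element, ready to be flattened into the CSR arrays
  // graph partitioners such as METIS/ParMETIS expect.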

  for (const auto & elem : mesh.active_local_element_ptr_range())
    {
      libmesh_assert (_global_index_by_pid_map.count(elem->id()));
      const dof_id_type global_index_by_pid =
        _global_index_by_pid_map[elem->id()];

      const dof_id_type local_index =
        global_index_by_pid - first_local_elem;
      libmesh_assert_less (local_index, n_active_local_elem);

      std::vector<dof_id_type> & graph_row = _dual_graph[local_index];

      // Save this off to make it easy to index later
      _local_id_to_elem[local_index] = elem;

      // Loop over the element's neighbors.  An element
      // adjacency corresponds to a face neighbor
      for (auto neighbor : elem->neighbor_ptr_range())
        {
          if (neighbor != nullptr)
            {
              // If the neighbor is active treat it
              // as a connection
              if (neighbor->active())
                {
                  libmesh_assert(_global_index_by_pid_map.count(neighbor->id()));
                  const dof_id_type neighbor_global_index_by_pid =
                    _global_index_by_pid_map[neighbor->id()];

                  graph_row.push_back(neighbor_global_index_by_pid);
                }

#ifdef LIBMESH_ENABLE_AMR

              // Otherwise we need to find all of the
              // neighbor's children that are connected to
              // us and add them
              else
                {
                  // The side of the neighbor to which
                  // we are connected
                  const unsigned int ns =
                    neighbor->which_neighbor_am_i (elem);
                  libmesh_assert_less (ns, neighbor->n_neighbors());

                  // Get all the active children (& grandchildren, etc...)
                  // of the neighbor

                  // FIXME - this is the wrong thing, since we
                  // should be getting the active family tree on
                  // our side only.  But adding too many graph
                  // links may cause hanging nodes to tend to be
                  // on partition interiors, which would reduce
                  // communication overhead for constraint
                  // equations, so we'll leave it.

                  neighbor->active_family_tree (neighbors_offspring);

                  // Get all the neighbor's children that
                  // live on that side and are thus connected
                  // to us
                  for (const auto & child : neighbors_offspring)
                    {
                      // This does not assume a level-1 mesh.
                      // Note that since children have sides numbered
                      // coincident with the parent's, this is a
                      // sufficient test.
                      if (child->neighbor_ptr(ns) == elem)
                        {
                          libmesh_assert (child->active());
                          libmesh_assert (_global_index_by_pid_map.count(child->id()));
                          const dof_id_type child_global_index_by_pid =
                            _global_index_by_pid_map[child->id()];

                          graph_row.push_back(child_global_index_by_pid);
                        }
                    }
                }

#endif /* ifdef LIBMESH_ENABLE_AMR */

            }
        }

      if ((elem->dim() < LIBMESH_DIM) &&
          elem->interior_parent())
        {
          // get all relevant interior elements
          std::set<const Elem *> neighbor_set;
          elem->find_interior_neighbors(neighbor_set);

          for (const auto & neighbor : neighbor_set)
            {
              const dof_id_type neighbor_global_index_by_pid =
                _global_index_by_pid_map[neighbor->id()];

              graph_row.push_back(neighbor_global_index_by_pid);
            }
        }

      // Check for any boundary neighbors
      for (const auto & pr : as_range(interior_to_boundary_map.equal_range(elem)))
        {
          const Elem * neighbor = pr.second;

          const dof_id_type neighbor_global_index_by_pid =
            _global_index_by_pid_map[neighbor->id()];

          graph_row.push_back(neighbor_global_index_by_pid);
        }
    }

}

void Partitioner::assign_partitioning (const MeshBase & mesh, const std::vector<dof_id_type> & parts)
{
  LOG_SCOPE("assign_partitioning()", "ParmetisPartitioner");

  // This function must be run on all processors at once
  libmesh_parallel_only(mesh.comm());

  dof_id_type first_local_elem = 0;
  for (auto pid : make_range(mesh.processor_id()))
    first_local_elem += _n_active_elem_on_proc[pid];

#ifndef NDEBUG
  const dof_id_type n_active_local_elem = mesh.n_active_local_elem();
#endif

  std::map<processor_id_type, std::vector<dof_id_type>>
    requested_ids;

  // Results to gather from each processor - kept in a map so we
  // do only one loop over elements after all receives are done.
  std::map<processor_id_type, std::vector<processor_id_type>>
    filled_request;

  for (auto & elem : mesh.active_element_ptr_range())
    {
      // we need to get the index from the owning processor
      // (note we cannot assign the new pid yet -- the final loop
      // below must see the same processor ids, in the same order,
      // that we use to build this request!)
      requested_ids[elem->processor_id()].push_back(elem->id());
    }

  auto gather_functor =
    [this,
     & parts,
#ifndef NDEBUG
     & mesh,
     n_active_local_elem,
#endif
     first_local_elem]
    (processor_id_type, const std::vector<dof_id_type> & ids,
     std::vector<processor_id_type> & data)
    {
      const std::size_t ids_size = ids.size();
      data.resize(ids.size());

      for (std::size_t i=0; i != ids_size; i++)
        {
          const dof_id_type requested_elem_index = ids[i];

          libmesh_assert(_global_index_by_pid_map.count(requested_elem_index));

          const dof_id_type global_index_by_pid =
            _global_index_by_pid_map[requested_elem_index];

          const dof_id_type local_index =
            global_index_by_pid - first_local_elem;

          libmesh_assert_less (local_index, parts.size());
          libmesh_assert_less (local_index, n_active_local_elem);

          const processor_id_type elem_procid =
            cast_int<processor_id_type>(parts[local_index]);

          libmesh_assert_less (elem_procid, mesh.n_partitions());

          data[i] = elem_procid;
        }
    };

  auto action_functor =
    [&filled_request]
    (processor_id_type pid,
     const std::vector<dof_id_type> &,
     const std::vector<processor_id_type> & new_procids)
    {
      filled_request[pid] = new_procids;
    };

  // Trade requests with other processors
  const processor_id_type * ex = nullptr;
  Parallel::pull_parallel_vector_data
    (mesh.comm(), requested_ids, gather_functor, action_functor, ex);

  // and finally assign the partitioning.
  // note we are iterating in exactly the same order
  // used to build up the request, so we can expect the
  // required entries to be in the proper sequence.
  std::vector<unsigned int> counters(mesh.n_processors(), 0);
  for (auto & elem : mesh.active_element_ptr_range())
    {
      const processor_id_type current_pid = elem->processor_id();

      libmesh_assert_less (counters[current_pid], requested_ids[current_pid].size());

      const processor_id_type elem_procid =
        filled_request[current_pid][counters[current_pid]++];

      libmesh_assert_less (elem_procid, mesh.n_partitions());
      elem->processor_id() = elem_procid;
    }
}


} // namespace libMesh