1 // The libMesh Finite Element Library.
2 // Copyright (C) 2002-2020 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
3 
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License as published by the Free Software Foundation; either
7 // version 2.1 of the License, or (at your option) any later version.
8 
9 // This library is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 // Lesser General Public  License for more details.
13 
14 // You should have received a copy of the GNU Lesser General Public
15 // License along with this library; if not, write to the Free Software
16 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17 
18 
19 
20 // Local includes
21 #include "libmesh/sparsity_pattern.h"
22 
23 // libMesh includes
24 #include "libmesh/coupling_matrix.h"
25 #include "libmesh/dof_map.h"
26 #include "libmesh/elem.h"
27 #include "libmesh/ghosting_functor.h"
28 #include "libmesh/hashword.h"
29 #include "libmesh/parallel_algebra.h"
30 #include "libmesh/parallel.h"
31 
32 // TIMPI includes
33 #include "timpi/communicator.h"
34 
35 
36 namespace libMesh
37 {
38 namespace SparsityPattern
39 {
40 
41 //-------------------------------------------------------
42 // we need to implement these constructors here so that
43 // a full DofMap definition is available.
Build(const MeshBase & mesh_in,const DofMap & dof_map_in,const CouplingMatrix * dof_coupling_in,const std::set<GhostingFunctor * > & coupling_functors_in,const bool implicit_neighbor_dofs_in,const bool need_full_sparsity_pattern_in)44 Build::Build (const MeshBase & mesh_in,
45               const DofMap & dof_map_in,
46               const CouplingMatrix * dof_coupling_in,
47               const std::set<GhostingFunctor *> & coupling_functors_in,
48               const bool implicit_neighbor_dofs_in,
49               const bool need_full_sparsity_pattern_in) :
50   ParallelObject(dof_map_in),
51   mesh(mesh_in),
52   dof_map(dof_map_in),
53   dof_coupling(dof_coupling_in),
54   coupling_functors(coupling_functors_in),
55   implicit_neighbor_dofs(implicit_neighbor_dofs_in),
56   need_full_sparsity_pattern(need_full_sparsity_pattern_in),
57   sparsity_pattern(),
58   nonlocal_pattern(),
59   n_nz(),
60   n_oz()
61 {}
62 
63 
64 
Build(Build & other,Threads::split)65 Build::Build (Build & other, Threads::split) :
66   ParallelObject(other),
67   mesh(other.mesh),
68   dof_map(other.dof_map),
69   dof_coupling(other.dof_coupling),
70   coupling_functors(other.coupling_functors),
71   implicit_neighbor_dofs(other.implicit_neighbor_dofs),
72   need_full_sparsity_pattern(other.need_full_sparsity_pattern),
73   hashed_dof_sets(other.hashed_dof_sets),
74   sparsity_pattern(),
75   nonlocal_pattern(),
76   n_nz(),
77   n_oz()
78 {}
79 
80 
81 
#if defined(__GNUC__) && (__GNUC__ < 4) && !defined(__INTEL_COMPILER)

// No-op compiled only for pre-4.x GNU compilers (and not when the Intel
// compiler masquerades as GCC).  NOTE(review): presumably a workaround
// for an old-GCC quirk -- confirm the original motivation before removing.
void _dummy_function(void) {}

#endif
87 
88 
89 
sorted_connected_dofs(const Elem * elem,std::vector<dof_id_type> & dofs_vi,unsigned int vi)90 void Build::sorted_connected_dofs(const Elem * elem,
91                                   std::vector<dof_id_type> & dofs_vi,
92                                   unsigned int vi)
93 {
94   dof_map.dof_indices (elem, dofs_vi, vi);
95 #ifdef LIBMESH_ENABLE_CONSTRAINTS
96   dof_map.find_connected_dofs (dofs_vi);
97 #endif
98   // We can be more efficient if we sort the element DOFs into
99   // increasing order
100   std::sort(dofs_vi.begin(), dofs_vi.end());
101 
102   // Handle cases where duplicate nodes are intentionally assigned to
103   // a single element.
104   dofs_vi.erase(std::unique(dofs_vi.begin(), dofs_vi.end()), dofs_vi.end());
105 }
106 
107 
108 
// Record the coupling of every row DOF in \p element_dofs_i to every
// column DOF in \p element_dofs_j.  Rows we own go into
// sparsity_pattern; rows owned by other processors accumulate in
// nonlocal_pattern for later communication (see parallel_sync()).
// Both inputs must be sorted and duplicate-free (as produced by
// sorted_connected_dofs()), and each pattern row is kept sorted.
void Build::handle_vi_vj(const std::vector<dof_id_type> & element_dofs_i,
                         const std::vector<dof_id_type> & element_dofs_j)
{
  const unsigned int n_dofs_on_element_i =
    cast_int<unsigned int>(element_dofs_i.size());

  const processor_id_type proc_id     = mesh.processor_id();
  const dof_id_type first_dof_on_proc = dof_map.first_dof(proc_id);
  const dof_id_type end_dof_on_proc   = dof_map.end_dof(proc_id);

  // Scratch space for column indices not yet present in a row; reused
  // across the i-loop to avoid repeated allocation.
  std::vector<dof_id_type>
    dofs_to_add;

  const unsigned int n_dofs_on_element_j =
    cast_int<unsigned int>(element_dofs_j.size());

  // It only makes sense to compute hashes and see if we can skip
  // doing work when there are a "large" amount of DOFs for a given
  // element. The cutoff for "large" is somewhat arbitrarily chosen
  // based on a test case with a spider node that resulted in O(10^3)
  // entries in element_dofs_i for O(10^3) elements. Making this
  // number larger will disable the hashing optimization in more
  // cases.
  bool dofs_seen = false;
  if (n_dofs_on_element_j > 0 && n_dofs_on_element_i > 256)
    {
      // Hash the (i, j) index-set pair; a successful insert means this
      // exact pair has not been processed before.
      auto hash_i = Utility::hashword(element_dofs_i);
      auto hash_j = Utility::hashword(element_dofs_j);
      auto final_hash = Utility::hashword2(hash_i, hash_j);
      auto result = hashed_dof_sets.insert(final_hash);
      // if insert failed, we have already seen these dofs
      dofs_seen = !result.second;
    }

  // there might be 0 dofs for the other variable on the same element
  // (when subdomain variables do not overlap) and that's when we do
  // not do anything
  if (n_dofs_on_element_j > 0 && !dofs_seen)
    {
      for (unsigned int i=0; i<n_dofs_on_element_i; i++)
        {
          const dof_id_type ig = element_dofs_i[i];

          SparsityPattern::Row * row;

          // We save non-local row components for now so we can
          // communicate them to other processors later.

          if ((ig >= first_dof_on_proc) &&
              (ig <  end_dof_on_proc))
            {
              // This is what I mean
              // libmesh_assert_greater_equal ((ig - first_dof_on_proc), 0);
              // but do the test like this because ig and
              // first_dof_on_proc are unsigned ints
              libmesh_assert_greater_equal (ig, first_dof_on_proc);
              libmesh_assert_less (ig, (sparsity_pattern.size() +
                                        first_dof_on_proc));

              row = &sparsity_pattern[ig - first_dof_on_proc];
            }
          else
            {
              // Not our row: stash it keyed on the global id
              row = &nonlocal_pattern[ig];
            }

          // If the row is empty we will add *all*
          // the element j DOFs, so just do that.
          if (row->empty())
            {
              row->insert(row->end(),
                          element_dofs_j.begin(),
                          element_dofs_j.end());
            }
          else
            {
              // Build a list of the DOF indices not found in the
              // sparsity pattern
              dofs_to_add.clear();

              // Cache iterators.  Low will move forward, subsequent
              // searches will be on smaller ranges
              SparsityPattern::Row::iterator
                low  = std::lower_bound
                (row->begin(), row->end(), element_dofs_j.front()),
                high = std::upper_bound
                (low,          row->end(), element_dofs_j.back());

              for (unsigned int j=0; j<n_dofs_on_element_j; j++)
                {
                  const dof_id_type jg = element_dofs_j[j];

                  // See if jg is in the sorted range
                  std::pair<SparsityPattern::Row::iterator,
                            SparsityPattern::Row::iterator>
                    pos = std::equal_range (low, high, jg);

                  // Must add jg if it wasn't found
                  if (pos.first == pos.second)
                    dofs_to_add.push_back(jg);

                  // pos.first is now a valid lower bound for any
                  // remaining element j DOFs. (That's why we sorted them.)
                  // Use it for the next search
                  low = pos.first;
                }

              // Add to the sparsity pattern
              if (!dofs_to_add.empty())
                {
                  const std::size_t old_size = row->size();

                  // Append the new entries, then merge the two sorted
                  // sub-ranges [begin, begin+old_size) and
                  // [begin+old_size, end) back into one sorted row.
                  row->insert (row->end(),
                               dofs_to_add.begin(),
                               dofs_to_add.end());

                  SparsityPattern::sort_row
                    (row->begin(), row->begin()+old_size,
                     row->end());
                }
            }
        } // End dofs-of-var-i loop
    } // End if-dofs-of-var-j
}
233 
234 
235 
// Build the sparsity contributions of every element in \p range: for
// each element, ask the coupling functors which (element, coupling
// matrix) pairs it couples to, translate those into row/column DOF
// pairs via handle_vi_vj(), then tally per-row on/off-processor
// nonzero counts into n_nz/n_oz.
void Build::operator()(const ConstElemRange & range)
{
  // Compute the sparsity structure of the global matrix.  This can be
  // fed into a PetscMatrix to allocate exactly the number of nonzeros
  // necessary to store the matrix.  This algorithm should be linear
  // in the (# of elements)*(# nodes per element)
  const processor_id_type proc_id           = mesh.processor_id();
  const dof_id_type n_dofs_on_proc    = dof_map.n_dofs_on_processor(proc_id);
  const dof_id_type first_dof_on_proc = dof_map.first_dof(proc_id);
  const dof_id_type end_dof_on_proc   = dof_map.end_dof(proc_id);

  // One row per locally-owned DOF
  sparsity_pattern.resize(n_dofs_on_proc);

  // Handle dof coupling specified by library and user coupling functors
  {
    const unsigned int n_var = dof_map.n_variables();

    // element_dofs_i[vi] caches the current element's sorted DOFs for
    // variable vi, so they are gathered once per element.
    std::vector<std::vector<dof_id_type> > element_dofs_i(n_var);

    std::vector<const Elem *> coupled_neighbors;
    for (const auto & elem : range)
      {
        // Make some fake element iterators defining a range
        // pointing to only this element.
        Elem * const * elempp = const_cast<Elem * const *>(&elem);
        Elem * const * elemend = elempp+1;

        const MeshBase::const_element_iterator fake_elem_it =
          MeshBase::const_element_iterator(elempp,
                                           elemend,
                                           Predicates::NotNull<Elem * const *>());

        const MeshBase::const_element_iterator fake_elem_end =
          MeshBase::const_element_iterator(elemend,
                                           elemend,
                                           Predicates::NotNull<Elem * const *>());

        // Elements this element couples to, each optionally paired
        // with a variable-coupling matrix.
        GhostingFunctor::map_type elements_to_couple;

        // Man, I wish we had guaranteed unique_ptr availability...
        // (merge_ghost_functor_outputs hands us raw CouplingMatrix
        // pointers we must delete below.)
        std::set<CouplingMatrix *> temporary_coupling_matrices;

        dof_map.merge_ghost_functor_outputs(elements_to_couple,
                                            temporary_coupling_matrices,
                                            dof_map.coupling_functors_begin(),
                                            dof_map.coupling_functors_end(),
                                            fake_elem_it,
                                            fake_elem_end,
                                            DofObject::invalid_processor_id);
        // Gather this element's own DOFs once per variable
        for (unsigned int vi=0; vi<n_var; vi++)
          this->sorted_connected_dofs(elem, element_dofs_i[vi], vi);

        for (unsigned int vi=0; vi<n_var; vi++)
          for (const auto & pr : elements_to_couple)
            {
              const Elem * const partner = pr.first;
              const CouplingMatrix * ghost_coupling = pr.second;

              // Loop over coupling matrix row variables if we have a
              // coupling matrix, or all variables if not.
              if (ghost_coupling)
                {
                  libmesh_assert_equal_to (ghost_coupling->size(), n_var);
                  ConstCouplingRow ccr(vi, *ghost_coupling);

                  for (const auto & idx : ccr)
                    {
                      // Self-coupling can reuse the cached DOFs;
                      // otherwise gather the partner's DOFs fresh.
                      if (partner == elem)
                        this->handle_vi_vj(element_dofs_i[vi], element_dofs_i[idx]);
                      else
                        {
                          std::vector<dof_id_type> partner_dofs;
                          this->sorted_connected_dofs(partner, partner_dofs, idx);
                          this->handle_vi_vj(element_dofs_i[vi], partner_dofs);
                        }
                    }
                }
              else
                {
                  for (unsigned int vj = 0; vj != n_var; ++vj)
                    {
                      if (partner == elem)
                        this->handle_vi_vj(element_dofs_i[vi], element_dofs_i[vj]);
                      else
                        {
                          std::vector<dof_id_type> partner_dofs;
                          this->sorted_connected_dofs(partner, partner_dofs, vj);
                          this->handle_vi_vj(element_dofs_i[vi], partner_dofs);
                        }
                    }
                }
            } // End ghosted element loop

        // Release the coupling matrices merge_ghost_functor_outputs
        // allocated for us.
        for (auto & mat : temporary_coupling_matrices)
          delete mat;

      } // End range element loop
  } // End ghosting functor section

  // Now a new chunk of sparsity structure is built for all of the
  // DOFs connected to our rows of the matrix.

  // If we're building a full sparsity pattern, then we've got
  // complete rows to work with, so we can just count them from
  // scratch.
  if (need_full_sparsity_pattern)
    {
      n_nz.clear();
      n_oz.clear();
    }

  n_nz.resize (n_dofs_on_proc, 0);
  n_oz.resize (n_dofs_on_proc, 0);

  for (dof_id_type i=0; i<n_dofs_on_proc; i++)
    {
      // Get the row of the sparsity pattern
      SparsityPattern::Row & row = sparsity_pattern[i];

      // Count on-processor (n_nz) vs. off-processor (n_oz) couplings
      for (const auto & df : row)
        if ((df < first_dof_on_proc) || (df >= end_dof_on_proc))
          n_oz[i]++;
        else
          n_nz[i]++;

      // If we're not building a full sparsity pattern, then we want
      // to avoid overcounting these entries as much as possible.
      if (!need_full_sparsity_pattern)
        row.clear();
    }
}
367 
368 
369 
// Merge another Build object's partial results into this one --
// presumably the reduction step pairing the Threads::split
// constructor (confirm against the threaded caller).  Local rows and
// their n_nz/n_oz counts are combined, nonlocal rows are unioned, and
// the hashed dof sets are merged.
void Build::join (const SparsityPattern::Build & other)
{
  const processor_id_type proc_id           = mesh.processor_id();
  const dof_id_type       n_global_dofs     = dof_map.n_dofs();
  const dof_id_type       n_dofs_on_proc    = dof_map.n_dofs_on_processor(proc_id);
  const dof_id_type       first_dof_on_proc = dof_map.first_dof(proc_id);
  const dof_id_type       end_dof_on_proc   = dof_map.end_dof(proc_id);

  libmesh_assert_equal_to (sparsity_pattern.size(), other.sparsity_pattern.size());
  libmesh_assert_equal_to (n_nz.size(), sparsity_pattern.size());
  libmesh_assert_equal_to (n_oz.size(), sparsity_pattern.size());

  for (dof_id_type r=0; r<n_dofs_on_proc; r++)
    {
      // increment the number of on and off-processor nonzeros in this row
      // (note this will be an upper bound unless we need the full sparsity pattern)
      if (need_full_sparsity_pattern)
        {
          SparsityPattern::Row       & my_row    = sparsity_pattern[r];
          const SparsityPattern::Row & their_row = other.sparsity_pattern[r];

          // simple copy if I have no dofs
          if (my_row.empty())
            my_row = their_row;

          // otherwise add their DOFs to mine, resort, and re-unique the row
          else if (!their_row.empty()) // do nothing for the trivial case where
            {                          // their row is empty
              my_row.insert (my_row.end(),
                             their_row.begin(),
                             their_row.end());

              // We cannot use SparsityPattern::sort_row() here because it expects
              // the [begin,middle) [middle,end) to be non-overlapping.  This is not
              // necessarily the case here, so use std::sort()
              std::sort (my_row.begin(), my_row.end());

              my_row.erase(std::unique (my_row.begin(), my_row.end()), my_row.end());
            }

          // fix the number of on and off-processor nonzeros in this row
          n_nz[r] = n_oz[r] = 0;

          for (const auto & df : my_row)
            if ((df < first_dof_on_proc) || (df >= end_dof_on_proc))
              n_oz[r]++;
            else
              n_nz[r]++;
        }
      else
        {
          // Without full rows we can only sum the two estimates, then
          // clamp to the obvious upper bounds: at most n_dofs_on_proc
          // local columns, and at most (n_global_dofs - n_nz) remote ones.
          n_nz[r] += other.n_nz[r];
          n_nz[r] = std::min(n_nz[r], n_dofs_on_proc);
          n_oz[r] += other.n_oz[r];
          n_oz[r] =std::min(n_oz[r], static_cast<dof_id_type>(n_global_dofs-n_nz[r]));
        }
    }

  // Move nonlocal row information to ourselves; the other thread
  // won't need it in the map after that.
  for (const auto & p : other.nonlocal_pattern)
    {
#ifndef NDEBUG
      // Sanity check: the owner of this dof must be some other processor.
      const dof_id_type dof_id = p.first;

      processor_id_type dbg_proc_id = 0;
      while (dof_id >= dof_map.end_dof(dbg_proc_id))
        dbg_proc_id++;
      libmesh_assert (dbg_proc_id != this->processor_id());
#endif

      const SparsityPattern::Row & their_row = p.second;

      // We should have no empty values in a map
      libmesh_assert (!their_row.empty());

      NonlocalGraph::iterator my_it = nonlocal_pattern.find(p.first);
      if (my_it == nonlocal_pattern.end())
        {
          //          nonlocal_pattern[it->first].swap(their_row);
          nonlocal_pattern[p.first] = their_row;
        }
      else
        {
          // Union their row into ours, keeping it sorted and unique
          SparsityPattern::Row & my_row = my_it->second;

          my_row.insert (my_row.end(),
                         their_row.begin(),
                         their_row.end());

          // We cannot use SparsityPattern::sort_row() here because it expects
          // the [begin,middle) [middle,end) to be non-overlapping.  This is not
          // necessarily the case here, so use std::sort()
          std::sort (my_row.begin(), my_row.end());

          my_row.erase(std::unique (my_row.begin(), my_row.end()), my_row.end());
        }
    }

  // Combine the other thread's hashed_dof_sets with ours.
  hashed_dof_sets.insert(other.hashed_dof_sets.begin(),
                         other.hashed_dof_sets.end());
}
473 
474 
475 
// Ship every row we accumulated for a DOF owned by another processor
// to that owner, and absorb the rows other processors send us --
// merging into sparsity_pattern (full-pattern mode) or just updating
// the n_nz/n_oz counts.  Collective: must be called on every rank.
void Build::parallel_sync ()
{
  parallel_object_only();
  libmesh_assert(this->comm().verify(need_full_sparsity_pattern));

  auto & comm = this->comm();
  auto pid = comm.rank();
  auto num_procs = comm.size();

  // Separate tags so dof-id messages and row messages can't be confused
  auto dof_tag = comm.get_unique_tag();
  auto row_tag = comm.get_unique_tag();

  const auto n_global_dofs   = dof_map.n_dofs();
  const auto n_dofs_on_proc  = dof_map.n_dofs_on_processor(pid);
  const auto local_first_dof = dof_map.first_dof();
  const auto local_end_dof   = dof_map.end_dof();

  // The data to send: per destination rank, parallel vectors of dof
  // ids and their corresponding rows.
  std::map<processor_id_type, std::pair<std::vector<dof_id_type>, std::vector<Row>>> data_to_send;

  // True/false if we're sending to that processor
  std::vector<char> will_send_to(num_procs);

  processor_id_type num_sends = 0;

  // Loop over the nonlocal rows and transform them into the new datastructure
  NonlocalGraph::iterator it = nonlocal_pattern.begin();
  while (it != nonlocal_pattern.end())
  {
    const auto dof_id = it->first;
    auto & row = it->second;

    // Find the owning rank by scanning the dof partitioning
    processor_id_type proc_id = 0;
    while (dof_id >= dof_map.end_dof(proc_id))
      proc_id++;

    // Count the unique sends
    if (!will_send_to[proc_id])
      num_sends++;

    will_send_to[proc_id] = true;

    // rhs [] on purpose
    auto & proc_data = data_to_send[proc_id];

    proc_data.first.push_back(dof_id);

    // Note this invalidates the data in nonlocal_pattern
    proc_data.second.emplace_back(std::move(row));

    // Might as well remove it since it's invalidated anyway
    it = nonlocal_pattern.erase(it);
  }

  // Tell everyone about where everyone will send to
  comm.alltoall(will_send_to);

  // will_send_to now represents who we'll receive from
  // give it a good name
  auto & will_receive_from = will_send_to;

  std::vector<Parallel::Request> dof_sends(num_sends);
  std::vector<Parallel::Request> row_sends(num_sends);

  // Post all of the sends (non-blocking; requests waited on at the end)
  processor_id_type current_send = 0;
  for (auto & proc_data : data_to_send)
  {
    auto proc_id = proc_data.first;

    auto & dofs = proc_data.second.first;
    auto & rows = proc_data.second.second;

    comm.send(proc_id, dofs, dof_sends[current_send], dof_tag);
    comm.send(proc_id, rows, row_sends[current_send], row_tag);

    current_send++;
  }

  // Figure out how many receives we're going to have and make a
  // quick list of who's sending
  processor_id_type num_receives = 0;
  std::vector<processor_id_type> receiving_from;
  for (processor_id_type proc_id = 0; proc_id < num_procs; proc_id++)
  {
    auto will = will_receive_from[proc_id];

    if (will)
    {
      num_receives++;
      receiving_from.push_back(proc_id);
    }
  }

  // Post the receives and handle the data
  for (auto proc_id : receiving_from)
  {
    std::vector<dof_id_type> in_dofs;
    std::vector<Row> in_rows;

    // Receive dofs
    comm.receive(proc_id, in_dofs, dof_tag);
    comm.receive(proc_id, in_rows, row_tag);

    const std::size_t n_rows = in_dofs.size();
    for (std::size_t i = 0; i != n_rows; ++i)
    {
      // Translate the global row id to our local row index
      const auto r = in_dofs[i];
      const auto my_r = r - local_first_dof;

      auto & their_row = in_rows[i];

      if (need_full_sparsity_pattern)
      {
        auto & my_row = sparsity_pattern[my_r];

        // They wouldn't have sent an empty row
        libmesh_assert(!their_row.empty());

        // We can end up with an empty row on a dof that touches our
        // inactive elements but not our active ones
        if (my_row.empty())
        {
          my_row.assign (their_row.begin(), their_row.end());
        }
        else
        {
          my_row.insert (my_row.end(),
                         their_row.begin(),
                         their_row.end());

          // We cannot use SparsityPattern::sort_row() here because it expects
          // the [begin,middle) [middle,end) to be non-overlapping.  This is not
          // necessarily the case here, so use std::sort()
          std::sort (my_row.begin(), my_row.end());

          my_row.erase(std::unique (my_row.begin(), my_row.end()), my_row.end());
        }

        // fix the number of on and off-processor nonzeros in this row
        n_nz[my_r] = n_oz[my_r] = 0;

        for (const auto & df : my_row)
          if ((df < local_first_dof) || (df >= local_end_dof))
            n_oz[my_r]++;
          else
            n_nz[my_r]++;
      }
      else
      {
        // Without full rows, just add their counts to ours, then clamp
        // to the obvious upper bounds (as in join()).
        for (const auto & df : their_row)
          if ((df < local_first_dof) || (df >= local_end_dof))
            n_oz[my_r]++;
          else
            n_nz[my_r]++;

        n_nz[my_r] = std::min(n_nz[my_r], n_dofs_on_proc);
        n_oz[my_r] = std::min(n_oz[my_r],
                              static_cast<dof_id_type>(n_global_dofs-n_nz[my_r]));
      }
    }
  }

  // We should have sent everything at this point.
  libmesh_assert (nonlocal_pattern.empty());

  // Make sure to cleanup requests
  Parallel::wait(dof_sends);
  Parallel::wait(row_sends);
}
646 
647 
648 } // namespace SparsityPattern
649 } // namespace libMesh
650