// The libMesh Finite Element Library.
// Copyright (C) 2002-2020 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner

// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.

// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA



// Local includes
#include "libmesh/sparsity_pattern.h"

// libMesh includes
#include "libmesh/coupling_matrix.h"
#include "libmesh/dof_map.h"
#include "libmesh/elem.h"
#include "libmesh/ghosting_functor.h"
#include "libmesh/hashword.h"
#include "libmesh/parallel_algebra.h"
#include "libmesh/parallel.h"

// TIMPI includes
#include "timpi/communicator.h"


namespace libMesh
{
namespace SparsityPattern
{

//-------------------------------------------------------
// we need to implement these constructors here so that
// a full DofMap definition is available.
Build::Build (const MeshBase & mesh_in,
              const DofMap & dof_map_in,
              const CouplingMatrix * dof_coupling_in,
              const std::set<GhostingFunctor *> & coupling_functors_in,
              const bool implicit_neighbor_dofs_in,
              const bool need_full_sparsity_pattern_in) :
  ParallelObject(dof_map_in),
  mesh(mesh_in),
  dof_map(dof_map_in),
  dof_coupling(dof_coupling_in),
  coupling_functors(coupling_functors_in),
  implicit_neighbor_dofs(implicit_neighbor_dofs_in),
  need_full_sparsity_pattern(need_full_sparsity_pattern_in),
  sparsity_pattern(),
  nonlocal_pattern(),
  n_nz(),
  n_oz()
{}


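// Splitting constructor in the style of a TBB reduction body: the new
// object copies the read-only inputs and the already-hashed DOF sets from
// `other`, but starts with empty pattern and count containers of its own,
// so that Threads::parallel_reduce() can build partial patterns
// concurrently and merge them afterwards via join().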
Build::Build (Build & other, Threads::split) :
  ParallelObject(other),
  mesh(other.mesh),
  dof_map(other.dof_map),
  dof_coupling(other.dof_coupling),
  coupling_functors(other.coupling_functors),
  implicit_neighbor_dofs(other.implicit_neighbor_dofs),
  need_full_sparsity_pattern(other.need_full_sparsity_pattern),
  hashed_dof_sets(other.hashed_dof_sets),
  sparsity_pattern(),
  nonlocal_pattern(),
  n_nz(),
  n_oz()
{}
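
// A minimal sketch of how this class is meant to be driven (the real call
// site lives inside DofMap; the variable names here are illustrative only):
//
//   SparsityPattern::Build sp (mesh, dof_map, dof_coupling, coupling_functors,
//                              implicit_neighbor_dofs, need_full_sparsity_pattern);
//
//   ConstElemRange range (mesh.active_local_elements_begin(),
//                         mesh.active_local_elements_end());
//
//   Threads::parallel_reduce (range, sp);  // operator() plus split/join
//   sp.parallel_sync();                    // exchange rows across processors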



#if defined(__GNUC__) && (__GNUC__ < 4) && !defined(__INTEL_COMPILER)

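// (Presumably a workaround for a code-generation quirk in pre-4.0 GCC;
// the original motivation is not recorded here.)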
void _dummy_function(void) {}

#endif



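// Collect the DOF indices for variable vi on elem (plus, when constraints
// are enabled, any DOFs those are constrained in terms of), then sort and
// unique the result so that later merge steps can assume an ordered,
// duplicate-free list.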
void Build::sorted_connected_dofs(const Elem * elem,
                                  std::vector<dof_id_type> & dofs_vi,
                                  unsigned int vi)
{
  dof_map.dof_indices (elem, dofs_vi, vi);
#ifdef LIBMESH_ENABLE_CONSTRAINTS
  dof_map.find_connected_dofs (dofs_vi);
#endif
  // We can be more efficient if we sort the element DOFs into
  // increasing order
  std::sort(dofs_vi.begin(), dofs_vi.end());

  // Handle cases where duplicate nodes are intentionally assigned to
  // a single element.
  dofs_vi.erase(std::unique(dofs_vi.begin(), dofs_vi.end()), dofs_vi.end());
}



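// Merge the couplings between one variable's element DOFs (the rows, i)
// and another's (the columns, j) into the pattern.  Rows this processor
// owns go straight into sparsity_pattern; rows owned elsewhere accumulate
// in nonlocal_pattern and are shipped to their owners in parallel_sync().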
void Build::handle_vi_vj(const std::vector<dof_id_type> & element_dofs_i,
                         const std::vector<dof_id_type> & element_dofs_j)
{
  const unsigned int n_dofs_on_element_i =
    cast_int<unsigned int>(element_dofs_i.size());

  const processor_id_type proc_id = mesh.processor_id();
  const dof_id_type first_dof_on_proc = dof_map.first_dof(proc_id);
  const dof_id_type end_dof_on_proc = dof_map.end_dof(proc_id);

  std::vector<dof_id_type>
    dofs_to_add;

  const unsigned int n_dofs_on_element_j =
    cast_int<unsigned int>(element_dofs_j.size());
  // It only makes sense to compute hashes and see if we can skip
  // doing work when there is a "large" number of DOFs for a given
  // element.  The cutoff for "large" is somewhat arbitrarily chosen
  // based on a test case with a spider node that resulted in O(10^3)
  // entries in element_dofs_i for O(10^3) elements.  Making this
  // number larger will disable the hashing optimization in more
  // cases.
  bool dofs_seen = false;
  if (n_dofs_on_element_j > 0 && n_dofs_on_element_i > 256)
    {
      auto hash_i = Utility::hashword(element_dofs_i);
      auto hash_j = Utility::hashword(element_dofs_j);
      auto final_hash = Utility::hashword2(hash_i, hash_j);
      auto result = hashed_dof_sets.insert(final_hash);
      // if insert failed, we have already seen these dofs
      dofs_seen = !result.second;
    }

  // There might be 0 DOFs for the other variable on the same element
  // (e.g. when subdomain-restricted variables do not overlap); in that
  // case there is nothing to do.
  if (n_dofs_on_element_j > 0 && !dofs_seen)
    {
      for (unsigned int i=0; i<n_dofs_on_element_i; i++)
        {
          const dof_id_type ig = element_dofs_i[i];

          SparsityPattern::Row * row;

          // We save non-local row components for now so we can
          // communicate them to other processors later.

          if ((ig >= first_dof_on_proc) &&
              (ig <  end_dof_on_proc))
            {
              // This is what I mean
              // libmesh_assert_greater_equal ((ig - first_dof_on_proc), 0);
              // but do the test like this because ig and
              // first_dof_on_proc are unsigned ints
              libmesh_assert_greater_equal (ig, first_dof_on_proc);
              libmesh_assert_less (ig, (sparsity_pattern.size() +
                                        first_dof_on_proc));

              row = &sparsity_pattern[ig - first_dof_on_proc];
            }
          else
            {
              row = &nonlocal_pattern[ig];
            }

          // If the row is empty we will add *all*
          // the element j DOFs, so just do that.
          if (row->empty())
            {
              row->insert(row->end(),
                          element_dofs_j.begin(),
                          element_dofs_j.end());
            }
          else
            {
              // Build a list of the DOF indices not found in the
              // sparsity pattern
              dofs_to_add.clear();

              // Cache iterators.  Low will move forward, subsequent
              // searches will be on smaller ranges
              SparsityPattern::Row::iterator
                low = std::lower_bound
                  (row->begin(), row->end(), element_dofs_j.front()),
                high = std::upper_bound
                  (low, row->end(), element_dofs_j.back());

              for (unsigned int j=0; j<n_dofs_on_element_j; j++)
                {
                  const dof_id_type jg = element_dofs_j[j];

                  // See if jg is in the sorted range
                  std::pair<SparsityPattern::Row::iterator,
                            SparsityPattern::Row::iterator>
                    pos = std::equal_range (low, high, jg);

                  // Must add jg if it wasn't found
                  if (pos.first == pos.second)
                    dofs_to_add.push_back(jg);

                  // pos.first is now a valid lower bound for any
                  // remaining element j DOFs. (That's why we sorted them.)
                  // Use it for the next search
                  low = pos.first;
                }

              // Add to the sparsity pattern
              if (!dofs_to_add.empty())
                {
                  const std::size_t old_size = row->size();

                  row->insert (row->end(),
                               dofs_to_add.begin(),
                               dofs_to_add.end());

                  SparsityPattern::sort_row
                    (row->begin(), row->begin()+old_size,
                     row->end());
                }
            }
        } // End dofs-of-var-i loop
    } // End if-dofs-of-var-j
}



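// Process one thread's chunk of the element range: for each element, gather
// its sorted DOFs per variable, ask the coupling functors which (possibly
// ghosted) elements it couples to, and merge every coupled (vi, vj) DOF
// pair via handle_vi_vj().  Finally, tally per-row on- and off-processor
// nonzero counts into n_nz and n_oz.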
void Build::operator()(const ConstElemRange & range)
{
  // Compute the sparsity structure of the global matrix.  This can be
  // fed into a PetscMatrix to allocate exactly the number of nonzeros
  // necessary to store the matrix.  This algorithm should be linear
  // in the (# of elements)*(# nodes per element)
  const processor_id_type proc_id = mesh.processor_id();
  const dof_id_type n_dofs_on_proc = dof_map.n_dofs_on_processor(proc_id);
  const dof_id_type first_dof_on_proc = dof_map.first_dof(proc_id);
  const dof_id_type end_dof_on_proc = dof_map.end_dof(proc_id);

  sparsity_pattern.resize(n_dofs_on_proc);

  // Handle dof coupling specified by library and user coupling functors
  {
    const unsigned int n_var = dof_map.n_variables();

    std::vector<std::vector<dof_id_type> > element_dofs_i(n_var);

    std::vector<const Elem *> coupled_neighbors;
    for (const auto & elem : range)
      {
        // Make some fake element iterators defining a range
        // pointing to only this element; the ghosting functor
        // interface expects an element-iterator range rather than a
        // single element.
        Elem * const * elempp = const_cast<Elem * const *>(&elem);
        Elem * const * elemend = elempp+1;

        const MeshBase::const_element_iterator fake_elem_it =
          MeshBase::const_element_iterator(elempp,
                                           elemend,
                                           Predicates::NotNull<Elem * const *>());

        const MeshBase::const_element_iterator fake_elem_end =
          MeshBase::const_element_iterator(elemend,
                                           elemend,
                                           Predicates::NotNull<Elem * const *>());

        GhostingFunctor::map_type elements_to_couple;

        // Man, I wish we had guaranteed unique_ptr availability...
        std::set<CouplingMatrix *> temporary_coupling_matrices;

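        // Ask every coupling functor, library-provided and user-added
        // alike, which elements this element's DOFs couple to, merging
        // the answers into a single element -> CouplingMatrix map.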
        dof_map.merge_ghost_functor_outputs(elements_to_couple,
                                            temporary_coupling_matrices,
                                            dof_map.coupling_functors_begin(),
                                            dof_map.coupling_functors_end(),
                                            fake_elem_it,
                                            fake_elem_end,
                                            DofObject::invalid_processor_id);
        for (unsigned int vi=0; vi<n_var; vi++)
          this->sorted_connected_dofs(elem, element_dofs_i[vi], vi);

        for (unsigned int vi=0; vi<n_var; vi++)
          for (const auto & pr : elements_to_couple)
            {
              const Elem * const partner = pr.first;
              const CouplingMatrix * ghost_coupling = pr.second;

              // Loop over coupling matrix row variables if we have a
              // coupling matrix, or all variables if not.
              if (ghost_coupling)
                {
                  libmesh_assert_equal_to (ghost_coupling->size(), n_var);
                  ConstCouplingRow ccr(vi, *ghost_coupling);

                  for (const auto & idx : ccr)
                    {
                      if (partner == elem)
                        this->handle_vi_vj(element_dofs_i[vi], element_dofs_i[idx]);
                      else
                        {
                          std::vector<dof_id_type> partner_dofs;
                          this->sorted_connected_dofs(partner, partner_dofs, idx);
                          this->handle_vi_vj(element_dofs_i[vi], partner_dofs);
                        }
                    }
                }
              else
                {
                  for (unsigned int vj = 0; vj != n_var; ++vj)
                    {
                      if (partner == elem)
                        this->handle_vi_vj(element_dofs_i[vi], element_dofs_i[vj]);
                      else
                        {
                          std::vector<dof_id_type> partner_dofs;
                          this->sorted_connected_dofs(partner, partner_dofs, vj);
                          this->handle_vi_vj(element_dofs_i[vi], partner_dofs);
                        }
                    }
                }
            } // End ghosted element loop

        for (auto & mat : temporary_coupling_matrices)
          delete mat;

      } // End range element loop
  } // End ghosting functor section

  // Now a new chunk of sparsity structure is built for all of the
  // DOFs connected to our rows of the matrix.

  // If we're building a full sparsity pattern, then we've got
  // complete rows to work with, so we can just count them from
  // scratch.
  if (need_full_sparsity_pattern)
    {
      n_nz.clear();
      n_oz.clear();
    }

  n_nz.resize (n_dofs_on_proc, 0);
  n_oz.resize (n_dofs_on_proc, 0);

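  // n_nz[i] counts row entries in columns this processor owns and n_oz[i]
  // counts entries in off-processor columns; these correspond to the
  // "diagonal" and "off-diagonal" block counts that PETSc-style matrix
  // preallocation expects.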
  for (dof_id_type i=0; i<n_dofs_on_proc; i++)
    {
      // Get the row of the sparsity pattern
      SparsityPattern::Row & row = sparsity_pattern[i];

      for (const auto & df : row)
        if ((df < first_dof_on_proc) || (df >= end_dof_on_proc))
          n_oz[i]++;
        else
          n_nz[i]++;

      // If we're not building a full sparsity pattern, then we want
      // to avoid overcounting these entries as much as possible.
      if (!need_full_sparsity_pattern)
        row.clear();
    }
}



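// Reduction step: fold another thread's partial results into ours.  With a
// full sparsity pattern we merge the rows and recount exactly; otherwise we
// just add the nonzero counts, clamping them to what a row could possibly
// hold.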
void Build::join (const SparsityPattern::Build & other)
{
  const processor_id_type proc_id = mesh.processor_id();
  const dof_id_type n_global_dofs = dof_map.n_dofs();
  const dof_id_type n_dofs_on_proc = dof_map.n_dofs_on_processor(proc_id);
  const dof_id_type first_dof_on_proc = dof_map.first_dof(proc_id);
  const dof_id_type end_dof_on_proc = dof_map.end_dof(proc_id);

  libmesh_assert_equal_to (sparsity_pattern.size(), other.sparsity_pattern.size());
  libmesh_assert_equal_to (n_nz.size(), sparsity_pattern.size());
  libmesh_assert_equal_to (n_oz.size(), sparsity_pattern.size());

  for (dof_id_type r=0; r<n_dofs_on_proc; r++)
    {
      // increment the number of on and off-processor nonzeros in this row
      // (note this will be an upper bound unless we need the full sparsity pattern)
      if (need_full_sparsity_pattern)
        {
          SparsityPattern::Row & my_row = sparsity_pattern[r];
          const SparsityPattern::Row & their_row = other.sparsity_pattern[r];

          // simple copy if I have no dofs
          if (my_row.empty())
            my_row = their_row;

          // otherwise add their DOFs to mine, resort, and re-unique the row
          else if (!their_row.empty()) // do nothing for the trivial case where
            {                          // their row is empty
              my_row.insert (my_row.end(),
                             their_row.begin(),
                             their_row.end());

              // We cannot use SparsityPattern::sort_row() here because it expects
              // the [begin,middle) [middle,end) to be non-overlapping.  This is not
              // necessarily the case here, so use std::sort()
              std::sort (my_row.begin(), my_row.end());

              my_row.erase(std::unique (my_row.begin(), my_row.end()), my_row.end());
            }

          // fix the number of on and off-processor nonzeros in this row
          n_nz[r] = n_oz[r] = 0;

          for (const auto & df : my_row)
            if ((df < first_dof_on_proc) || (df >= end_dof_on_proc))
              n_oz[r]++;
            else
              n_nz[r]++;
        }
      else
        {
          n_nz[r] += other.n_nz[r];
          n_nz[r] = std::min(n_nz[r], n_dofs_on_proc);
          n_oz[r] += other.n_oz[r];
          n_oz[r] = std::min(n_oz[r], static_cast<dof_id_type>(n_global_dofs-n_nz[r]));
        }
    }

  // Move nonlocal row information to ourselves; the other thread
  // won't need it in the map after that.
  for (const auto & p : other.nonlocal_pattern)
    {
#ifndef NDEBUG
      const dof_id_type dof_id = p.first;

      processor_id_type dbg_proc_id = 0;
      while (dof_id >= dof_map.end_dof(dbg_proc_id))
        dbg_proc_id++;
      libmesh_assert (dbg_proc_id != this->processor_id());
#endif

      const SparsityPattern::Row & their_row = p.second;

      // We should have no empty values in a map
      libmesh_assert (!their_row.empty());

      NonlocalGraph::iterator my_it = nonlocal_pattern.find(p.first);
      if (my_it == nonlocal_pattern.end())
        {
          // nonlocal_pattern[it->first].swap(their_row);
          nonlocal_pattern[p.first] = their_row;
        }
      else
        {
          SparsityPattern::Row & my_row = my_it->second;

          my_row.insert (my_row.end(),
                         their_row.begin(),
                         their_row.end());

          // We cannot use SparsityPattern::sort_row() here because it expects
          // the [begin,middle) [middle,end) to be non-overlapping.  This is not
          // necessarily the case here, so use std::sort()
          std::sort (my_row.begin(), my_row.end());

          my_row.erase(std::unique (my_row.begin(), my_row.end()), my_row.end());
        }
    }

  // Combine the other thread's hashed_dof_sets with ours.
  hashed_dof_sets.insert(other.hashed_dof_sets.begin(),
                         other.hashed_dof_sets.end());
}



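// Exchange rows across processors: each rank sends the nonlocal rows it
// accumulated to the ranks that own them, receives the rows other ranks
// built for its own DOFs, merges them in, and updates n_nz/n_oz.  The
// communication is point-to-point, with one alltoall up front so everyone
// learns who will be talking to whom.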
void Build::parallel_sync ()
{
  parallel_object_only();
  libmesh_assert(this->comm().verify(need_full_sparsity_pattern));

  auto & comm = this->comm();
  auto pid = comm.rank();
  auto num_procs = comm.size();

  auto dof_tag = comm.get_unique_tag();
  auto row_tag = comm.get_unique_tag();
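  // Using two distinct tags keeps the dof-index messages and the row
  // messages from matching each other's receives.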

  const auto n_global_dofs = dof_map.n_dofs();
  const auto n_dofs_on_proc = dof_map.n_dofs_on_processor(pid);
  const auto local_first_dof = dof_map.first_dof();
  const auto local_end_dof = dof_map.end_dof();

  // The data to send
  std::map<processor_id_type, std::pair<std::vector<dof_id_type>, std::vector<Row>>> data_to_send;

  // True/false if we're sending to that processor
  std::vector<char> will_send_to(num_procs);

  processor_id_type num_sends = 0;

  // Loop over the nonlocal rows and move them into the new data structure
  NonlocalGraph::iterator it = nonlocal_pattern.begin();
  while (it != nonlocal_pattern.end())
    {
      const auto dof_id = it->first;
      auto & row = it->second;

      processor_id_type proc_id = 0;
      while (dof_id >= dof_map.end_dof(proc_id))
        proc_id++;

      // Count the unique sends
      if (!will_send_to[proc_id])
        num_sends++;

      will_send_to[proc_id] = true;

      // operator[] on the rhs on purpose: default-construct this
      // processor's entry if it isn't there yet
      auto & proc_data = data_to_send[proc_id];

      proc_data.first.push_back(dof_id);

      // Note this invalidates the data in nonlocal_pattern
      proc_data.second.emplace_back(std::move(row));

      // Might as well remove it since it's invalidated anyway
      it = nonlocal_pattern.erase(it);
    }

  // Trade the send flags so that every processor learns who will be
  // sending to it
  comm.alltoall(will_send_to);

  // will_send_to now represents who we'll receive from;
  // give it a good name
  auto & will_receive_from = will_send_to;

  std::vector<Parallel::Request> dof_sends(num_sends);
  std::vector<Parallel::Request> row_sends(num_sends);

  // Post all of the sends
  processor_id_type current_send = 0;
  for (auto & proc_data : data_to_send)
    {
      auto proc_id = proc_data.first;

      auto & dofs = proc_data.second.first;
      auto & rows = proc_data.second.second;

      comm.send(proc_id, dofs, dof_sends[current_send], dof_tag);
      comm.send(proc_id, rows, row_sends[current_send], row_tag);

      current_send++;
    }

  // Figure out how many receives we're going to have and make a
  // quick list of who's sending
  processor_id_type num_receives = 0;
  std::vector<processor_id_type> receiving_from;
  for (processor_id_type proc_id = 0; proc_id < num_procs; proc_id++)
    {
      auto will = will_receive_from[proc_id];

      if (will)
        {
          num_receives++;
          receiving_from.push_back(proc_id);
        }
    }

  // Post the receives and handle the data
  for (auto proc_id : receiving_from)
    {
      std::vector<dof_id_type> in_dofs;
      std::vector<Row> in_rows;

      // Receive dofs
      comm.receive(proc_id, in_dofs, dof_tag);
      comm.receive(proc_id, in_rows, row_tag);

      const std::size_t n_rows = in_dofs.size();
      for (std::size_t i = 0; i != n_rows; ++i)
        {
          const auto r = in_dofs[i];
          const auto my_r = r - local_first_dof;

          auto & their_row = in_rows[i];

          if (need_full_sparsity_pattern)
            {
              auto & my_row = sparsity_pattern[my_r];

              // They wouldn't have sent an empty row
              libmesh_assert(!their_row.empty());

              // We can end up with an empty row on a dof that touches our
              // inactive elements but not our active ones
              if (my_row.empty())
                {
                  my_row.assign (their_row.begin(), their_row.end());
                }
              else
                {
                  my_row.insert (my_row.end(),
                                 their_row.begin(),
                                 their_row.end());

                  // We cannot use SparsityPattern::sort_row() here because it expects
                  // the [begin,middle) [middle,end) to be non-overlapping.  This is not
                  // necessarily the case here, so use std::sort()
                  std::sort (my_row.begin(), my_row.end());

                  my_row.erase(std::unique (my_row.begin(), my_row.end()), my_row.end());
                }

              // fix the number of on and off-processor nonzeros in this row
              n_nz[my_r] = n_oz[my_r] = 0;

              for (const auto & df : my_row)
                if ((df < local_first_dof) || (df >= local_end_dof))
                  n_oz[my_r]++;
                else
                  n_nz[my_r]++;
            }
          else
            {
              for (const auto & df : their_row)
                if ((df < local_first_dof) || (df >= local_end_dof))
                  n_oz[my_r]++;
                else
                  n_nz[my_r]++;

              n_nz[my_r] = std::min(n_nz[my_r], n_dofs_on_proc);
              n_oz[my_r] = std::min(n_oz[my_r],
                                    static_cast<dof_id_type>(n_global_dofs-n_nz[my_r]));
            }
        }
    }

  // We should have sent everything at this point.
  libmesh_assert (nonlocal_pattern.empty());

  // Make sure to clean up requests
  Parallel::wait(dof_sends);
  Parallel::wait(row_sends);
}


} // namespace SparsityPattern
} // namespace libMesh