1 // ---------------------------------------------------------------------
2 //
3 // Copyright (C) 1999 - 2020 by the deal.II authors
4 //
5 // This file is part of the deal.II library.
6 //
7 // The deal.II library is free software; you can use it, redistribute
8 // it, and/or modify it under the terms of the GNU Lesser General
9 // Public License as published by the Free Software Foundation; either
10 // version 2.1 of the License, or (at your option) any later version.
11 // The full text of the license can be found in the file LICENSE.md at
12 // the top level directory of deal.II.
13 //
14 // ---------------------------------------------------------------------
15 
16 #ifndef dealii_sparse_vanka_templates_h
17 #define dealii_sparse_vanka_templates_h
18 
19 
20 #include <deal.II/base/config.h>
21 
22 #include <deal.II/base/memory_consumption.h>
23 #include <deal.II/base/thread_management.h>
24 
25 #include <deal.II/lac/full_matrix.h>
26 #include <deal.II/lac/sparse_matrix.h>
27 #include <deal.II/lac/sparse_vanka.h>
28 #include <deal.II/lac/vector.h>
29 
30 #include <algorithm>
31 #include <map>
32 
33 DEAL_II_NAMESPACE_OPEN
34 
35 template <typename number>
SparseVanka()36 SparseVanka<number>::SparseVanka()
37   : matrix()
38   , selected()
39   , inverses()
40   , _m(0)
41   , _n(0)
42 {}
43 
44 template <typename number>
SparseVanka(const SparseMatrix<number> & M,const std::vector<bool> & selected_dofs,const bool,const unsigned int)45 SparseVanka<number>::SparseVanka(const SparseMatrix<number> &M,
46                                  const std::vector<bool> &   selected_dofs,
47                                  const bool /*conserve_mem*/,
48                                  const unsigned int /*n_threads*/)
49   : SparseVanka(M, selected_dofs)
50 {}
51 
52 template <typename number>
SparseVanka(const SparseMatrix<number> & M,const std::vector<bool> & selected_dofs)53 SparseVanka<number>::SparseVanka(const SparseMatrix<number> &M,
54                                  const std::vector<bool> &   selected_dofs)
55   : matrix(&M, typeid(*this).name())
56   , selected(&selected_dofs)
57   , inverses(M.m(), nullptr)
58   , _m(M.m())
59   , _n(M.n())
60 {
61   Assert(M.m() == M.n(), ExcNotQuadratic());
62   Assert(M.m() == selected->size(),
63          ExcDimensionMismatch(M.m(), selected->size()));
64 
65   compute_inverses();
66 }
67 
68 
69 template <typename number>
~SparseVanka()70 SparseVanka<number>::~SparseVanka()
71 {
72   typename std::vector<
73     SmartPointer<FullMatrix<float>, SparseVanka<number>>>::iterator i;
74   for (i = inverses.begin(); i != inverses.end(); ++i)
75     {
76       FullMatrix<float> *p = *i;
77       *i                   = nullptr;
78       if (p != nullptr)
79         delete p;
80     }
81 }
82 
83 
84 template <typename number>
85 void
initialize(const SparseMatrix<number> & M,const AdditionalData & additional_data)86 SparseVanka<number>::initialize(const SparseMatrix<number> &M,
87                                 const AdditionalData &      additional_data)
88 {
89   matrix   = &M;
90   selected = &(additional_data.selected);
91   inverses.resize(M.m());
92   _m = M.m();
93   _n = M.n();
94 
95   Assert(M.m() == M.n(), ExcNotQuadratic());
96   Assert(M.m() == selected->size(),
97          ExcDimensionMismatch(M.m(), selected->size()));
98 
99   compute_inverses();
100 }
101 
102 template <typename number>
103 void
compute_inverses()104 SparseVanka<number>::compute_inverses()
105 {
106   Assert(matrix != nullptr, ExcNotInitialized());
107   Assert(selected != nullptr, ExcNotInitialized());
108 
109 #ifndef DEAL_II_WITH_THREADS
110   compute_inverses(0, matrix->m());
111 #else
112   const size_type n_inverses =
113     std::count(selected->begin(), selected->end(), true);
114   // somewhat arbitrarily set up an equal number of tasks as we have threads
115   const std::size_t n_tasks = MultithreadInfo::n_threads();
116   const size_type   n_inverses_per_task =
117     std::max(static_cast<size_type>(n_inverses / n_tasks),
118              static_cast<size_type>(1U));
119 
120   // set up start and end index
121   // for each of the
122   // tasks. note that we have
123   // to work somewhat to get this
124   // appropriate, since the
125   // indices for which inverses
126   // have to be computed may not
127   // be evenly distributed in the
128   // vector. as an extreme
129   // example consider numbering
130   // of DoFs by component, then
131   // all indices for which we
132   // have to do work will be
133   // consecutive, with other
134   // consecutive regions where we
135   // do not have to do something
136   std::vector<std::pair<size_type, unsigned int>> blocking(n_tasks);
137 
138   unsigned int c      = 0;
139   unsigned int task_n = 0;
140   blocking[0].first   = 0;
141 
142   for (size_type i = 0; (i < matrix->m()) && (task_n + 1 < n_tasks); ++i)
143     {
144       if ((*selected)[i] == true)
145         ++c;
146       if (c == n_inverses_per_task)
147         {
148           blocking[task_n].second    = i;
149           blocking[task_n + 1].first = i;
150           ++task_n;
151 
152           c = 0;
153         }
154     }
155   blocking[n_tasks - 1].second = matrix->m();
156 
157   // Now spawn the tasks
158   Threads::TaskGroup<> tasks;
159   for (unsigned int i = 0; i < n_tasks; ++i)
160     tasks += Threads::new_task([&, i]() {
161       this->compute_inverses(blocking[i].first, blocking[i].second);
162     });
163   tasks.join_all();
164 #endif
165 }
166 
167 
168 template <typename number>
169 void
compute_inverses(const size_type begin,const size_type end)170 SparseVanka<number>::compute_inverses(const size_type begin,
171                                       const size_type end)
172 {
173   // set-up the vector that will be used
174   // by the functions which we call
175   // below.
176   std::vector<size_type> local_indices;
177 
178   // traverse all rows of the matrix
179   // which are selected
180   for (size_type row = begin; row < end; ++row)
181     if ((*selected)[row] == true)
182       compute_inverse(row, local_indices);
183 }
184 
185 
186 
187 template <typename number>
188 void
compute_inverse(const size_type row,std::vector<size_type> & local_indices)189 SparseVanka<number>::compute_inverse(const size_type         row,
190                                      std::vector<size_type> &local_indices)
191 {
192   Assert(matrix != nullptr, ExcNotInitialized());
193   Assert(selected != nullptr, ExcNotInitialized());
194 
195   // first define an alias to the sparsity
196   // pattern of the matrix, since this
197   // will be used quite often
198   const SparsityPattern &structure = matrix->get_sparsity_pattern();
199 
200   const size_type row_length = structure.row_length(row);
201 
202   inverses[row] = new FullMatrix<float>(row_length, row_length);
203 
204   // collect the dofs that couple
205   // with @p row
206   local_indices.resize(row_length);
207   for (size_type i = 0; i < row_length; ++i)
208     local_indices[i] = structure.column_number(row, i);
209 
210   // Build local matrix
211   inverses[row]->extract_submatrix_from(*matrix, local_indices, local_indices);
212 
213   // Compute inverse
214   inverses[row]->gauss_jordan();
215 }
216 
217 
218 template <typename number>
219 template <typename number2>
220 void
vmult(Vector<number2> & dst,const Vector<number2> & src)221 SparseVanka<number>::vmult(Vector<number2> &      dst,
222                            const Vector<number2> &src) const
223 {
224   Assert(matrix != nullptr, ExcNotInitialized());
225   Assert(selected != nullptr, ExcNotInitialized());
226 
227   // first set output vector to zero
228   dst = 0;
229   // then pass on to the function
230   // that actually does the work
231   apply_preconditioner(dst, src);
232 }
233 
234 
235 template <typename number>
236 template <typename number2>
237 void
apply_preconditioner(Vector<number2> & dst,const Vector<number2> & src,const std::vector<bool> * const dof_mask)238 SparseVanka<number>::apply_preconditioner(
239   Vector<number2> &              dst,
240   const Vector<number2> &        src,
241   const std::vector<bool> *const dof_mask) const
242 {
243   Assert(dst.size() == src.size(),
244          ExcDimensionMismatch(dst.size(), src.size()));
245   Assert(dst.size() == matrix->m(),
246          ExcDimensionMismatch(dst.size(), src.size()));
247 
248   // first define an alias to the sparsity
249   // pattern of the matrix, since this
250   // will be used quite often
251   const SparsityPattern &structure = matrix->get_sparsity_pattern();
252 
253 
254   // store whether we shall work on
255   // the whole matrix, or only on
256   // blocks. this variable is used to
257   // optimize access to vectors a
258   // little bit.
259   const bool range_is_restricted = (dof_mask != nullptr);
260 
261   // space to be used for local
262   // systems. allocate as much memory
263   // as is the maximum. this
264   // eliminates the need to
265   // re-allocate memory inside the
266   // loop.
267   FullMatrix<float> local_matrix(structure.max_entries_per_row(),
268                                  structure.max_entries_per_row());
269   Vector<float>     b(structure.max_entries_per_row());
270   Vector<float>     x(structure.max_entries_per_row());
271 
272   std::map<size_type, size_type> local_index;
273 
274   // traverse all rows of the matrix
275   // which are selected
276   const size_type n = matrix->m();
277   for (size_type row = 0; row < n; ++row)
278     if (((*selected)[row] == true) &&
279         ((range_is_restricted == false) || ((*dof_mask)[row] == true)))
280       {
281         const size_type row_length = structure.row_length(row);
282 
283         b.reinit(row_length);
284         x.reinit(row_length);
285         // mapping between:
286         // 1 column number of all
287         //   entries in this row, and
288         // 2 the position within this
289         //   row (as stored in the
290         //   SparsityPattern object
291         //
292         // since we do not explicitly
293         // consider nonsymmetric sparsity
294         // patterns, the first element
295         // of each entry simply denotes
296         // all degrees of freedom that
297         // couple with @p row.
298         local_index.clear();
299         for (size_type i = 0; i < row_length; ++i)
300           local_index.insert(
301             std::pair<size_type, size_type>(structure.column_number(row, i),
302                                             i));
303 
304         // Build local matrix and rhs
305         for (std::map<size_type, size_type>::const_iterator is =
306                local_index.begin();
307              is != local_index.end();
308              ++is)
309           {
310             // irow loops over all DoFs that
311             // couple with the present DoF
312             const size_type irow = is->first;
313             // index of DoF irow in the matrix
314             // row corresponding to DoF @p row.
315             // runs between 0 and row_length
316             const size_type i = is->second;
317 
318             // copy rhs
319             b(i) = src(irow);
320 
321             // for all the DoFs that irow
322             // couples with
323             // number of DoFs coupling to
324             // irow (including irow itself)
325             for (typename SparseMatrix<number>::const_iterator p =
326                    matrix->begin(irow);
327                  p != matrix->end(irow);
328                  ++p)
329               {
330                 // find out whether this DoF
331                 // (that couples with @p irow,
332                 // which itself couples with
333                 // @p row) also couples with
334                 // @p row.
335                 const std::map<size_type, size_type>::const_iterator js =
336                   local_index.find(p->column());
337                 // if not, then still use
338                 // this dof to modify the rhs
339                 //
340                 // note that if so, we already
341                 // have copied the entry above
342                 if (js == local_index.end())
343                   {
344                     if (!range_is_restricted ||
345                         ((*dof_mask)[p->column()] == true))
346                       b(i) -= p->value() * dst(p->column());
347                   }
348               }
349           }
350 
351         // apply preconditioner
352         inverses[row]->vmult(x, b);
353 
354         // Distribute new values
355         for (std::map<size_type, size_type>::const_iterator is =
356                local_index.begin();
357              is != local_index.end();
358              ++is)
359           {
360             const size_type irow = is->first;
361             const size_type i    = is->second;
362 
363             if (!range_is_restricted || ((*dof_mask)[irow] == true))
364               dst(irow) = x(i);
365             // do nothing if not in
366             // the range
367           }
368       }
369 }
370 
371 
372 
373 template <typename number>
374 std::size_t
memory_consumption()375 SparseVanka<number>::memory_consumption() const
376 {
377   std::size_t mem =
378     (sizeof(*this) + MemoryConsumption::memory_consumption(*selected));
379   for (size_type i = 0; i < inverses.size(); ++i)
380     mem += MemoryConsumption::memory_consumption(*inverses[i]);
381 
382   return mem;
383 }
384 
385 
386 
387 template <typename number>
AdditionalData(const std::vector<bool> & selected)388 SparseVanka<number>::AdditionalData::AdditionalData(
389   const std::vector<bool> &selected)
390   : selected(selected)
391 {}
392 
393 
394 
395 template <typename number>
AdditionalData(const std::vector<bool> & selected,const bool,const unsigned int)396 SparseVanka<number>::AdditionalData::AdditionalData(
397   const std::vector<bool> &selected,
398   const bool /*conserve_mem*/,
399   const unsigned int /*n_threads*/)
400   : AdditionalData(selected)
401 {}
402 
403 
404 //---------------------------------------------------------------------------
405 
406 
407 template <typename number>
SparseBlockVanka(const SparseMatrix<number> & M,const std::vector<bool> & selected,const unsigned int n_blocks,const BlockingStrategy blocking_strategy)408 SparseBlockVanka<number>::SparseBlockVanka(
409   const SparseMatrix<number> &M,
410   const std::vector<bool> &   selected,
411   const unsigned int          n_blocks,
412   const BlockingStrategy      blocking_strategy)
413   : SparseVanka<number>(M, selected)
414   , n_blocks(n_blocks)
415   , dof_masks(n_blocks, std::vector<bool>(M.m(), false))
416 {
417   compute_dof_masks(M, selected, blocking_strategy);
418 }
419 
420 
421 template <typename number>
SparseBlockVanka(const SparseMatrix<number> & M,const std::vector<bool> & selected,const unsigned int n_blocks,const BlockingStrategy blocking_strategy,const bool,const unsigned int)422 SparseBlockVanka<number>::SparseBlockVanka(
423   const SparseMatrix<number> &M,
424   const std::vector<bool> &   selected,
425   const unsigned int          n_blocks,
426   const BlockingStrategy      blocking_strategy,
427   const bool /*conserve_memory*/,
428   const unsigned int /*n_threads*/)
429   : SparseBlockVanka(M, selected, n_blocks, blocking_strategy)
430 {}
431 
432 
433 template <typename number>
434 void
compute_dof_masks(const SparseMatrix<number> & M,const std::vector<bool> & selected,const BlockingStrategy blocking_strategy)435 SparseBlockVanka<number>::compute_dof_masks(
436   const SparseMatrix<number> &M,
437   const std::vector<bool> &   selected,
438   const BlockingStrategy      blocking_strategy)
439 {
440   Assert(n_blocks > 0, ExcInternalError());
441 
442   const size_type n_inverses =
443     std::count(selected.begin(), selected.end(), true);
444 
445   const size_type n_inverses_per_block =
446     std::max(n_inverses / n_blocks, static_cast<size_type>(1U));
447 
448   // precompute the splitting points
449   std::vector<std::pair<size_type, size_type>> intervals(n_blocks);
450 
451   // set up start and end index for
452   // each of the blocks. note that
453   // we have to work somewhat to get
454   // this appropriate, since the
455   // indices for which inverses have
456   // to be computed may not be evenly
457   // distributed in the vector. as an
458   // extreme example consider
459   // numbering of DoFs by component,
460   // then all indices for which we
461   // have to do work will be
462   // consecutive, with other
463   // consecutive regions where we do
464   // not have to do something
465   {
466     unsigned int c     = 0;
467     unsigned int block = 0;
468     intervals[0].first = 0;
469 
470     for (size_type i = 0; (i < M.m()) && (block + 1 < n_blocks); ++i)
471       {
472         if (selected[i] == true)
473           ++c;
474         if (c == n_inverses_per_block)
475           {
476             intervals[block].second    = i;
477             intervals[block + 1].first = i;
478             ++block;
479 
480             c = 0;
481           }
482       }
483     intervals[n_blocks - 1].second = M.m();
484   }
485 
486   // now transfer the knowledge on
487   // the splitting points into the
488   // vector<bool>s that the base
489   // class wants to see. the way how
490   // we do this depends on the
491   // requested blocking strategy
492   switch (blocking_strategy)
493     {
494       case index_intervals:
495         {
496           for (unsigned int block = 0; block < n_blocks; ++block)
497             std::fill_n(dof_masks[block].begin() + intervals[block].first,
498                         intervals[block].second - intervals[block].first,
499                         true);
500           break;
501         }
502 
503       case adaptive:
504         {
505           // the splitting points for
506           // the DoF have been computed
507           // above already, but we will
508           // only use them to split the
509           // Lagrange dofs into
510           // blocks. splitting the
511           // remaining dofs will be
512           // done now.
513 
514           // first count how often the
515           // Lagrange dofs of each
516           // block access the different
517           // dofs
518           Table<2, size_type> access_count(n_blocks, M.m());
519 
520           // set-up the map that will
521           // be used to store the
522           // indices each Lagrange dof
523           // accesses
524           const SparsityPattern &structure = M.get_sparsity_pattern();
525 
526           for (size_type row = 0; row < M.m(); ++row)
527             if (selected[row] == true)
528               {
529                 // first find out to
530                 // which block the
531                 // present row belongs
532                 unsigned int block_number = 0;
533                 while (row >= intervals[block_number].second)
534                   ++block_number;
535                 Assert(block_number < n_blocks, ExcInternalError());
536 
537                 // now traverse the
538                 // matrix structure to
539                 // find out to which
540                 // dofs number the
541                 // present index wants
542                 // to write
543                 const size_type row_length = structure.row_length(row);
544                 for (size_type i = 0; i < row_length; ++i)
545                   ++access_count[block_number][structure.column_number(row, i)];
546               }
547 
548           // now we know that block @p i
549           // wants to write to DoF @p j
550           // as often as
551           // <tt>access_count[i][j]</tt>
552           // times. Let @p j be allotted
553           // to the block which
554           // accesses it most often.
555           //
556           // if it is a Lagrange dof,
557           // the of course we leave it
558           // to the block we put it
559           // into in the first place
560           for (size_type row = 0; row < M.m(); ++row)
561             if (selected[row] == true)
562               {
563                 unsigned int block_number = 0;
564                 while (row >= intervals[block_number].second)
565                   ++block_number;
566 
567                 dof_masks[block_number][row] = true;
568               }
569             else
570               {
571                 // find out which block
572                 // accesses this dof
573                 // the most often
574                 size_type    max_accesses     = 0;
575                 unsigned int max_access_block = 0;
576                 for (unsigned int block = 0; block < n_blocks; ++block)
577                   if (access_count[block][row] > max_accesses)
578                     {
579                       max_accesses     = access_count[block][row];
580                       max_access_block = block;
581                     }
582                 dof_masks[max_access_block][row] = true;
583               }
584 
585           break;
586         }
587 
588       default:
589         Assert(false, ExcInternalError());
590     }
591 }
592 
593 
594 
595 template <typename number>
596 template <typename number2>
597 void
vmult(Vector<number2> & dst,const Vector<number2> & src)598 SparseBlockVanka<number>::vmult(Vector<number2> &      dst,
599                                 const Vector<number2> &src) const
600 {
601   dst = 0;
602 
603   Threads::TaskGroup<> tasks;
604   for (unsigned int block = 0; block < n_blocks; ++block)
605     tasks += Threads::new_task(
606       [&, block] { this->apply_preconditioner(dst, src, &dof_masks[block]); });
607   tasks.join_all();
608 }
609 
610 
611 
612 template <typename number>
613 std::size_t
memory_consumption()614 SparseBlockVanka<number>::memory_consumption() const
615 {
616   return SparseVanka<number>::memory_consumption() +
617          MemoryConsumption::memory_consumption(dof_masks);
618 }
619 
620 
621 
622 DEAL_II_NAMESPACE_CLOSE
623 
624 #endif
625