1 // ---------------------------------------------------------------------
2 //
3 // Copyright (C) 1999 - 2019 by the deal.II authors
4 //
5 // This file is part of the deal.II library.
6 //
7 // The deal.II library is free software; you can use it, redistribute
8 // it, and/or modify it under the terms of the GNU Lesser General
9 // Public License as published by the Free Software Foundation; either
10 // version 2.1 of the License, or (at your option) any later version.
11 // The full text of the license can be found in the file LICENSE.md at
12 // the top level directory of deal.II.
13 //
14 // ---------------------------------------------------------------------
15
16 #ifndef dealii_sparse_vanka_h
17 #define dealii_sparse_vanka_h
18
19
20
21 #include <deal.II/base/config.h>
22
23 #include <deal.II/base/multithread_info.h>
24 #include <deal.II/base/smartpointer.h>
25
26 #include <map>
27 #include <vector>
28
29 DEAL_II_NAMESPACE_OPEN
30
31 // Forward declarations
32 #ifndef DOXYGEN
33 template <typename number>
34 class FullMatrix;
35 template <typename number>
36 class SparseMatrix;
37 template <typename number>
38 class Vector;
39
40 template <typename number>
41 class SparseVanka;
42 template <typename number>
43 class SparseBlockVanka;
44 #endif
45
46 /*! @addtogroup Preconditioners
47 *@{
48 */
49
50 /**
51 * Point-wise Vanka preconditioning. This class does Vanka preconditioning on
52 * a point-wise base. Vanka preconditioners are used for saddle point problems
53 * like Stokes' problem or problems arising in optimization where Lagrange
54 * multipliers occur and the Newton method matrix has a zero block. With these
55 * matrices the application of Jacobi or Gauss-Seidel methods is impossible,
56 * because some diagonal elements are zero in the rows of the Lagrange
57 * multiplier. The approach of Vanka is to solve a small (usually indefinite)
58 * system of equations for each Langrange multiplier variable (we will also
59 * call the pressure in Stokes' equation a Langrange multiplier since it can
60 * be interpreted as such).
61 *
62 * Objects of this class are constructed by passing a vector of indices of the
63 * degrees of freedom of the Lagrange multiplier. In the actual
64 * preconditioning method, these rows are traversed in the order in which the
65 * appear in the matrix. Since this is a Gauß-Seidel like procedure, remember
66 * to have a good ordering in advance (for transport dominated problems,
67 * Cuthill-McKee algorithms are a good means for this, if points on the inflow
68 * boundary are chosen as starting points for the renumbering).
69 *
70 * For each selected degree of freedom, a local system of equations is built
71 * by the degree of freedom itself and all other values coupling immediately,
72 * i.e. the set of degrees of freedom considered for the local system of
73 * equations for degree of freedom @p i is @p i itself and all @p j such that
74 * the element <tt>(i,j)</tt> is a nonzero entry in the sparse matrix under
75 * consideration. The elements <tt>(j,i)</tt> are not considered. We now pick
76 * all matrix entries from rows and columns out of the set of degrees of
77 * freedom just described out of the global matrix and put it into a local
78 * matrix, which is subsequently inverted. This system may be of different
79 * size for each degree of freedom, depending for example on the local
80 * neighborhood of the respective node on a computational grid.
81 *
82 * The right hand side is built up in the same way, i.e. by copying all
83 * entries that coupled with the one under present consideration, but it is
84 * augmented by all degrees of freedom coupling with the degrees from the set
85 * described above (i.e. the DoFs coupling second order to the present one).
86 * The reason for this is, that the local problems to be solved shall have
87 * Dirichlet boundary conditions on the second order coupling DoFs, so we have
88 * to take them into account but eliminate them before actually solving; this
89 * elimination is done by the modification of the right hand side, and in the
90 * end these degrees of freedom do not occur in the matrix and solution vector
91 * any more at all.
92 *
93 * This local system is solved and the values are updated into the destination
94 * vector.
95 *
96 * Remark: the Vanka method is a non-symmetric preconditioning method.
97 *
98 *
99 * <h3>Example of Use</h3> This little example is taken from a program doing
100 * parameter optimization. The Lagrange multiplier is the third component of
101 * the finite element used. The system is solved by the GMRES method.
102 * @code
103 * // tag the Lagrange multiplier variable
104 * vector<bool> signature(3);
105 * signature[0] = signature[1] = false;
106 * signature[2] = true;
107 *
108 * // tag all dofs belonging to the Lagrange multiplier
109 * vector<bool> selected_dofs (dof.n_dofs(), false);
110 * DoFTools::extract_dofs(dof, signature, p_select);
111 * // create the Vanka object
112 * SparseVanka<double> vanka (global_matrix, selected_dofs);
113 *
114 * // create the solver
115 * SolverGMRES<> gmres(control,memory,504);
116 *
117 * // solve
118 * gmres.solve (global_matrix, solution, right_hand_side,
119 * vanka);
120 * @endcode
121 *
122 *
123 * <h4>Implementor's remark</h4> At present, the local matrices are built up
124 * such that the degree of freedom associated with the local Lagrange
125 * multiplier is the first one. Thus, usually the upper left entry in the
126 * local matrix is zero. It is not clear to me (W.B.) whether this might pose
127 * some problems in the inversion of the local matrices. Maybe someone would
128 * like to check this.
129 *
130 * @note Instantiations for this template are provided for <tt>@<float@> and
131 * @<double@></tt>; others can be generated in application programs (see the
132 * section on
133 * @ref Instantiations
134 * in the manual).
135 */
136 template <typename number>
137 class SparseVanka
138 {
139 public:
140 /**
141 * Declare type for container size.
142 */
143 using size_type = types::global_dof_index;
144
145 /**
146 * Constructor. Does nothing.
147 *
148 * Call the initialize() function before using this object as preconditioner
149 * (vmult()).
150 */
151 SparseVanka();
152
153 /**
154 * Constructor which also takes two deprecated inputs.
155 *
156 * @deprecated The use of the last two parameters is deprecated. They are
157 * currently ignored.
158 */
159 DEAL_II_DEPRECATED
160 SparseVanka(const SparseMatrix<number> &M,
161 const std::vector<bool> & selected,
162 const bool conserve_memory,
163 const unsigned int n_threads = MultithreadInfo::n_threads());
164
165 /**
166 * Constructor. Gets the matrix for preconditioning and a bit vector with
167 * entries @p true for all rows to be updated. A reference to this vector
168 * will be stored, so it must persist longer than the Vanka object. The same
169 * is true for the matrix.
170 *
171 * The matrix @p M which is passed here may or may not be the same matrix
172 * for which this object shall act as preconditioner. In particular, it is
173 * conceivable that the preconditioner is build up for one matrix once, but
174 * is used for subsequent steps in a nonlinear process as well, where the
175 * matrix changes in each step slightly.
176 */
177 SparseVanka(const SparseMatrix<number> &M, const std::vector<bool> &selected);
178
179 /**
180 * Destructor. Delete all allocated matrices.
181 */
182 ~SparseVanka();
183
184 /**
185 * Parameters for SparseVanka.
186 */
187 class AdditionalData
188 {
189 public:
190 /**
191 * Constructor. For the parameters' description, see below.
192 */
193 explicit AdditionalData(const std::vector<bool> &selected);
194
195 /**
196 * Constructor. For the parameters' description, see below.
197 *
198 * @deprecated The use of this constructor is deprecated - the second and
199 * third parameters are ignored.
200 */
201 DEAL_II_DEPRECATED
202 AdditionalData(const std::vector<bool> &selected,
203 const bool conserve_memory,
204 const unsigned int n_threads = MultithreadInfo::n_threads());
205
206 /**
207 * Indices of those degrees of freedom that we shall work on.
208 */
209 const std::vector<bool> &selected;
210 };
211
212
213 /**
214 * If the default constructor is used then this function needs to be called
215 * before an object of this class is used as preconditioner.
216 *
217 * For more detail about possible parameters, see the class documentation
218 * and the documentation of the SparseVanka::AdditionalData class.
219 *
220 * After this function is called the preconditioner is ready to be used
221 * (using the <code>vmult</code> function of derived classes).
222 */
223 void
224 initialize(const SparseMatrix<number> &M,
225 const AdditionalData & additional_data);
226
227 /**
228 * Do the preconditioning. This function takes the residual in @p src and
229 * returns the resulting update vector in @p dst.
230 */
231 template <typename number2>
232 void
233 vmult(Vector<number2> &dst, const Vector<number2> &src) const;
234
235 /**
236 * Apply transpose preconditioner. This function takes the residual in @p
237 * src and returns the resulting update vector in @p dst.
238 */
239 template <typename number2>
240 void
241 Tvmult(Vector<number2> &dst, const Vector<number2> &src) const;
242
243 /**
244 * Return the dimension of the codomain (or range) space. Note that the
245 * matrix is of dimension $m \times n$.
246 *
247 * @note This function should only be called if the preconditioner has been
248 * initialized.
249 */
250 size_type
251 m() const;
252
253 /**
254 * Return the dimension of the domain space. Note that the matrix is of
255 * dimension $m \times n$.
256 *
257 * @note This function should only be called if the preconditioner has been
258 * initialized.
259 */
260 size_type
261 n() const;
262
263 protected:
264 /**
265 * Apply the inverses corresponding to those degrees of freedom that have a
266 * @p true value in @p dof_mask, to the @p src vector and move the result
267 * into @p dst. Actually, only values for allowed indices are written to @p
268 * dst, so the application of this function only does what is announced in
269 * the general documentation if the given mask sets all values to zero
270 *
271 * The reason for providing the mask anyway is that in derived classes we
272 * may want to apply the preconditioner to parts of the matrix only, in
273 * order to parallelize the application. Then, it is important to only write
274 * to some slices of @p dst, in order to eliminate the dependencies of
275 * threads of each other.
276 *
277 * If a null pointer is passed instead of a pointer to the @p dof_mask (as
278 * is the default value), then it is assumed that we shall work on all
279 * degrees of freedom. This is then equivalent to calling the function with
280 * a <tt>vector<bool>(n_dofs,true)</tt>.
281 *
282 * The @p vmult of this class of course calls this function with a null
283 * pointer
284 */
285 template <typename number2>
286 void
287 apply_preconditioner(Vector<number2> & dst,
288 const Vector<number2> & src,
289 const std::vector<bool> *const dof_mask = nullptr) const;
290
291 /**
292 * Determine an estimate for the memory consumption (in bytes) of this
293 * object.
294 */
295 std::size_t
296 memory_consumption() const;
297
298 private:
299 /**
300 * Pointer to the matrix.
301 */
302 SmartPointer<const SparseMatrix<number>, SparseVanka<number>> matrix;
303
304 /**
305 * Indices of those degrees of freedom that we shall work on.
306 */
307 const std::vector<bool> *selected;
308
309 /**
310 * Array of inverse matrices, one for each degree of freedom. Only those
311 * elements will be used that are tagged in @p selected.
312 */
313 mutable std::vector<SmartPointer<FullMatrix<float>, SparseVanka<number>>>
314 inverses;
315
316 /**
317 * The dimension of the range space.
318 */
319 size_type _m;
320
321 /**
322 * The dimension of the domain space.
323 */
324 size_type _n;
325
326 /**
327 * Compute the inverses of all selected diagonal elements.
328 */
329 void
330 compute_inverses();
331
332 /**
333 * Compute the inverses at positions in the range <tt>[begin,end)</tt>. In
334 * non-multithreaded mode, <tt>compute_inverses()</tt> calls this function
335 * with the whole range, but in multithreaded mode, several copies of this
336 * function are spawned.
337 */
338 void
339 compute_inverses(const size_type begin, const size_type end);
340
341 /**
342 * Compute the inverse of the block located at position @p row. Since the
343 * vector is used quite often, it is generated only once in the caller of
344 * this function and passed to this function which first clears it. Reusing
345 * the vector makes the process significantly faster than in the case where
346 * this function re-creates it each time.
347 */
348 void
349 compute_inverse(const size_type row, std::vector<size_type> &local_indices);
350
351 // Make the derived class a friend. This seems silly, but is actually
352 // necessary, since derived classes can only access non-public members
353 // through their @p this pointer, but not access these members as member
354 // functions of other objects of the type of this base class (i.e. like
355 // <tt>x.f()</tt>, where @p x is an object of the base class, and @p f one
356 // of it's non-public member functions).
357 //
358 // Now, in the case of the @p SparseBlockVanka class, we would like to take
359 // the address of a function of the base class in order to call it through
360 // the multithreading framework, so the derived class has to be a friend.
361 template <typename T>
362 friend class SparseBlockVanka;
363 };
364
365
366
367 /**
368 * Block version of the sparse Vanka preconditioner. This class divides the
369 * matrix into blocks and works on the diagonal blocks only, which of course
370 * reduces the efficiency as preconditioner, but is perfectly parallelizable.
371 * The constructor takes a parameter into how many blocks the matrix shall be
372 * subdivided and then lets the underlying class do the work. Division of the
373 * matrix is done in several ways which are described in detail below.
374 *
375 * This class is probably useless if you don't have a multiprocessor system,
376 * since then the amount of work per preconditioning step is the same as for
377 * the @p SparseVanka class, but preconditioning properties are worse. On the
378 * other hand, if you have a multiprocessor system, the worse preconditioning
379 * quality (leading to more iterations of the linear solver) usually is well
380 * balanced by the increased speed of application due to the parallelization,
381 * leading to an overall decrease in elapsed wall-time for solving your linear
382 * system. It should be noted that the quality as preconditioner reduces with
383 * growing number of blocks, so there may be an optimal value (in terms of
384 * wall-time per linear solve) for the number of blocks.
385 *
386 * To facilitate writing portable code, if the number of blocks into which the
387 * matrix is to be subdivided, is set to one, then this class acts just like
388 * the @p SparseVanka class. You may therefore want to set the number of
389 * blocks equal to the number of processors you have.
390 *
391 * Note that the parallelization is done if <tt>deal.II</tt> was configured
392 * for multithread use and that the number of threads which is spawned equals
393 * the number of blocks. This is reasonable since you will not want to set the
394 * number of blocks unnecessarily large, since, as mentioned, this reduces the
395 * preconditioning properties.
396 *
397 *
398 * <h3>Splitting the matrix into blocks</h3>
399 *
400 * Splitting the matrix into blocks is always done in a way such that the
401 * blocks are not necessarily of equal size, but such that the number of
402 * selected degrees of freedom for which a local system is to be solved is
403 * equal between blocks. The reason for this strategy to subdivision is load-
404 * balancing for multithreading. There are several possibilities to actually
405 * split the matrix into blocks, which are selected by the flag @p
406 * blocking_strategy that is passed to the constructor. By a block, we will in
407 * the sequel denote a list of indices of degrees of freedom; the algorithm
408 * will work on each block separately, i.e. the solutions of the local systems
409 * corresponding to a degree of freedom of one block will only be used to
410 * update the degrees of freedom belonging to the same block, but never to
411 * update degrees of freedom of other blocks. A block can be a consecutive
412 * list of indices, as in the first alternative below, or a nonconsecutive
413 * list of indices. Of course, we assume that the intersection of each two
414 * blocks is empty and that the union of all blocks equals the interval
415 * <tt>[0,N)</tt>, where @p N is the number of degrees of freedom of the
416 * system of equations.
417 *
418 * <ul>
419 * <li> @p index_intervals: Here, we chose the blocks to be intervals
420 * <tt>[a_i,a_{i+1</tt>)}, i.e. consecutive degrees of freedom are usually
421 * also within the same block. This is a reasonable strategy, if the degrees
422 * of freedom have, for example, be re-numbered using the Cuthill-McKee
423 * algorithm, in which spatially neighboring degrees of freedom have
424 * neighboring indices. In that case, coupling in the matrix is usually
425 * restricted to the vicinity of the diagonal as well, and we can simply cut
426 * the matrix into blocks.
427 *
428 * The bounds of the intervals, i.e. the @p a_i above, are chosen such that
429 * the number of degrees of freedom on which we shall work (i.e. usually the
430 * degrees of freedom corresponding to Lagrange multipliers) is about the same
431 * in each block; this does not mean, however, that the sizes of the blocks
432 * are equal, since the blocks also comprise the other degrees of freedom for
433 * which no local system is solved. In the extreme case, consider that all
434 * Lagrange multipliers are sorted to the end of the range of DoF indices,
435 * then the first block would be very large, since it comprises all other DoFs
436 * and some Lagrange multipliers, while all other blocks are rather small and
437 * comprise only Langrange multipliers. This strategy therefore does not only
438 * depend on the order in which the Lagrange DoFs are sorted, but also on the
439 * order in which the other DoFs are sorted. It is therefore necessary to note
440 * that this almost renders the capability as preconditioner useless if the
441 * degrees of freedom are numbered by component, i.e. all Lagrange multipliers
442 * en bloc.
443 *
444 * <li> @p adaptive: This strategy is a bit more clever in cases where the
445 * Langrange DoFs are clustered, as in the example above. It works as follows:
446 * it first groups the Lagrange DoFs into blocks, using the same strategy as
447 * above. However, instead of grouping the other DoFs into the blocks of
448 * Lagrange DoFs with nearest DoF index, it decides for each non-Lagrange DoF
449 * to put it into the block of Lagrange DoFs which write to this non-Lagrange
450 * DoF most often. This makes it possible to even sort the Lagrange DoFs to
451 * the end and still associate spatially neighboring non-Lagrange DoFs to the
452 * same blocks where the respective Lagrange DoFs are, since they couple to
453 * each other while spatially distant DoFs don't couple.
454 *
455 * The additional computational effort to sorting the non-Lagrange DoFs is not
456 * very large compared with the inversion of the local systems and applying
457 * the preconditioner, so this strategy might be reasonable if you want to
458 * sort your degrees of freedom by component. If the degrees of freedom are
459 * not sorted by component, the results of the both strategies outlined above
460 * does not differ much. However, unlike the first strategy, the performance
461 * of the second strategy does not deteriorate if the DoFs are renumbered by
462 * component.
463 * </ul>
464 *
465 *
466 * <h3>Typical results</h3>
467 *
468 * As a prototypical test case, we use a nonlinear problem from optimization,
469 * which leads to a series of saddle point problems, each of which is solved
470 * using GMRES with Vanka as preconditioner. The equation had approx. 850
471 * degrees of freedom. With the non-blocked version @p SparseVanka (or @p
472 * SparseBlockVanka with <tt>n_blocks==1</tt>), the following numbers of
473 * iterations is needed to solver the linear system in each nonlinear step:
474 * @verbatim
475 * 101 68 64 53 35 21
476 * @endverbatim
477 *
478 * With four blocks, we need the following numbers of iterations
479 * @verbatim
480 * 124 88 83 66 44 28
481 * @endverbatim
482 * As can be seen, more iterations are needed. However, in terms of computing
483 * time, the first version needs 72 seconds wall time (and 79 seconds CPU
484 * time, which is more than wall time since some other parts of the program
485 * were parallelized as well), while the second version needed 53 second wall
486 * time (and 110 seconds CPU time) on a four processor machine. The total time
487 * is in both cases dominated by the linear solvers. In this case, it is
488 * therefore worth while using the blocked version of the preconditioner if
489 * wall time is more important than CPU time.
490 *
491 * The results with the block version above were obtained with the first
492 * blocking strategy and the degrees of freedom were not numbered by
493 * component. Using the second strategy does not much change the numbers of
494 * iterations (at most by one in each step) and they also do not change when
495 * the degrees of freedom are sorted by component, while the first strategy
496 * significantly deteriorated.
497 */
498 template <typename number>
499 class SparseBlockVanka : public SparseVanka<number>
500 {
501 public:
502 /**
503 * Declare type for container size.
504 */
505 using size_type = types::global_dof_index;
506
507 /**
508 * Enumeration of the different methods by which the DoFs are distributed to
509 * the blocks on which we are to work.
510 */
511 enum BlockingStrategy
512 {
513 /**
514 * Block by index intervals.
515 */
516 index_intervals,
517 /**
518 * Block with an adaptive strategy.
519 */
520 adaptive
521 };
522
523 /**
524 * Constructor. Pass all arguments except for @p n_blocks to the base class.
525 *
526 * @deprecated This constructor is deprecated. The values passed to the last
527 * two arguments are ignored.
528 */
529 DEAL_II_DEPRECATED
530 SparseBlockVanka(const SparseMatrix<number> &M,
531 const std::vector<bool> & selected,
532 const unsigned int n_blocks,
533 const BlockingStrategy blocking_strategy,
534 const bool conserve_memory,
535 const unsigned int n_threads = MultithreadInfo::n_threads());
536
537 /**
538 * Constructor. Pass all arguments except for @p n_blocks to the base class.
539 */
540 SparseBlockVanka(const SparseMatrix<number> &M,
541 const std::vector<bool> & selected,
542 const unsigned int n_blocks,
543 const BlockingStrategy blocking_strategy);
544
545 /**
546 * Apply the preconditioner.
547 */
548 template <typename number2>
549 void
550 vmult(Vector<number2> &dst, const Vector<number2> &src) const;
551
552 /**
553 * Determine an estimate for the memory consumption (in bytes) of this
554 * object.
555 */
556 std::size_t
557 memory_consumption() const;
558
559 private:
560 /**
561 * Store the number of blocks.
562 */
563 const unsigned int n_blocks;
564
565 /**
566 * In this field, we precompute for each block which degrees of freedom
567 * belong to it. Thus, if <tt>dof_masks[i][j]==true</tt>, then DoF @p j
568 * belongs to block @p i. Of course, no other <tt>dof_masks[l][j]</tt> may
569 * be @p true for <tt>l!=i</tt>. This computation is done in the
570 * constructor, to avoid recomputing each time the preconditioner is called.
571 */
572 std::vector<std::vector<bool>> dof_masks;
573
574 /**
575 * Compute the contents of the field @p dof_masks. This function is called
576 * from the constructor.
577 */
578 void
579 compute_dof_masks(const SparseMatrix<number> &M,
580 const std::vector<bool> & selected,
581 const BlockingStrategy blocking_strategy);
582 };
583
584 /*@}*/
585 /* ---------------------------------- Inline functions ------------------- */
586
587 #ifndef DOXYGEN
588
589 template <typename number>
590 inline typename SparseVanka<number>::size_type
m()591 SparseVanka<number>::m() const
592 {
593 Assert(_m != 0, ExcNotInitialized());
594 return _m;
595 }
596
597 template <typename number>
598 inline typename SparseVanka<number>::size_type
n()599 SparseVanka<number>::n() const
600 {
601 Assert(_n != 0, ExcNotInitialized());
602 return _n;
603 }
604
605 template <typename number>
606 template <typename number2>
607 inline void
Tvmult(Vector<number2> &,const Vector<number2> &)608 SparseVanka<number>::Tvmult(Vector<number2> & /*dst*/,
609 const Vector<number2> & /*src*/) const
610 {
611 AssertThrow(false, ExcNotImplemented());
612 }
613
614 #endif // DOXYGEN
615
616 DEAL_II_NAMESPACE_CLOSE
617
618 #endif
619