1 #ifndef AMGCL_MPI_AMG_HPP
2 #define AMGCL_MPI_AMG_HPP
3 
4 /*
5 The MIT License
6 
7 Copyright (c) 2012-2021 Denis Demidov <dennis.demidov@gmail.com>
8 
9 Permission is hereby granted, free of charge, to any person obtaining a copy
10 of this software and associated documentation files (the "Software"), to deal
11 in the Software without restriction, including without limitation the rights
12 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13 copies of the Software, and to permit persons to whom the Software is
14 furnished to do so, subject to the following conditions:
15 
16 The above copyright notice and this permission notice shall be included in
17 all copies or substantial portions of the Software.
18 
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
22 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 THE SOFTWARE.
26 */
27 
28 /**
29  * \file   amgcl/mpi/amg.hpp
30  * \author Denis Demidov <dennis.demidov@gmail.com>
31  * \brief  Distributed memory AMG preconditioner.
32  */
33 
34 #include <iostream>
35 #include <iomanip>
36 #include <list>
37 #include <memory>
38 
39 #include <amgcl/backend/interface.hpp>
40 #include <amgcl/value_type/interface.hpp>
41 #include <amgcl/mpi/util.hpp>
42 #include <amgcl/mpi/distributed_matrix.hpp>
43 #include <amgcl/mpi/direct_solver/skyline_lu.hpp>
44 #include <amgcl/mpi/partition/merge.hpp>
45 
46 namespace amgcl {
47 namespace mpi {
48 
49 template <
50     class Backend,
51     class Coarsening,
52     class Relaxation,
53     class DirectSolver = direct::skyline_lu<typename Backend::value_type>,
54     class Repartition = partition::merge<Backend>
55     >
56 class amg {
57     public:
58         typedef Backend                                    backend_type;
59         typedef typename Backend::params                   backend_params;
60         typedef typename Backend::value_type               value_type;
61         typedef typename math::scalar_of<value_type>::type scalar_type;
62         typedef distributed_matrix<Backend>                matrix;
63         typedef typename Backend::vector                   vector;
64 
65         struct params {
66             typedef typename Coarsening::params   coarsening_params;
67             typedef typename Relaxation::params   relax_params;
68             typedef typename DirectSolver::params direct_params;
69             typedef typename Repartition::params  repart_params;
70 
71             coarsening_params coarsening;   ///< Coarsening parameters.
72             relax_params      relax;        ///< Relaxation parameters.
73             direct_params     direct;       ///< Direct solver parameters.
74             repart_params     repart;       ///< Repartition parameters.
75 
76             /// Specifies when level is coarse enough to be solved directly.
77             /**
78              * If number of variables at a next level in the hierarchy becomes
79              * lower than this threshold, then the hierarchy construction is
80              * stopped and the linear system is solved directly at this level.
81              */
82             unsigned coarse_enough;
83 
84             /// Use direct solver at the coarsest level.
85             /**
86              * When set, the coarsest level is solved with a direct solver.
87              * Otherwise a smoother is used as a solver.
88              */
89             bool direct_coarse;
90 
91             /// Maximum number of levels.
92             /** If this number is reached while the size of the last level is
93              * greater that `coarse_enough`, then the coarsest level will not
94              * be solved exactly, but will use a smoother.
95              */
96             unsigned max_levels;
97 
98             /// Number of pre-relaxations.
99             unsigned npre;
100 
101             /// Number of post-relaxations.
102             unsigned npost;
103 
104             /// Number of cycles (1 for V-cycle, 2 for W-cycle, etc.).
105             unsigned ncycle;
106 
107             /// Number of cycles to make as part of preconditioning.
108             unsigned pre_cycles;
109 
paramsamgcl::mpi::amg::params110             params() :
111                 coarse_enough(DirectSolver::coarse_enough()), direct_coarse(true),
112                 max_levels( std::numeric_limits<unsigned>::max() ),
113                 npre(1), npost(1), ncycle(1), pre_cycles(1)
114             {}
115 
116 #ifndef AMGCL_NO_BOOST
paramsamgcl::mpi::amg::params117             params(const boost::property_tree::ptree &p)
118                 : AMGCL_PARAMS_IMPORT_CHILD(p, coarsening),
119                   AMGCL_PARAMS_IMPORT_CHILD(p, relax),
120                   AMGCL_PARAMS_IMPORT_CHILD(p, direct),
121                   AMGCL_PARAMS_IMPORT_CHILD(p, repart),
122                   AMGCL_PARAMS_IMPORT_VALUE(p, coarse_enough),
123                   AMGCL_PARAMS_IMPORT_VALUE(p, direct_coarse),
124                   AMGCL_PARAMS_IMPORT_VALUE(p, max_levels),
125                   AMGCL_PARAMS_IMPORT_VALUE(p, npre),
126                   AMGCL_PARAMS_IMPORT_VALUE(p, npost),
127                   AMGCL_PARAMS_IMPORT_VALUE(p, ncycle),
128                   AMGCL_PARAMS_IMPORT_VALUE(p, pre_cycles)
129             {
130                 check_params(p, {"coarsening", "relax", "direct", "repart", "coarse_enough",  "direct_coarse", "max_levels", "npre", "npost", "ncycle", "pre_cycles"});
131 
132                 amgcl::precondition(max_levels > 0, "max_levels should be positive");
133             }
134 
getamgcl::mpi::amg::params135             void get(
136                     boost::property_tree::ptree &p,
137                     const std::string &path = ""
138                     ) const
139             {
140                 AMGCL_PARAMS_EXPORT_CHILD(p, path, coarsening);
141                 AMGCL_PARAMS_EXPORT_CHILD(p, path, relax);
142                 AMGCL_PARAMS_EXPORT_CHILD(p, path, direct);
143                 AMGCL_PARAMS_EXPORT_CHILD(p, path, repart);
144                 AMGCL_PARAMS_EXPORT_VALUE(p, path, coarse_enough);
145                 AMGCL_PARAMS_EXPORT_VALUE(p, path, direct_coarse);
146                 AMGCL_PARAMS_EXPORT_VALUE(p, path, max_levels);
147                 AMGCL_PARAMS_EXPORT_VALUE(p, path, npre);
148                 AMGCL_PARAMS_EXPORT_VALUE(p, path, npost);
149                 AMGCL_PARAMS_EXPORT_VALUE(p, path, ncycle);
150                 AMGCL_PARAMS_EXPORT_VALUE(p, path, pre_cycles);
151             }
152 #endif
153         } prm;
154 
155         template <class Matrix>
amg(communicator comm,const Matrix & A,const params & prm=params (),const backend_params & bprm=backend_params ())156         amg(
157                 communicator comm,
158                 const Matrix &A,
159                 const params &prm = params(),
160                 const backend_params &bprm = backend_params()
161            ) : prm(prm), repart(prm.repart)
162         {
163             init(std::make_shared<matrix>(comm, A, backend::rows(A)), bprm);
164         }
165 
amg(communicator,std::shared_ptr<matrix> A,const params & prm=params (),const backend_params & bprm=backend_params ())166         amg(
167                 communicator,
168                 std::shared_ptr<matrix> A,
169                 const params &prm = params(),
170                 const backend_params &bprm = backend_params()
171            ) : prm(prm), repart(prm.repart)
172         {
173             init(A, bprm);
174         }
175 
176         template <class Vec1, class Vec2>
cycle(const Vec1 & rhs,Vec2 && x) const177         void cycle(const Vec1 &rhs, Vec2 &&x) const {
178             cycle(levels.begin(), rhs, x);
179         }
180 
181         template <class Vec1, class Vec2>
apply(const Vec1 & rhs,Vec2 && x) const182         void apply(const Vec1 &rhs, Vec2 &&x) const {
183             if (prm.pre_cycles) {
184                 backend::clear(x);
185                 for(unsigned i = 0; i < prm.pre_cycles; ++i)
186                     cycle(levels.begin(), rhs, x);
187             } else {
188                 backend::copy(rhs, x);
189             }
190         }
191 
192         /// Returns the system matrix from the finest level.
system_matrix_ptr() const193         std::shared_ptr<matrix> system_matrix_ptr() const {
194             return A;
195         }
196 
system_matrix() const197         const matrix& system_matrix() const {
198             return *system_matrix_ptr();
199         }
200     private:
201         struct level {
202             ptrdiff_t nrows, nnz;
203             int active_procs;
204 
205             std::shared_ptr<matrix>       A, P, R;
206             std::shared_ptr<vector>       f, u, t;
207             std::shared_ptr<Relaxation>   relax;
208             std::shared_ptr<DirectSolver> solve;
209 
levelamgcl::mpi::amg::level210             level() {}
211 
levelamgcl::mpi::amg::level212             level(
213                     std::shared_ptr<matrix> a,
214                     params &prm,
215                     const backend_params &bprm,
216                     bool direct = false
217                  )
218                 : nrows(a->glob_rows()), nnz(a->glob_nonzeros()),
219                   f(Backend::create_vector(a->loc_rows(), bprm)),
220                   u(Backend::create_vector(a->loc_rows(), bprm))
221             {
222                 int active = (a->loc_rows() > 0);
223                 active_procs = a->comm().reduce(MPI_SUM, active);
224 
225                 sort_rows(*a);
226 
227                 if (direct) {
228                     AMGCL_TIC("direct solver");
229                     solve = std::make_shared<DirectSolver>(a->comm(), *a, prm.direct);
230                     AMGCL_TOC("direct solver");
231                 } else {
232                     A = a;
233                     t = Backend::create_vector(a->loc_rows(), bprm);
234 
235                     AMGCL_TIC("relaxation");
236                     relax = std::make_shared<Relaxation>(*a, prm.relax, bprm);
237                     AMGCL_TOC("relaxation");
238                 }
239             }
240 
step_downamgcl::mpi::amg::level241             std::shared_ptr<matrix> step_down(Coarsening &C, const Repartition &repart)
242             {
243                 AMGCL_TIC("transfer operators");
244                 std::tie(P, R) = C.transfer_operators(*A);
245 
246                 AMGCL_TIC("sort");
247                 sort_rows(*P);
248                 sort_rows(*R);
249                 AMGCL_TOC("sort");
250 
251                 AMGCL_TOC("transfer operators");
252 
253                 if (P->glob_cols() == 0) {
254                     // Zero-sized coarse level in amgcl (diagonal matrix?)
255                     return std::shared_ptr<matrix>();
256                 }
257 
258                 AMGCL_TIC("coarse operator");
259                 auto Ac = C.coarse_operator(*A, *P, *R);
260                 AMGCL_TOC("coarse operator");
261 
262                 if (repart.is_needed(*Ac)) {
263                     AMGCL_TIC("partition");
264                     auto I = repart(*Ac, block_size(C));
265                     auto J = transpose(*I);
266 
267                     P  = product(*P, *I);
268                     R  = product(*J, *R);
269                     Ac = product(*J, *product(*Ac, *I));
270                     AMGCL_TOC("partition");
271                 }
272 
273                 return Ac;
274             }
275 
move_to_backendamgcl::mpi::amg::level276             void move_to_backend(const backend_params &bprm) {
277                 AMGCL_TIC("move to backend");
278                 if (A) A->move_to_backend(bprm);
279                 if (P) P->move_to_backend(bprm);
280                 if (R) R->move_to_backend(bprm);
281                 AMGCL_TOC("move to backend");
282             }
283 
rowsamgcl::mpi::amg::level284             ptrdiff_t rows() const {
285                 return nrows;
286             }
287 
nonzerosamgcl::mpi::amg::level288             ptrdiff_t nonzeros() const {
289                 return nnz;
290             }
291         };
292 
293         typedef typename std::list<level>::const_iterator level_iterator;
294 
295         std::shared_ptr<matrix> A;
296         Repartition repart;
297         std::list<level> levels;
298 
init(std::shared_ptr<matrix> A,const backend_params & bprm)299         void init(std::shared_ptr<matrix> A, const backend_params &bprm)
300         {
301             A->comm().check(A->glob_rows() == A->glob_cols(), "Matrix should be square!");
302 
303             this->A = A;
304             Coarsening C(prm.coarsening);
305             bool need_coarse = true;
306 
307             while(A->glob_rows() > prm.coarse_enough) {
308                 levels.push_back( level(A, prm, bprm) );
309 
310                 if (levels.size() >= prm.max_levels) {
311                     levels.back().move_to_backend(bprm);
312                     break;
313                 }
314 
315                 A = levels.back().step_down(C, repart);
316                 levels.back().move_to_backend(bprm);
317 
318                 if (!A) {
319                     // Zero-sized coarse level. Probably the system matrix on
320                     // this level is diagonal, should be easily solvable with a
321                     // couple of smoother iterations.
322                     need_coarse = false;
323                     break;
324                 }
325             }
326 
327             if (!A || A->glob_rows() > prm.coarse_enough) {
328                 // The coarse matrix is still too big to be solved directly.
329                 need_coarse = false;
330             }
331 
332             if (A && need_coarse) {
333                 levels.push_back(level(A, prm, bprm, prm.direct_coarse));
334                 levels.back().move_to_backend(bprm);
335             }
336 
337             AMGCL_TIC("move to backend");
338             this->A->move_to_backend(bprm);
339             AMGCL_TOC("move to backend");
340         }
341 
342         template <class Vec1, class Vec2>
cycle(level_iterator lvl,const Vec1 & rhs,Vec2 & x) const343         void cycle(level_iterator lvl, const Vec1 &rhs, Vec2 &x) const {
344             level_iterator nxt = lvl, end = levels.end();
345             ++nxt;
346 
347             if (nxt == end) {
348                 if (lvl->solve) {
349                     AMGCL_TIC("direct solver");
350                     (*lvl->solve)(rhs, x);
351                     AMGCL_TOC("direct solver");
352                 } else {
353                     AMGCL_TIC("relax");
354                     for (size_t i = 0; i < prm.npre;  ++i) lvl->relax->apply_pre(*lvl->A, rhs, x, *lvl->t);
355                     for (size_t i = 0; i < prm.npost; ++i) lvl->relax->apply_post(*lvl->A, rhs, x, *lvl->t);
356                     AMGCL_TOC("relax");
357                 }
358             } else {
359                 for (size_t j = 0; j < prm.ncycle; ++j) {
360                     AMGCL_TIC("relax");
361                     for(size_t i = 0; i < prm.npre; ++i)
362                         lvl->relax->apply_pre(*lvl->A, rhs, x, *lvl->t);
363                     AMGCL_TOC("relax");
364 
365                     backend::residual(rhs, *lvl->A, x, *lvl->t);
366 
367                     backend::spmv(math::identity<scalar_type>(), *lvl->R, *lvl->t, math::zero<scalar_type>(), *nxt->f);
368 
369                     backend::clear(*nxt->u);
370                     cycle(nxt, *nxt->f, *nxt->u);
371 
372                     backend::spmv(math::identity<scalar_type>(), *lvl->P, *nxt->u, math::identity<scalar_type>(), x);
373 
374                     AMGCL_TIC("relax");
375                     for(size_t i = 0; i < prm.npost; ++i)
376                         lvl->relax->apply_post(*lvl->A, rhs, x, *lvl->t);
377                     AMGCL_TOC("relax");
378                 }
379             }
380         }
381 
382     template <class B, class C, class R, class D, class I>
383     friend std::ostream& operator<<(std::ostream &os, const amg<B, C, R, D, I> &a);
384 };
385 
386 template <class B, class C, class R, class D, class I>
operator <<(std::ostream & os,const amg<B,C,R,D,I> & a)387 std::ostream& operator<<(std::ostream &os, const amg<B, C, R, D, I> &a)
388 {
389     typedef typename amg<B, C, R, D, I>::level level;
390     ios_saver ss(os);
391 
392     size_t sum_dof = 0;
393     size_t sum_nnz = 0;
394 
395     for(const level &lvl : a.levels) {
396         sum_dof += lvl.rows();
397         sum_nnz += lvl.nonzeros();
398     }
399 
400     os << "Number of levels:    "   << a.levels.size()
401         << "\nOperator complexity: " << std::fixed << std::setprecision(2)
402         << 1.0 * sum_nnz / a.levels.front().nonzeros()
403         << "\nGrid complexity:     " << std::fixed << std::setprecision(2)
404         << 1.0 * sum_dof / a.levels.front().rows()
405         << "\n\nlevel     unknowns       nonzeros\n"
406         << "---------------------------------\n";
407 
408     size_t depth = 0;
409     for(const level &lvl : a.levels) {
410         os << std::setw(5)  << depth++
411            << std::setw(13) << lvl.rows()
412            << std::setw(15) << lvl.nonzeros() << " ("
413            << std::setw(5) << std::fixed << std::setprecision(2)
414            << 100.0 * lvl.nonzeros() / sum_nnz
415            << "%) [" << lvl.active_procs << "]" << std::endl;
416     }
417 
418     return os;
419 }
420 
421 } // namespace mpi
422 } // namespace amgcl
423 
424 #endif
425