1 #ifndef AMGCL_MPI_AMG_HPP
2 #define AMGCL_MPI_AMG_HPP
3
4 /*
5 The MIT License
6
7 Copyright (c) 2012-2021 Denis Demidov <dennis.demidov@gmail.com>
8
9 Permission is hereby granted, free of charge, to any person obtaining a copy
10 of this software and associated documentation files (the "Software"), to deal
11 in the Software without restriction, including without limitation the rights
12 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13 copies of the Software, and to permit persons to whom the Software is
14 furnished to do so, subject to the following conditions:
15
16 The above copyright notice and this permission notice shall be included in
17 all copies or substantial portions of the Software.
18
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 THE SOFTWARE.
26 */
27
28 /**
29 * \file amgcl/mpi/amg.hpp
30 * \author Denis Demidov <dennis.demidov@gmail.com>
31 * \brief Distributed memory AMG preconditioner.
32 */
33
34 #include <iostream>
35 #include <iomanip>
36 #include <list>
37 #include <memory>
38
39 #include <amgcl/backend/interface.hpp>
40 #include <amgcl/value_type/interface.hpp>
41 #include <amgcl/mpi/util.hpp>
42 #include <amgcl/mpi/distributed_matrix.hpp>
43 #include <amgcl/mpi/direct_solver/skyline_lu.hpp>
44 #include <amgcl/mpi/partition/merge.hpp>
45
46 namespace amgcl {
47 namespace mpi {
48
49 template <
50 class Backend,
51 class Coarsening,
52 class Relaxation,
53 class DirectSolver = direct::skyline_lu<typename Backend::value_type>,
54 class Repartition = partition::merge<Backend>
55 >
56 class amg {
57 public:
58 typedef Backend backend_type;
59 typedef typename Backend::params backend_params;
60 typedef typename Backend::value_type value_type;
61 typedef typename math::scalar_of<value_type>::type scalar_type;
62 typedef distributed_matrix<Backend> matrix;
63 typedef typename Backend::vector vector;
64
65 struct params {
66 typedef typename Coarsening::params coarsening_params;
67 typedef typename Relaxation::params relax_params;
68 typedef typename DirectSolver::params direct_params;
69 typedef typename Repartition::params repart_params;
70
71 coarsening_params coarsening; ///< Coarsening parameters.
72 relax_params relax; ///< Relaxation parameters.
73 direct_params direct; ///< Direct solver parameters.
74 repart_params repart; ///< Repartition parameters.
75
76 /// Specifies when level is coarse enough to be solved directly.
77 /**
78 * If number of variables at a next level in the hierarchy becomes
79 * lower than this threshold, then the hierarchy construction is
80 * stopped and the linear system is solved directly at this level.
81 */
82 unsigned coarse_enough;
83
84 /// Use direct solver at the coarsest level.
85 /**
86 * When set, the coarsest level is solved with a direct solver.
87 * Otherwise a smoother is used as a solver.
88 */
89 bool direct_coarse;
90
91 /// Maximum number of levels.
92 /** If this number is reached while the size of the last level is
93 * greater that `coarse_enough`, then the coarsest level will not
94 * be solved exactly, but will use a smoother.
95 */
96 unsigned max_levels;
97
98 /// Number of pre-relaxations.
99 unsigned npre;
100
101 /// Number of post-relaxations.
102 unsigned npost;
103
104 /// Number of cycles (1 for V-cycle, 2 for W-cycle, etc.).
105 unsigned ncycle;
106
107 /// Number of cycles to make as part of preconditioning.
108 unsigned pre_cycles;
109
paramsamgcl::mpi::amg::params110 params() :
111 coarse_enough(DirectSolver::coarse_enough()), direct_coarse(true),
112 max_levels( std::numeric_limits<unsigned>::max() ),
113 npre(1), npost(1), ncycle(1), pre_cycles(1)
114 {}
115
116 #ifndef AMGCL_NO_BOOST
paramsamgcl::mpi::amg::params117 params(const boost::property_tree::ptree &p)
118 : AMGCL_PARAMS_IMPORT_CHILD(p, coarsening),
119 AMGCL_PARAMS_IMPORT_CHILD(p, relax),
120 AMGCL_PARAMS_IMPORT_CHILD(p, direct),
121 AMGCL_PARAMS_IMPORT_CHILD(p, repart),
122 AMGCL_PARAMS_IMPORT_VALUE(p, coarse_enough),
123 AMGCL_PARAMS_IMPORT_VALUE(p, direct_coarse),
124 AMGCL_PARAMS_IMPORT_VALUE(p, max_levels),
125 AMGCL_PARAMS_IMPORT_VALUE(p, npre),
126 AMGCL_PARAMS_IMPORT_VALUE(p, npost),
127 AMGCL_PARAMS_IMPORT_VALUE(p, ncycle),
128 AMGCL_PARAMS_IMPORT_VALUE(p, pre_cycles)
129 {
130 check_params(p, {"coarsening", "relax", "direct", "repart", "coarse_enough", "direct_coarse", "max_levels", "npre", "npost", "ncycle", "pre_cycles"});
131
132 amgcl::precondition(max_levels > 0, "max_levels should be positive");
133 }
134
getamgcl::mpi::amg::params135 void get(
136 boost::property_tree::ptree &p,
137 const std::string &path = ""
138 ) const
139 {
140 AMGCL_PARAMS_EXPORT_CHILD(p, path, coarsening);
141 AMGCL_PARAMS_EXPORT_CHILD(p, path, relax);
142 AMGCL_PARAMS_EXPORT_CHILD(p, path, direct);
143 AMGCL_PARAMS_EXPORT_CHILD(p, path, repart);
144 AMGCL_PARAMS_EXPORT_VALUE(p, path, coarse_enough);
145 AMGCL_PARAMS_EXPORT_VALUE(p, path, direct_coarse);
146 AMGCL_PARAMS_EXPORT_VALUE(p, path, max_levels);
147 AMGCL_PARAMS_EXPORT_VALUE(p, path, npre);
148 AMGCL_PARAMS_EXPORT_VALUE(p, path, npost);
149 AMGCL_PARAMS_EXPORT_VALUE(p, path, ncycle);
150 AMGCL_PARAMS_EXPORT_VALUE(p, path, pre_cycles);
151 }
152 #endif
153 } prm;
154
155 template <class Matrix>
amg(communicator comm,const Matrix & A,const params & prm=params (),const backend_params & bprm=backend_params ())156 amg(
157 communicator comm,
158 const Matrix &A,
159 const params &prm = params(),
160 const backend_params &bprm = backend_params()
161 ) : prm(prm), repart(prm.repart)
162 {
163 init(std::make_shared<matrix>(comm, A, backend::rows(A)), bprm);
164 }
165
amg(communicator,std::shared_ptr<matrix> A,const params & prm=params (),const backend_params & bprm=backend_params ())166 amg(
167 communicator,
168 std::shared_ptr<matrix> A,
169 const params &prm = params(),
170 const backend_params &bprm = backend_params()
171 ) : prm(prm), repart(prm.repart)
172 {
173 init(A, bprm);
174 }
175
176 template <class Vec1, class Vec2>
cycle(const Vec1 & rhs,Vec2 && x) const177 void cycle(const Vec1 &rhs, Vec2 &&x) const {
178 cycle(levels.begin(), rhs, x);
179 }
180
181 template <class Vec1, class Vec2>
apply(const Vec1 & rhs,Vec2 && x) const182 void apply(const Vec1 &rhs, Vec2 &&x) const {
183 if (prm.pre_cycles) {
184 backend::clear(x);
185 for(unsigned i = 0; i < prm.pre_cycles; ++i)
186 cycle(levels.begin(), rhs, x);
187 } else {
188 backend::copy(rhs, x);
189 }
190 }
191
192 /// Returns the system matrix from the finest level.
system_matrix_ptr() const193 std::shared_ptr<matrix> system_matrix_ptr() const {
194 return A;
195 }
196
system_matrix() const197 const matrix& system_matrix() const {
198 return *system_matrix_ptr();
199 }
200 private:
201 struct level {
202 ptrdiff_t nrows, nnz;
203 int active_procs;
204
205 std::shared_ptr<matrix> A, P, R;
206 std::shared_ptr<vector> f, u, t;
207 std::shared_ptr<Relaxation> relax;
208 std::shared_ptr<DirectSolver> solve;
209
levelamgcl::mpi::amg::level210 level() {}
211
levelamgcl::mpi::amg::level212 level(
213 std::shared_ptr<matrix> a,
214 params &prm,
215 const backend_params &bprm,
216 bool direct = false
217 )
218 : nrows(a->glob_rows()), nnz(a->glob_nonzeros()),
219 f(Backend::create_vector(a->loc_rows(), bprm)),
220 u(Backend::create_vector(a->loc_rows(), bprm))
221 {
222 int active = (a->loc_rows() > 0);
223 active_procs = a->comm().reduce(MPI_SUM, active);
224
225 sort_rows(*a);
226
227 if (direct) {
228 AMGCL_TIC("direct solver");
229 solve = std::make_shared<DirectSolver>(a->comm(), *a, prm.direct);
230 AMGCL_TOC("direct solver");
231 } else {
232 A = a;
233 t = Backend::create_vector(a->loc_rows(), bprm);
234
235 AMGCL_TIC("relaxation");
236 relax = std::make_shared<Relaxation>(*a, prm.relax, bprm);
237 AMGCL_TOC("relaxation");
238 }
239 }
240
step_downamgcl::mpi::amg::level241 std::shared_ptr<matrix> step_down(Coarsening &C, const Repartition &repart)
242 {
243 AMGCL_TIC("transfer operators");
244 std::tie(P, R) = C.transfer_operators(*A);
245
246 AMGCL_TIC("sort");
247 sort_rows(*P);
248 sort_rows(*R);
249 AMGCL_TOC("sort");
250
251 AMGCL_TOC("transfer operators");
252
253 if (P->glob_cols() == 0) {
254 // Zero-sized coarse level in amgcl (diagonal matrix?)
255 return std::shared_ptr<matrix>();
256 }
257
258 AMGCL_TIC("coarse operator");
259 auto Ac = C.coarse_operator(*A, *P, *R);
260 AMGCL_TOC("coarse operator");
261
262 if (repart.is_needed(*Ac)) {
263 AMGCL_TIC("partition");
264 auto I = repart(*Ac, block_size(C));
265 auto J = transpose(*I);
266
267 P = product(*P, *I);
268 R = product(*J, *R);
269 Ac = product(*J, *product(*Ac, *I));
270 AMGCL_TOC("partition");
271 }
272
273 return Ac;
274 }
275
move_to_backendamgcl::mpi::amg::level276 void move_to_backend(const backend_params &bprm) {
277 AMGCL_TIC("move to backend");
278 if (A) A->move_to_backend(bprm);
279 if (P) P->move_to_backend(bprm);
280 if (R) R->move_to_backend(bprm);
281 AMGCL_TOC("move to backend");
282 }
283
rowsamgcl::mpi::amg::level284 ptrdiff_t rows() const {
285 return nrows;
286 }
287
nonzerosamgcl::mpi::amg::level288 ptrdiff_t nonzeros() const {
289 return nnz;
290 }
291 };
292
293 typedef typename std::list<level>::const_iterator level_iterator;
294
295 std::shared_ptr<matrix> A;
296 Repartition repart;
297 std::list<level> levels;
298
init(std::shared_ptr<matrix> A,const backend_params & bprm)299 void init(std::shared_ptr<matrix> A, const backend_params &bprm)
300 {
301 A->comm().check(A->glob_rows() == A->glob_cols(), "Matrix should be square!");
302
303 this->A = A;
304 Coarsening C(prm.coarsening);
305 bool need_coarse = true;
306
307 while(A->glob_rows() > prm.coarse_enough) {
308 levels.push_back( level(A, prm, bprm) );
309
310 if (levels.size() >= prm.max_levels) {
311 levels.back().move_to_backend(bprm);
312 break;
313 }
314
315 A = levels.back().step_down(C, repart);
316 levels.back().move_to_backend(bprm);
317
318 if (!A) {
319 // Zero-sized coarse level. Probably the system matrix on
320 // this level is diagonal, should be easily solvable with a
321 // couple of smoother iterations.
322 need_coarse = false;
323 break;
324 }
325 }
326
327 if (!A || A->glob_rows() > prm.coarse_enough) {
328 // The coarse matrix is still too big to be solved directly.
329 need_coarse = false;
330 }
331
332 if (A && need_coarse) {
333 levels.push_back(level(A, prm, bprm, prm.direct_coarse));
334 levels.back().move_to_backend(bprm);
335 }
336
337 AMGCL_TIC("move to backend");
338 this->A->move_to_backend(bprm);
339 AMGCL_TOC("move to backend");
340 }
341
342 template <class Vec1, class Vec2>
cycle(level_iterator lvl,const Vec1 & rhs,Vec2 & x) const343 void cycle(level_iterator lvl, const Vec1 &rhs, Vec2 &x) const {
344 level_iterator nxt = lvl, end = levels.end();
345 ++nxt;
346
347 if (nxt == end) {
348 if (lvl->solve) {
349 AMGCL_TIC("direct solver");
350 (*lvl->solve)(rhs, x);
351 AMGCL_TOC("direct solver");
352 } else {
353 AMGCL_TIC("relax");
354 for (size_t i = 0; i < prm.npre; ++i) lvl->relax->apply_pre(*lvl->A, rhs, x, *lvl->t);
355 for (size_t i = 0; i < prm.npost; ++i) lvl->relax->apply_post(*lvl->A, rhs, x, *lvl->t);
356 AMGCL_TOC("relax");
357 }
358 } else {
359 for (size_t j = 0; j < prm.ncycle; ++j) {
360 AMGCL_TIC("relax");
361 for(size_t i = 0; i < prm.npre; ++i)
362 lvl->relax->apply_pre(*lvl->A, rhs, x, *lvl->t);
363 AMGCL_TOC("relax");
364
365 backend::residual(rhs, *lvl->A, x, *lvl->t);
366
367 backend::spmv(math::identity<scalar_type>(), *lvl->R, *lvl->t, math::zero<scalar_type>(), *nxt->f);
368
369 backend::clear(*nxt->u);
370 cycle(nxt, *nxt->f, *nxt->u);
371
372 backend::spmv(math::identity<scalar_type>(), *lvl->P, *nxt->u, math::identity<scalar_type>(), x);
373
374 AMGCL_TIC("relax");
375 for(size_t i = 0; i < prm.npost; ++i)
376 lvl->relax->apply_post(*lvl->A, rhs, x, *lvl->t);
377 AMGCL_TOC("relax");
378 }
379 }
380 }
381
382 template <class B, class C, class R, class D, class I>
383 friend std::ostream& operator<<(std::ostream &os, const amg<B, C, R, D, I> &a);
384 };
385
386 template <class B, class C, class R, class D, class I>
operator <<(std::ostream & os,const amg<B,C,R,D,I> & a)387 std::ostream& operator<<(std::ostream &os, const amg<B, C, R, D, I> &a)
388 {
389 typedef typename amg<B, C, R, D, I>::level level;
390 ios_saver ss(os);
391
392 size_t sum_dof = 0;
393 size_t sum_nnz = 0;
394
395 for(const level &lvl : a.levels) {
396 sum_dof += lvl.rows();
397 sum_nnz += lvl.nonzeros();
398 }
399
400 os << "Number of levels: " << a.levels.size()
401 << "\nOperator complexity: " << std::fixed << std::setprecision(2)
402 << 1.0 * sum_nnz / a.levels.front().nonzeros()
403 << "\nGrid complexity: " << std::fixed << std::setprecision(2)
404 << 1.0 * sum_dof / a.levels.front().rows()
405 << "\n\nlevel unknowns nonzeros\n"
406 << "---------------------------------\n";
407
408 size_t depth = 0;
409 for(const level &lvl : a.levels) {
410 os << std::setw(5) << depth++
411 << std::setw(13) << lvl.rows()
412 << std::setw(15) << lvl.nonzeros() << " ("
413 << std::setw(5) << std::fixed << std::setprecision(2)
414 << 100.0 * lvl.nonzeros() / sum_nnz
415 << "%) [" << lvl.active_procs << "]" << std::endl;
416 }
417
418 return os;
419 }
420
421 } // namespace mpi
422 } // namespace amgcl
423
424 #endif
425