1 #include <iostream>
2 #include <iomanip>
3 #include <fstream>
4 #include <vector>
5 #include <numeric>
6 #include <cmath>
7 #include <stdexcept>
8 #ifdef _OPENMP
9 #include <omp.h>
10 #endif
11
12 #include "domain_partition.hpp"
13
14 #include "mba.hpp"
15
16 #include <boost/scope_exit.hpp>
17 #include <memory>
18 #include <boost/program_options.hpp>
19 #include <boost/property_tree/ptree.hpp>
20 #include <boost/property_tree/json_parser.hpp>
21
22 #include <boost/multi_array.hpp>
23
24 #if defined(SOLVER_BACKEND_VEXCL)
25 # include <amgcl/backend/vexcl.hpp>
26 typedef amgcl::backend::vexcl<double> Backend;
27 #elif defined(SOLVER_BACKEND_CUDA)
28 # include <amgcl/backend/cuda.hpp>
29 # include <amgcl/relaxation/cusparse_ilu0.hpp>
30 typedef amgcl::backend::cuda<double> Backend;
31 #else
32 # ifndef SOLVER_BACKEND_BUILTIN
33 # define SOLVER_BACKEND_BUILTIN
34 # endif
35 # include <amgcl/backend/builtin.hpp>
36 typedef amgcl::backend::builtin<double> Backend;
37 #endif
38
39 #include <amgcl/make_solver.hpp>
40 #include <amgcl/amg.hpp>
41 #include <amgcl/coarsening/runtime.hpp>
42 #include <amgcl/relaxation/runtime.hpp>
43 #include <amgcl/preconditioner/runtime.hpp>
44 #include <amgcl/mpi/direct_solver/runtime.hpp>
45 #include <amgcl/mpi/solver/runtime.hpp>
46 #include <amgcl/mpi/subdomain_deflation.hpp>
47 #include <amgcl/adapter/crs_tuple.hpp>
48 #include <amgcl/adapter/zero_copy.hpp>
49 #include <amgcl/profiler.hpp>
50
51 namespace amgcl {
52 profiler<> prof;
53 }
54
55 struct partitioned_deflation {
56 unsigned nparts;
57 std::vector<unsigned> domain;
58
partitioned_deflationpartitioned_deflation59 partitioned_deflation(
60 boost::array<ptrdiff_t, 2> LO,
61 boost::array<ptrdiff_t, 2> HI,
62 unsigned nparts
63 ) : nparts(nparts)
64 {
65 domain_partition<2> part(LO, HI, nparts);
66
67 ptrdiff_t nx = HI[0] - LO[0] + 1;
68 ptrdiff_t ny = HI[1] - LO[1] + 1;
69
70 domain.resize(nx * ny);
71 for(unsigned p = 0; p < nparts; ++p) {
72 boost::array<ptrdiff_t, 2> lo = part.domain(p).min_corner();
73 boost::array<ptrdiff_t, 2> hi = part.domain(p).max_corner();
74
75 for(int j = lo[1]; j <= hi[1]; ++j) {
76 for(int i = lo[0]; i <= hi[0]; ++i) {
77 domain[(j - LO[1]) * nx + (i - LO[0])] = p;
78 }
79 }
80 }
81 }
82
dimpartitioned_deflation83 size_t dim() const { return nparts; }
84
operator ()partitioned_deflation85 double operator()(ptrdiff_t i, unsigned j) const {
86 return domain[i] == j;
87 }
88 };
89
90 struct linear_deflation {
91 std::vector<double> x;
92 std::vector<double> y;
93
linear_deflationlinear_deflation94 linear_deflation(
95 ptrdiff_t chunk,
96 boost::array<ptrdiff_t, 2> lo,
97 boost::array<ptrdiff_t, 2> hi
98 )
99 {
100 double hx = 1.0 / (hi[0] - lo[0]);
101 double hy = 1.0 / (hi[1] - lo[1]);
102
103 ptrdiff_t nx = hi[0] - lo[0] + 1;
104 ptrdiff_t ny = hi[1] - lo[1] + 1;
105
106 x.reserve(chunk);
107 y.reserve(chunk);
108
109 for (ptrdiff_t j = 0; j < ny; ++j) {
110 for (ptrdiff_t i = 0; i < nx; ++i) {
111 x.push_back(i * hx - 0.5);
112 y.push_back(j * hy - 0.5);
113 }
114 }
115 }
116
dimlinear_deflation117 size_t dim() const { return 3; }
118
operator ()linear_deflation119 double operator()(ptrdiff_t i, unsigned j) const {
120 switch(j) {
121 default:
122 case 0:
123 return 1;
124 case 1:
125 return x[i];
126 case 2:
127 return y[i];
128 }
129 }
130 };
131
132 struct bilinear_deflation {
133 size_t nv, chunk;
134 std::vector<double> v;
135
bilinear_deflationbilinear_deflation136 bilinear_deflation(
137 ptrdiff_t n,
138 ptrdiff_t chunk,
139 boost::array<ptrdiff_t, 2> lo,
140 boost::array<ptrdiff_t, 2> hi
141 ) : nv(0), chunk(chunk)
142 {
143 // See which neighbors we have.
144 int neib[2][2] = {
145 {lo[0] > 0 || lo[1] > 0, hi[0] + 1 < n || lo[1] > 0 },
146 {lo[0] > 0 || hi[1] + 1 < n, hi[0] + 1 < n || hi[1] + 1 < n}
147 };
148
149 for(int j = 0; j < 2; ++j)
150 for(int i = 0; i < 2; ++i)
151 if (neib[j][i]) ++nv;
152
153 if (nv == 0) {
154 // Single MPI process?
155 nv = 1;
156 v.resize(chunk, 1);
157 return;
158 }
159
160 v.resize(chunk * nv, 0);
161
162 double *dv = v.data();
163
164 ptrdiff_t nx = hi[0] - lo[0] + 1;
165 ptrdiff_t ny = hi[1] - lo[1] + 1;
166
167 double hx = 1.0 / (nx - 1);
168 double hy = 1.0 / (ny - 1);
169
170 for(int j = 0; j < 2; ++j) {
171 for(int i = 0; i < 2; ++i) {
172 if (!neib[j][i]) continue;
173
174 boost::multi_array_ref<double, 2> V(dv, boost::extents[ny][nx]);
175
176 for(ptrdiff_t jj = 0; jj < ny; ++jj) {
177 double y = jj * hy;
178 double b = std::abs((1 - j) - y);
179 for(ptrdiff_t ii = 0; ii < nx; ++ii) {
180 double x = ii * hx;
181
182 double a = std::abs((1 - i) - x);
183 V[jj][ii] = a * b;
184 }
185 }
186
187 dv += chunk;
188 }
189 }
190 }
191
dimbilinear_deflation192 size_t dim() const { return nv; }
193
operator ()bilinear_deflation194 double operator()(ptrdiff_t i, unsigned j) const {
195 return v[j * chunk + i];
196 }
197 };
198
199 #ifndef SOLVER_BACKEND_CUDA
200 struct mba_deflation {
201 size_t chunk, nv;
202 std::vector<double> v;
203
mba_deflationmba_deflation204 mba_deflation(
205 ptrdiff_t n,
206 ptrdiff_t chunk,
207 boost::array<ptrdiff_t,2> lo,
208 boost::array<ptrdiff_t,2> hi
209 ) : chunk(chunk), nv(1)
210 {
211 // See which neighbors we have.
212 int neib[2][2] = {
213 {lo[0] > 0 || lo[1] > 0, hi[0] + 1 < n || lo[1] > 0 },
214 {lo[0] > 0 || hi[1] + 1 < n, hi[0] + 1 < n || hi[1] + 1 < n}
215 };
216
217 for(int j = 0; j < 2; ++j)
218 for(int i = 0; i < 2; ++i)
219 if (neib[j][i]) ++nv;
220
221 v.resize(chunk * nv, 0);
222
223 double *dv = v.data();
224 std::fill(dv, dv + chunk, 1.0);
225 dv += chunk;
226
227 ptrdiff_t nx = hi[0] - lo[0] + 1;
228 ptrdiff_t ny = hi[1] - lo[1] + 1;
229
230 double hx = 1.0 / (nx - 1);
231 double hy = 1.0 / (ny - 1);
232
233 std::array<double, 2> cmin = {-0.01, -0.01};
234 std::array<double, 2> cmax = { 1.01, 1.01};
235 std::array<size_t, 2> grid = {3, 3};
236
237 std::array< std::array<double, 2>, 4 > coo;
238 std::array< double, 4 > val;
239
240 for(int j = 0, idx = 0; j < 2; ++j) {
241 for(int i = 0; i < 2; ++i, ++idx) {
242 coo[idx][0] = i;
243 coo[idx][1] = j;
244 }
245 }
246
247 for(int j = 0, idx = 0; j < 2; ++j) {
248 for(int i = 0; i < 2; ++i, ++idx) {
249 if (!neib[j][i]) continue;
250
251 std::fill(val.begin(), val.end(), 0.0);
252 val[idx] = 1.0;
253
254 mba::MBA<2> interp(cmin, cmax, grid, coo, val, 8, 1e-8, 0.5, zero);
255
256 boost::multi_array_ref<double, 2> V(dv, boost::extents[ny][nx]);
257
258 for(int jj = 0; jj < ny; ++jj)
259 for(int ii = 0; ii < nx; ++ii) {
260 std::array<double, 2> p = {ii * hx, jj * hy};
261 V[jj][ii] = interp(p);
262 }
263
264 dv += chunk;
265 }
266 }
267 }
268
dimmba_deflation269 size_t dim() const { return nv; }
270
operator ()mba_deflation271 double operator()(ptrdiff_t i, unsigned j) const {
272 return v[j * chunk + i];
273 }
274
zeromba_deflation275 static double zero(const std::array<double, 2>&) {
276 return 0;
277 }
278 };
279 #endif
280
281 struct harmonic_deflation {
282 size_t nv, chunk;
283 std::vector<double> v;
284
harmonic_deflationharmonic_deflation285 harmonic_deflation(
286 ptrdiff_t n,
287 ptrdiff_t chunk,
288 boost::array<ptrdiff_t, 2> lo,
289 boost::array<ptrdiff_t, 2> hi
290 ) : nv(0), chunk(chunk)
291 {
292 // See which neighbors we have.
293 int neib[2][2] = {
294 {lo[0] > 0 || lo[1] > 0, hi[0] + 1 < n || lo[1] > 0 },
295 {lo[0] > 0 || hi[1] + 1 < n, hi[0] + 1 < n || hi[1] + 1 < n}
296 };
297
298 for(int j = 0; j < 2; ++j)
299 for(int i = 0; i < 2; ++i)
300 if (neib[j][i]) ++nv;
301
302 if (nv == 0) {
303 // Single MPI process?
304 nv = 1;
305 v.resize(chunk, 1);
306 return;
307 }
308
309 v.resize(chunk * nv, 0);
310 double *dv = v.data();
311
312
313 ptrdiff_t nx = hi[0] - lo[0] + 1;
314 ptrdiff_t ny = hi[1] - lo[1] + 1;
315
316 std::vector<ptrdiff_t> ptr;
317 std::vector<ptrdiff_t> col;
318 std::vector<double> val;
319 std::vector<double> rhs(chunk, 0.0);
320
321 ptr.reserve(chunk + 1);
322 col.reserve(chunk * 5);
323 val.reserve(chunk * 5);
324
325 ptr.push_back(0);
326
327 for(int j = 0, k = 0; j < ny; ++j) {
328 for(int i = 0; i < nx; ++i, ++k) {
329 if (
330 (i == 0 && j == 0 ) ||
331 (i == 0 && j == ny-1) ||
332 (i == nx-1 && j == 0 ) ||
333 (i == nx-1 && j == ny-1)
334 )
335 {
336 col.push_back(k);
337 val.push_back(1);
338 } else {
339 col.push_back(k);
340 val.push_back(1.0);
341
342 if (j == 0) {
343 col.push_back(k + nx);
344 val.push_back(-0.5);
345 } else if (j == ny-1) {
346 col.push_back(k - nx);
347 val.push_back(-0.5);
348 } else {
349 col.push_back(k - nx);
350 val.push_back(-0.25);
351
352 col.push_back(k + nx);
353 val.push_back(-0.25);
354 }
355
356 if (i == 0) {
357 col.push_back(k + 1);
358 val.push_back(-0.5);
359 } else if (i == nx-1) {
360 col.push_back(k - 1);
361 val.push_back(-0.5);
362 } else {
363 col.push_back(k - 1);
364 val.push_back(-0.25);
365
366 col.push_back(k + 1);
367 val.push_back(-0.25);
368 }
369 }
370
371 ptr.push_back(col.size());
372 }
373 }
374
375 amgcl::make_solver<
376 amgcl::amg<
377 amgcl::backend::builtin<double>,
378 amgcl::coarsening::smoothed_aggregation,
379 amgcl::relaxation::gauss_seidel
380 >,
381 amgcl::solver::gmres<
382 amgcl::backend::builtin<double>
383 >
384 > solve( amgcl::adapter::zero_copy(chunk, ptr.data(), col.data(), val.data()) );
385
386 for(int j = 0; j < 2; ++j) {
387 for(int i = 0; i < 2; ++i) {
388 if (!neib[j][i]) continue;
389
390 ptrdiff_t idx = i * (nx - 1) + j * (ny - 1) * nx;
391 rhs[idx] = 1.0;
392
393 boost::iterator_range<double*> x(dv, dv + chunk);
394 solve(rhs, x);
395
396 rhs[idx] = 0.0;
397
398 dv += chunk;
399 }
400 }
401 }
402
dimharmonic_deflation403 size_t dim() const { return nv; }
404
operator ()harmonic_deflation405 double operator()(ptrdiff_t i, unsigned j) const {
406 return v[j * chunk + i];
407 }
408 };
409
410 struct renumbering {
411 const domain_partition<2> ∂
412 const std::vector<ptrdiff_t> &dom;
413
renumberingrenumbering414 renumbering(
415 const domain_partition<2> &p,
416 const std::vector<ptrdiff_t> &d
417 ) : part(p), dom(d)
418 {}
419
operator ()renumbering420 ptrdiff_t operator()(ptrdiff_t i, ptrdiff_t j) const {
421 boost::array<ptrdiff_t, 2> p = {{i, j}};
422 std::pair<int,ptrdiff_t> v = part.index(p);
423 return dom[v.first] + v.second;
424 }
425 };
426
main(int argc,char * argv[])427 int main(int argc, char *argv[]) {
428 int provided;
429 MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
430 BOOST_SCOPE_EXIT(void) {
431 MPI_Finalize();
432 } BOOST_SCOPE_EXIT_END
433
434 amgcl::mpi::communicator world(MPI_COMM_WORLD);
435
436 if (world.rank == 0)
437 std::cout << "World size: " << world.size << std::endl;
438
439 // Read configuration from command line
440 ptrdiff_t n = 1024;
441 std::string deflation_type = "bilinear";
442
443 amgcl::runtime::coarsening::type coarsening = amgcl::runtime::coarsening::smoothed_aggregation;
444 amgcl::runtime::relaxation::type relaxation = amgcl::runtime::relaxation::spai0;
445 amgcl::runtime::solver::type iterative_solver = amgcl::runtime::solver::bicgstabl;
446 amgcl::runtime::mpi::direct::type direct_solver = amgcl::runtime::mpi::direct::skyline_lu;
447
448 bool just_relax = false;
449 bool symm_dirichlet = true;
450 std::string problem = "laplace2d";
451 std::string parameter_file;
452 std::string out_file;
453
454 namespace po = boost::program_options;
455 po::options_description desc("Options");
456
457 desc.add_options()
458 ("help,h", "show help")
459 (
460 "problem",
461 po::value<std::string>(&problem)->default_value(problem),
462 "laplace2d, recirc2d"
463 )
464 (
465 "symbc",
466 po::value<bool>(&symm_dirichlet)->default_value(symm_dirichlet),
467 "Use symmetric Dirichlet conditions in laplace2d"
468 )
469 (
470 "size,n",
471 po::value<ptrdiff_t>(&n)->default_value(n),
472 "domain size"
473 )
474 (
475 "coarsening,c",
476 po::value<amgcl::runtime::coarsening::type>(&coarsening)->default_value(coarsening),
477 "ruge_stuben, aggregation, smoothed_aggregation, smoothed_aggr_emin"
478 )
479 (
480 "relaxation,r",
481 po::value<amgcl::runtime::relaxation::type>(&relaxation)->default_value(relaxation),
482 "gauss_seidel, ilu0, iluk, ilut, damped_jacobi, spai0, spai1, chebyshev"
483 )
484 (
485 "iter_solver,i",
486 po::value<amgcl::runtime::solver::type>(&iterative_solver)->default_value(iterative_solver),
487 "cg, bicgstab, bicgstabl, gmres"
488 )
489 (
490 "dir_solver,d",
491 po::value<amgcl::runtime::mpi::direct::type>(&direct_solver)->default_value(direct_solver),
492 "skyline_lu"
493 #ifdef AMGCL_HAVE_PASTIX
494 ", pastix"
495 #endif
496 )
497 (
498 "deflation,v",
499 po::value<std::string>(&deflation_type)->default_value(deflation_type),
500 "constant, partitioned, linear, bilinear, mba, harmonic"
501 )
502 (
503 "subparts",
504 po::value<int>()->default_value(16),
505 "number of partitions for partitioned deflation"
506 )
507 (
508 "params,P",
509 po::value<std::string>(¶meter_file),
510 "parameter file in json format"
511 )
512 (
513 "prm,p",
514 po::value< std::vector<std::string> >()->multitoken(),
515 "Parameters specified as name=value pairs. "
516 "May be provided multiple times. Examples:\n"
517 " -p solver.tol=1e-3\n"
518 " -p precond.coarse_enough=300"
519 )
520 (
521 "just-relax,0",
522 po::bool_switch(&just_relax),
523 "Do not create AMG hierarchy, use relaxation as preconditioner"
524 )
525 (
526 "out,o",
527 po::value<std::string>(&out_file),
528 "out file"
529 )
530 ;
531
532 po::variables_map vm;
533 po::store(po::parse_command_line(argc, argv, desc), vm);
534 po::notify(vm);
535
536 if (vm.count("help")) {
537 std::cout << desc << std::endl;
538 return 0;
539 }
540
541 boost::property_tree::ptree prm;
542 if (vm.count("params")) read_json(parameter_file, prm);
543
544 if (vm.count("prm")) {
545 for(const std::string &v : vm["prm"].as< std::vector<std::string> >()) {
546 amgcl::put(prm, v);
547 }
548 }
549
550 prm.put("isolver.type", iterative_solver);
551 prm.put("dsolver.type", direct_solver);
552
553 const ptrdiff_t n2 = n * n;
554 const double hinv = (n - 1);
555 const double h2i = (n - 1) * (n - 1);
556 const double h = 1 / hinv;
557
558
559 boost::array<ptrdiff_t, 2> lo = { {0, 0} };
560 boost::array<ptrdiff_t, 2> hi = { {n - 1, n - 1} };
561
562 using amgcl::prof;
563
564 prof.tic("partition");
565 domain_partition<2> part(lo, hi, world.size);
566 ptrdiff_t chunk = part.size( world.rank );
567
568 std::vector<ptrdiff_t> domain(world.size + 1);
569 MPI_Allgather(
570 &chunk, 1, amgcl::mpi::datatype<ptrdiff_t>(),
571 &domain[1], 1, amgcl::mpi::datatype<ptrdiff_t>(), world);
572 std::partial_sum(domain.begin(), domain.end(), domain.begin());
573
574 lo = part.domain(world.rank).min_corner();
575 hi = part.domain(world.rank).max_corner();
576 prof.toc("partition");
577
578 renumbering renum(part, domain);
579
580 prof.tic("deflation");
581 std::function<double(ptrdiff_t,unsigned)> dv;
582 unsigned ndv = 1;
583
584 if (deflation_type == "constant") {
585 dv = amgcl::mpi::constant_deflation(1);
586 } else if (deflation_type == "partitioned") {
587 ndv = vm["subparts"].as<int>();
588 dv = partitioned_deflation(lo, hi, ndv);
589 } else if (deflation_type == "linear") {
590 ndv = 3;
591 dv = linear_deflation(chunk, lo, hi);
592 } else if (deflation_type == "bilinear") {
593 bilinear_deflation bld(n, chunk, lo, hi);
594 ndv = bld.dim();
595 dv = bld;
596 #ifndef SOLVER_BACKEND_CUDA
597 } else if (deflation_type == "mba") {
598 mba_deflation mba(n, chunk, lo, hi);
599 ndv = mba.dim();
600 dv = mba;
601 #endif
602 } else if (deflation_type == "harmonic") {
603 harmonic_deflation hd(n, chunk, lo, hi);
604 ndv = hd.dim();
605 dv = hd;
606 } else {
607 throw std::runtime_error("Unsupported deflation type");
608 }
609
610 prm.put("num_def_vec", ndv);
611 prm.put("def_vec", &dv);
612 prof.toc("deflation");
613
614 prof.tic("assemble");
615 std::vector<ptrdiff_t> ptr;
616 std::vector<ptrdiff_t> col;
617 std::vector<double> val;
618 std::vector<double> rhs;
619
620 ptr.reserve(chunk + 1);
621 col.reserve(chunk * 5);
622 val.reserve(chunk * 5);
623 rhs.reserve(chunk);
624
625 ptr.push_back(0);
626
627 if (problem == "recirc2d") {
628 const double eps = 1e-5;
629
630 for(ptrdiff_t j = lo[1]; j <= hi[1]; ++j) {
631 double y = h * j;
632 for(ptrdiff_t i = lo[0]; i <= hi[0]; ++i) {
633 double x = h * i;
634
635 if (i == 0 || j == 0 || i + 1 == n || j + 1 == n) {
636 col.push_back(renum(i,j));
637 val.push_back(1);
638 rhs.push_back(
639 sin(M_PI * x) + sin(M_PI * y) +
640 sin(13 * M_PI * x) + sin(13 * M_PI * y)
641 );
642 } else {
643 double a = -sin(M_PI * x) * cos(M_PI * y) * hinv;
644 double b = sin(M_PI * y) * cos(M_PI * x) * hinv;
645
646 if (j > 0) {
647 col.push_back(renum(i,j-1));
648 val.push_back(-eps * h2i - std::max(b, 0.0));
649 }
650
651 if (i > 0) {
652 col.push_back(renum(i-1,j));
653 val.push_back(-eps * h2i - std::max(a, 0.0));
654 }
655
656 col.push_back(renum(i,j));
657 val.push_back(4 * eps * h2i + fabs(a) + fabs(b));
658
659 if (i + 1 < n) {
660 col.push_back(renum(i+1,j));
661 val.push_back(-eps * h2i + std::min(a, 0.0));
662 }
663
664 if (j + 1 < n) {
665 col.push_back(renum(i,j+1));
666 val.push_back(-eps * h2i + std::min(b, 0.0));
667 }
668
669 rhs.push_back(1.0);
670 }
671 ptr.push_back( col.size() );
672 }
673 }
674 } else {
675 for(ptrdiff_t j = lo[1]; j <= hi[1]; ++j) {
676 for(ptrdiff_t i = lo[0]; i <= hi[0]; ++i) {
677 if (!symm_dirichlet && (i == 0 || j == 0 || i + 1 == n || j + 1 == n)) {
678 col.push_back(renum(i,j));
679 val.push_back(1);
680 rhs.push_back(0);
681 } else {
682 if (j > 0) {
683 col.push_back(renum(i,j-1));
684 val.push_back(-h2i);
685 }
686
687 if (i > 0) {
688 col.push_back(renum(i-1,j));
689 val.push_back(-h2i);
690 }
691
692 col.push_back(renum(i,j));
693 val.push_back(4 * h2i);
694
695 if (i + 1 < n) {
696 col.push_back(renum(i+1,j));
697 val.push_back(-h2i);
698 }
699
700 if (j + 1 < n) {
701 col.push_back(renum(i,j+1));
702 val.push_back(-h2i);
703 }
704
705 rhs.push_back(1);
706 }
707 ptr.push_back( col.size() );
708 }
709 }
710 }
711 prof.toc("assemble");
712
713 Backend::params bprm;
714
715 #if defined(SOLVER_BACKEND_VEXCL)
716 vex::Context ctx(vex::Filter::Env);
717 std::cout << ctx << std::endl;
718 bprm.q = ctx;
719 #elif defined(SOLVER_BACKEND_CUDA)
720 cusparseCreate(&bprm.cusparse_handle);
721 #endif
722
723 auto f = Backend::copy_vector(rhs, bprm);
724 auto x = Backend::create_vector(chunk, bprm);
725
726 amgcl::backend::clear(*x);
727
728 size_t iters;
729 double resid, tm_setup, tm_solve;
730
731 if (just_relax) {
732 prm.put("local.class", "relaxation");
733 prm.put("local.type", relaxation);
734 } else {
735 prm.put("local.coarsening.type", coarsening);
736 prm.put("local.relax.type", relaxation);
737 }
738
739 prof.tic("setup");
740 typedef
741 amgcl::mpi::subdomain_deflation<
742 amgcl::runtime::preconditioner< Backend >,
743 amgcl::runtime::mpi::solver::wrapper< Backend >,
744 amgcl::runtime::mpi::direct::solver<double>
745 > SDD;
746
747 SDD solve(world, std::tie(chunk, ptr, col, val), prm, bprm);
748 tm_setup = prof.toc("setup");
749
750 prof.tic("solve");
751 std::tie(iters, resid) = solve(*f, *x);
752 tm_solve = prof.toc("solve");
753
754 if (world.rank == 0) {
755 std::cout
756 << "Iterations: " << iters << std::endl
757 << "Error: " << resid << std::endl
758 << prof << std::endl;
759
760 #ifdef _OPENMP
761 int nt = omp_get_max_threads();
762 #else
763 int nt = 1;
764 #endif
765 std::ostringstream log_name;
766 log_name << "log_" << n2 << "_" << nt << "_" << world.size << ".txt";
767 std::ofstream log(log_name.str().c_str(), std::ios::app);
768 log << n2 << "\t" << nt << "\t" << world.size
769 << "\t" << tm_setup << "\t" << tm_solve
770 << "\t" << iters << "\t" << std::endl;
771 }
772
773
774 if (!out_file.empty()) {
775 std::vector<double> X(world.rank == 0 ? n2 : chunk);
776
777 #if defined(SOLVER_BACKEND_VEXCL)
778 vex::copy(x->begin(), x->end(), X.begin());
779 #elif defined(SOLVER_BACKEND_CUDA)
780 thrust::copy(x->begin(), x->end(), X.begin());
781 #else
782 std::copy(x->data(), x->data() + chunk, X.begin());
783 #endif
784
785 if (world.rank == 0) {
786 for(int i = 1; i < world.size; ++i)
787 MPI_Recv(&X[domain[i]], domain[i+1] - domain[i], MPI_DOUBLE, i, 42, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
788
789 std::ofstream f(out_file.c_str(), std::ios::binary);
790 int m = n;
791 f.write((char*)&m, sizeof(int));
792 for(int j = 0; j < n; ++j) {
793 for(int i = 0; i < n; ++i) {
794 double buf = X[renum(i,j)];
795 f.write((char*)&buf, sizeof(double));
796 }
797 }
798 } else {
799 MPI_Send(X.data(), chunk, MPI_DOUBLE, 0, 42, MPI_COMM_WORLD);
800 }
801 }
802 }
803