1 #include <iostream>
2 #include <string>
3 #include <random>
4 
5 #include <boost/program_options.hpp>
6 #include <boost/property_tree/ptree.hpp>
7 #include <boost/property_tree/json_parser.hpp>
8 #include <boost/range/iterator_range.hpp>
9 #include <boost/preprocessor/seq/for_each.hpp>
10 
11 
12 #if defined(SOLVER_BACKEND_VEXCL)
13 #  include <amgcl/value_type/static_matrix.hpp>
14 #  include <amgcl/adapter/block_matrix.hpp>
15 #  include <amgcl/backend/vexcl.hpp>
16 #  include <amgcl/backend/vexcl_static_matrix.hpp>
17    typedef amgcl::backend::vexcl<double> Backend;
18 #elif defined(SOLVER_BACKEND_VIENNACL)
19 #  include <amgcl/backend/viennacl.hpp>
20    typedef amgcl::backend::viennacl< viennacl::compressed_matrix<double> > Backend;
21 #elif defined(SOLVER_BACKEND_CUDA)
22 #  include <amgcl/backend/cuda.hpp>
23 #  include <amgcl/relaxation/cusparse_ilu0.hpp>
24    typedef amgcl::backend::cuda<double> Backend;
25 #elif defined(SOLVER_BACKEND_EIGEN)
26 #  include <amgcl/backend/eigen.hpp>
27    typedef amgcl::backend::eigen<double> Backend;
28 #elif defined(SOLVER_BACKEND_BLAZE)
29 #  include <amgcl/backend/blaze.hpp>
30    typedef amgcl::backend::blaze<double> Backend;
31 #else
32 #  ifndef SOLVER_BACKEND_BUILTIN
33 #    define SOLVER_BACKEND_BUILTIN
34 #  endif
35 #  include <amgcl/backend/builtin.hpp>
36 #  include <amgcl/value_type/static_matrix.hpp>
37 #  include <amgcl/adapter/block_matrix.hpp>
38    typedef amgcl::backend::builtin<double> Backend;
39 #endif
40 
41 #include <amgcl/relaxation/runtime.hpp>
42 #include <amgcl/coarsening/runtime.hpp>
43 #include <amgcl/coarsening/rigid_body_modes.hpp>
44 #include <amgcl/solver/runtime.hpp>
45 #include <amgcl/preconditioner/runtime.hpp>
46 #include <amgcl/make_solver.hpp>
47 #include <amgcl/amg.hpp>
48 #include <amgcl/adapter/crs_tuple.hpp>
49 #include <amgcl/adapter/reorder.hpp>
50 #include <amgcl/io/mm.hpp>
51 #include <amgcl/io/binary.hpp>
52 
53 #include <amgcl/profiler.hpp>
54 
55 #include "sample_problem.hpp"
56 
57 #ifndef AMGCL_BLOCK_SIZES
58 #  define AMGCL_BLOCK_SIZES (3)(4)
59 #endif
60 
61 namespace amgcl { profiler<> prof; }
62 using amgcl::prof;
63 using amgcl::precondition;
64 
65 #ifdef SOLVER_BACKEND_BUILTIN
66 //---------------------------------------------------------------------------
67 template <int B>
block_solve(const boost::property_tree::ptree & prm,size_t rows,std::vector<ptrdiff_t> const & ptr,std::vector<ptrdiff_t> const & col,std::vector<double> const & val,std::vector<double> const & rhs,std::vector<double> & x,bool reorder)68 std::tuple<size_t, double> block_solve(
69         const boost::property_tree::ptree &prm,
70         size_t rows,
71         std::vector<ptrdiff_t> const &ptr,
72         std::vector<ptrdiff_t> const &col,
73         std::vector<double>    const &val,
74         std::vector<double>    const &rhs,
75         std::vector<double>          &x,
76         bool reorder
77         )
78 {
79     typedef amgcl::static_matrix<double, B, B> value_type;
80     typedef amgcl::static_matrix<double, B, 1> rhs_type;
81     typedef amgcl::backend::builtin<value_type> BBackend;
82 
83     typedef amgcl::make_solver<
84         amgcl::runtime::preconditioner<BBackend>,
85         amgcl::runtime::solver::wrapper<BBackend>
86         > Solver;
87 
88     auto As = std::tie(rows, ptr, col, val);
89     auto Ab = amgcl::adapter::block_matrix<value_type>(As);
90 
91     std::tuple<size_t, double> info;
92 
93     if (reorder) {
94         prof.tic("reorder");
95         amgcl::adapter::reorder<> perm(Ab);
96         prof.toc("reorder");
97 
98         prof.tic("setup");
99         Solver solve(perm(Ab), prm);
100         prof.toc("setup");
101 
102         std::cout << solve << std::endl;
103 
104         rhs_type const * fptr = reinterpret_cast<rhs_type const *>(&rhs[0]);
105         rhs_type       * xptr = reinterpret_cast<rhs_type       *>(&x[0]);
106 
107         amgcl::backend::numa_vector<rhs_type> F(perm(amgcl::make_iterator_range(fptr, fptr + rows/B)));
108         amgcl::backend::numa_vector<rhs_type> X(perm(amgcl::make_iterator_range(xptr, xptr + rows/B)));
109 
110         prof.tic("solve");
111         info = solve(F, X);
112         prof.toc("solve");
113 
114         perm.inverse(X, xptr);
115     } else {
116         prof.tic("setup");
117         Solver solve(Ab, prm);
118         prof.toc("setup");
119 
120         std::cout << solve << std::endl;
121 
122         rhs_type const * fptr = reinterpret_cast<rhs_type const *>(&rhs[0]);
123         rhs_type       * xptr = reinterpret_cast<rhs_type       *>(&x[0]);
124 
125         amgcl::backend::numa_vector<rhs_type> F(fptr, fptr + rows/B);
126         amgcl::backend::numa_vector<rhs_type> X(xptr, xptr + rows/B);
127 
128         prof.tic("solve");
129         info = solve(F, X);
130         prof.toc("solve");
131 
132         std::copy(X.data(), X.data() + X.size(), xptr);
133     }
134 
135     return info;
136 }
137 #endif
138 
139 #ifdef SOLVER_BACKEND_VEXCL
140 //---------------------------------------------------------------------------
141 template <int B>
block_solve(const boost::property_tree::ptree & prm,size_t rows,std::vector<ptrdiff_t> const & ptr,std::vector<ptrdiff_t> const & col,std::vector<double> const & val,std::vector<double> const & rhs,std::vector<double> & x,bool reorder)142 std::tuple<size_t, double> block_solve(
143         const boost::property_tree::ptree &prm,
144         size_t rows,
145         std::vector<ptrdiff_t> const &ptr,
146         std::vector<ptrdiff_t> const &col,
147         std::vector<double>    const &val,
148         std::vector<double>    const &rhs,
149         std::vector<double>          &x,
150         bool reorder
151         )
152 {
153     typedef amgcl::static_matrix<double, B, B> value_type;
154     typedef amgcl::static_matrix<double, B, 1> rhs_type;
155     typedef amgcl::backend::vexcl<value_type> BBackend;
156 
157     typedef amgcl::make_solver<
158         amgcl::runtime::preconditioner<BBackend>,
159         amgcl::runtime::solver::wrapper<BBackend>
160         > Solver;
161 
162     typename BBackend::params bprm;
163 
164     vex::Context ctx(vex::Filter::Env);
165     std::cout << ctx << std::endl;
166     bprm.q = ctx;
167     bprm.fast_matrix_setup = prm.get("fast", true);
168 
169     vex::scoped_program_header header(ctx,
170             amgcl::backend::vexcl_static_matrix_declaration<double,B>());
171 
172     auto As = std::tie(rows, ptr, col, val);
173     auto Ab = amgcl::adapter::block_matrix<value_type>(As);
174 
175     std::tuple<size_t, double> info;
176 
177     if (reorder) {
178         prof.tic("reorder");
179         amgcl::adapter::reorder<> perm(Ab);
180         prof.toc("reorder");
181 
182         prof.tic("setup");
183         Solver solve(perm(Ab), prm, bprm);
184         prof.toc("setup");
185 
186         std::cout << solve << std::endl;
187 
188         rhs_type const * fptr = reinterpret_cast<rhs_type const *>(&rhs[0]);
189         rhs_type       * xptr = reinterpret_cast<rhs_type       *>(&x[0]);
190 
191         std::vector<rhs_type> tmp(rows / B);
192 
193         perm.forward(amgcl::make_iterator_range(fptr, fptr + rows/B), tmp);
194         vex::vector<rhs_type> f_b(ctx, tmp);
195 
196         perm.forward(amgcl::make_iterator_range(xptr, xptr + rows/B), tmp);
197         vex::vector<rhs_type> x_b(ctx, tmp);
198 
199         prof.tic("solve");
200         info = solve(f_b, x_b);
201         prof.toc("solve");
202 
203         vex::copy(x_b, tmp);
204         perm.inverse(tmp, xptr);
205     } else {
206         prof.tic("setup");
207         Solver solve(Ab, prm, bprm);
208         prof.toc("setup");
209 
210         std::cout << solve << std::endl;
211 
212         rhs_type const * fptr = reinterpret_cast<rhs_type const *>(&rhs[0]);
213         rhs_type       * xptr = reinterpret_cast<rhs_type       *>(&x[0]);
214 
215         vex::vector<rhs_type> f_b(ctx, rows/B, fptr);
216         vex::vector<rhs_type> x_b(ctx, rows/B, xptr);
217 
218         prof.tic("solve");
219         info = solve(f_b, x_b);
220         prof.toc("solve");
221 
222         vex::copy(x_b.begin(), x_b.end(), xptr);
223     }
224 
225     return info;
226 }
227 #endif
228 
229 //---------------------------------------------------------------------------
scalar_solve(const boost::property_tree::ptree & prm,size_t rows,std::vector<ptrdiff_t> const & ptr,std::vector<ptrdiff_t> const & col,std::vector<double> const & val,std::vector<double> const & rhs,std::vector<double> & x,bool reorder)230 std::tuple<size_t, double> scalar_solve(
231         const boost::property_tree::ptree &prm,
232         size_t rows,
233         std::vector<ptrdiff_t> const &ptr,
234         std::vector<ptrdiff_t> const &col,
235         std::vector<double>    const &val,
236         std::vector<double>    const &rhs,
237         std::vector<double>          &x,
238         bool reorder
239         )
240 {
241     Backend::params bprm;
242 
243 #if defined(SOLVER_BACKEND_VEXCL)
244     vex::Context ctx(vex::Filter::Env);
245     std::cout << ctx << std::endl;
246     bprm.q = ctx;
247 #elif defined(SOLVER_BACKEND_VIENNACL)
248     std::cout
249         << viennacl::ocl::current_device().name()
250         << " (" << viennacl::ocl::current_device().vendor() << ")\n\n";
251 #elif defined(SOLVER_BACKEND_CUDA)
252     cusparseCreate(&bprm.cusparse_handle);
253     {
254         int dev;
255         cudaGetDevice(&dev);
256 
257         cudaDeviceProp prop;
258         cudaGetDeviceProperties(&prop, dev);
259         std::cout << prop.name << std::endl << std::endl;
260     }
261 #endif
262 
263     typedef amgcl::make_solver<
264         amgcl::runtime::preconditioner<Backend>,
265         amgcl::runtime::solver::wrapper<Backend>
266         > Solver;
267 
268     std::tuple<size_t, double> info;
269 
270     if (reorder) {
271         prof.tic("reorder");
272         amgcl::adapter::reorder<> perm(std::tie(rows, ptr, col, val));
273         prof.toc("reorder");
274 
275         prof.tic("setup");
276         Solver solve(perm(std::tie(rows, ptr, col, val)), prm, bprm);
277         prof.toc("setup");
278 
279         std::cout << solve << std::endl;
280 
281         std::vector<double> tmp(rows);
282 
283         perm.forward(rhs, tmp);
284         auto f_b = Backend::copy_vector(tmp, bprm);
285 
286         perm.forward(x, tmp);
287         auto x_b = Backend::copy_vector(tmp, bprm);
288 
289         prof.tic("solve");
290         info = solve(*f_b, *x_b);
291         prof.toc("solve");
292 
293 #if defined(SOLVER_BACKEND_VEXCL)
294         vex::copy(*x_b, tmp);
295 #elif defined(SOLVER_BACKEND_VIENNACL)
296         viennacl::fast_copy(*x_b, tmp);
297 #elif defined(SOLVER_BACKEND_CUDA)
298         thrust::copy(x_b->begin(), x_b->end(), tmp.begin());
299 #else
300         std::copy(&(*x_b)[0], &(*x_b)[0] + rows, &tmp[0]);
301 #endif
302 
303         perm.inverse(tmp, x);
304     } else {
305         prof.tic("setup");
306         Solver solve(std::tie(rows, ptr, col, val), prm, bprm);
307         prof.toc("setup");
308 
309         std::cout << solve << std::endl;
310 
311         auto f_b = Backend::copy_vector(rhs, bprm);
312         auto x_b = Backend::copy_vector(x,   bprm);
313 
314         prof.tic("solve");
315         info = solve(*f_b, *x_b);
316         prof.toc("solve");
317 
318 #if defined(SOLVER_BACKEND_VEXCL)
319         vex::copy(*x_b, x);
320 #elif defined(SOLVER_BACKEND_VIENNACL)
321         viennacl::fast_copy(*x_b, x);
322 #elif defined(SOLVER_BACKEND_CUDA)
323         thrust::copy(x_b->begin(), x_b->end(), x.begin());
324 #else
325         std::copy(&(*x_b)[0], &(*x_b)[0] + rows, &x[0]);
326 #endif
327     }
328 
329     return info;
330 }
331 
332 #define AMGCL_CALL_BLOCK_SOLVER(z, data, B)                                    \
333   case B:                                                                      \
334     return block_solve<B>(prm, rows, ptr, col, val, rhs, x, reorder);
335 
336 //---------------------------------------------------------------------------
solve(const boost::property_tree::ptree & prm,size_t rows,std::vector<ptrdiff_t> const & ptr,std::vector<ptrdiff_t> const & col,std::vector<double> const & val,std::vector<double> const & rhs,std::vector<double> & x,int block_size,bool reorder)337 std::tuple<size_t, double> solve(
338         const boost::property_tree::ptree &prm,
339         size_t rows,
340         std::vector<ptrdiff_t> const &ptr,
341         std::vector<ptrdiff_t> const &col,
342         std::vector<double>    const &val,
343         std::vector<double>    const &rhs,
344         std::vector<double>          &x,
345         int block_size,
346         bool reorder
347         )
348 {
349     switch (block_size) {
350         case 1:
351             return scalar_solve(prm, rows, ptr, col, val, rhs, x, reorder);
352 #if defined(SOLVER_BACKEND_BUILTIN) || defined(SOLVER_BACKEND_VEXCL)
353         BOOST_PP_SEQ_FOR_EACH(AMGCL_CALL_BLOCK_SOLVER, ~, AMGCL_BLOCK_SIZES)
354 #endif
355         default:
356             precondition(false, "Unsupported block size");
357             return std::make_tuple(0, 0.0);
358     }
359 }
360 
361 //---------------------------------------------------------------------------
main(int argc,char * argv[])362 int main(int argc, char *argv[]) {
363     namespace po = boost::program_options;
364     namespace io = amgcl::io;
365 
366     using amgcl::prof;
367     using std::vector;
368     using std::string;
369 
370     po::options_description desc("Options");
371 
372     desc.add_options()
373         ("help,h", "Show this help.")
374         ("prm-file,P",
375          po::value<string>(),
376          "Parameter file in json format. "
377         )
378         (
379          "prm,p",
380          po::value< vector<string> >()->multitoken(),
381          "Parameters specified as name=value pairs. "
382          "May be provided multiple times. Examples:\n"
383          "  -p solver.tol=1e-3\n"
384          "  -p precond.coarse_enough=300"
385         )
386         ("matrix,A",
387          po::value<string>(),
388          "System matrix in the MatrixMarket format. "
389          "When not specified, solves a Poisson problem in 3D unit cube. "
390         )
391         (
392          "rhs,f",
393          po::value<string>(),
394          "The RHS vector in the MatrixMarket format. "
395          "When omitted, a vector of ones is used by default. "
396          "Should only be provided together with a system matrix. "
397         )
398         (
399          "f0",
400          po::bool_switch()->default_value(false),
401          "Use zero RHS vector. Implies --random-initial and solver.ns_search=true"
402         )
403         (
404          "f1",
405          po::bool_switch()->default_value(false),
406          "Set RHS = Ax where x = 1"
407         )
408         (
409          "null,N",
410          po::value<string>(),
411          "The near null-space vectors in the MatrixMarket format. "
412          "Should be a dense matrix of size N*M, where N is the number of "
413          "unknowns, and M is the number of null-space vectors. "
414          "Should only be provided together with a system matrix. "
415         )
416         (
417          "coords,C",
418          po::value<string>(),
419          "Coordinate matrix where number of rows corresponds to the number of grid nodes "
420          "and the number of columns corresponds to the problem dimensionality (2 or 3). "
421          "Will be used to construct near null-space vectors as rigid body modes. "
422          "Should only be provided together with a system matrix. "
423         )
424         (
425          "binary,B",
426          po::bool_switch()->default_value(false),
427          "When specified, treat input files as binary instead of as MatrixMarket. "
428          "It is assumed the files were converted to binary format with mm2bin utility. "
429         )
430         (
431          "scale,s",
432          po::bool_switch()->default_value(false),
433          "Scale the matrix so that the diagonal is unit. "
434         )
435         (
436          "block-size,b",
437          po::value<int>()->default_value(1),
438          "The block size of the system matrix. "
439          "When specified, the system matrix is assumed to have block-wise structure. "
440          "This usually is the case for problems in elasticity, structural mechanics, "
441          "for coupled systems of PDE (such as Navier-Stokes equations), etc. "
442         )
443         (
444          "size,n",
445          po::value<int>()->default_value(32),
446          "The size of the Poisson problem to solve when no system matrix is given. "
447          "Specified as number of grid nodes along each dimension of a unit cube. "
448          "The resulting system will have n*n*n unknowns. "
449         )
450         (
451          "anisotropy,a",
452          po::value<double>()->default_value(1.0),
453          "The anisotropy value for the generated Poisson value. "
454          "Used to determine problem scaling along X, Y, and Z axes: "
455          "hy = hx * a, hz = hy * a."
456         )
457         (
458          "single-level,1",
459          po::bool_switch()->default_value(false),
460          "When specified, the AMG hierarchy is not constructed. "
461          "Instead, the problem is solved using a single-level smoother as preconditioner. "
462         )
463         (
464          "reorder,r",
465          po::bool_switch()->default_value(false),
466          "When specified, the matrix will be reordered to improve cache-locality"
467         )
468         (
469          "initial,x",
470          po::value<double>()->default_value(0),
471          "Value to use as initial approximation. "
472         )
473         (
474          "random-initial",
475          po::bool_switch()->default_value(false),
476          "Use random initial approximation. "
477         )
478         (
479          "output,o",
480          po::value<string>(),
481          "Output file. Will be saved in the MatrixMarket format. "
482          "When omitted, the solution is not saved. "
483         )
484         ;
485 
486     po::positional_options_description p;
487     p.add("prm", -1);
488 
489     po::variables_map vm;
490     po::store(po::command_line_parser(argc, argv).options(desc).positional(p).run(), vm);
491     po::notify(vm);
492 
493     if (vm.count("help")) {
494         std::cout << desc << std::endl;
495         return 0;
496     }
497 
498     for (int i = 0; i < argc; ++i) {
499         if (i) std::cout << " ";
500         std::cout << argv[i];
501     }
502     std::cout << std::endl;
503 
504     boost::property_tree::ptree prm;
505     if (vm.count("prm-file")) {
506         read_json(vm["prm-file"].as<string>(), prm);
507     }
508 
509     if (vm.count("prm")) {
510         for(const string &v : vm["prm"].as<vector<string> >()) {
511             amgcl::put(prm, v);
512         }
513     }
514 
515     size_t rows, nv = 0;
516     vector<ptrdiff_t> ptr, col;
517     vector<double> val, rhs, null, x;
518 
519     if (vm.count("matrix")) {
520         auto t = prof.scoped_tic("reading");
521 
522         string Afile  = vm["matrix"].as<string>();
523         bool   binary = vm["binary"].as<bool>();
524 
525         if (binary) {
526             io::read_crs(Afile, rows, ptr, col, val);
527         } else {
528             size_t cols;
529             std::tie(rows, cols) = io::mm_reader(Afile)(ptr, col, val);
530             precondition(rows == cols, "Non-square system matrix");
531         }
532 
533         if (vm.count("rhs")) {
534             string bfile = vm["rhs"].as<string>();
535 
536             size_t n, m;
537 
538             if (binary) {
539                 io::read_dense(bfile, n, m, rhs);
540             } else {
541                 std::tie(n, m) = io::mm_reader(bfile)(rhs);
542             }
543 
544             precondition(n == rows && m == 1, "The RHS vector has wrong size");
545         } else if (vm["f1"].as<bool>()) {
546             rhs.resize(rows);
547             for(size_t i = 0; i < rows; ++i) {
548                 double s = 0;
549                 for(ptrdiff_t j = ptr[i], e = ptr[i+1]; j < e; ++j)
550                     s += val[j];
551                 rhs[i] = s;
552             }
553         } else {
554             rhs.resize(rows, vm["f0"].as<bool>() ? 0.0 : 1.0);
555         }
556 
557         if (vm.count("null")) {
558             string nfile = vm["null"].as<string>();
559 
560             size_t m;
561 
562             if (binary) {
563                 io::read_dense(nfile, m, nv, null);
564             } else {
565                 std::tie(m, nv) = io::mm_reader(nfile)(null);
566             }
567 
568             precondition(m == rows, "Near null-space vectors have wrong size");
569         } else if (vm.count("coords")) {
570             string cfile = vm["coords"].as<string>();
571             std::vector<double> coo;
572 
573             size_t m, ndim;
574 
575             if (binary) {
576                 io::read_dense(cfile, m, ndim, coo);
577             } else {
578                 std::tie(m, ndim) = io::mm_reader(cfile)(coo);
579             }
580 
581             precondition(m * ndim == rows && (ndim == 2 || ndim == 3), "Coordinate matrix has wrong size");
582 
583             nv = amgcl::coarsening::rigid_body_modes(ndim, coo, null);
584         }
585 
586         if (nv) {
587             prm.put("precond.coarsening.nullspace.cols", nv);
588             prm.put("precond.coarsening.nullspace.rows", rows);
589             prm.put("precond.coarsening.nullspace.B",    &null[0]);
590         }
591     } else {
592         auto t = prof.scoped_tic("assembling");
593         rows = sample_problem(vm["size"].as<int>(), val, col, ptr, rhs, vm["anisotropy"].as<double>());
594     }
595 
596     if (vm["scale"].as<bool>()) {
597         std::vector<double> dia(rows, 1.0);
598 
599         for (ptrdiff_t i = 0; i < static_cast<ptrdiff_t>(rows); ++i) {
600             double d = 1.0;
601             for(ptrdiff_t j = ptr[i], e = ptr[i+1]; j < e; ++j) {
602                 if (col[j] == i) {
603                     d = 1 / sqrt(val[j]);
604                 }
605             }
606             if (!std::isnan(d)) dia[i] = d;
607         }
608 
609         for (ptrdiff_t i = 0; i < static_cast<ptrdiff_t>(rows); ++i) {
610             rhs[i] *= dia[i];
611             for(ptrdiff_t j = ptr[i], e = ptr[i+1]; j < e; ++j) {
612                 val[j] *= dia[i] * dia[col[j]];
613             }
614         }
615     }
616 
617     x.resize(rows, vm["initial"].as<double>());
618     if (vm["random-initial"].as<bool>() || vm["f0"].as<bool>()) {
619         std::mt19937 rng;
620         std::uniform_real_distribution<double> rnd(-1, 1);
621         for(auto &v : x) v = rnd(rng);
622     }
623 
624     if (vm["f0"].as<bool>()) {
625         prm.put("solver.ns_search", true);
626     }
627 
628     size_t iters;
629     double error;
630 
631     int block_size = vm["block-size"].as<int>();
632 
633     if (vm["single-level"].as<bool>())
634         prm.put("precond.class", "relaxation");
635 
636     std::tie(iters, error) = solve(
637             prm, rows, ptr, col, val, rhs, x,
638             block_size, vm["reorder"].as<bool>());
639 
640     if (vm.count("output")) {
641         auto t = prof.scoped_tic("write");
642         amgcl::io::mm_write(vm["output"].as<string>(), &x[0], x.size());
643     }
644 
645     std::cout << "Iterations: " << iters << std::endl
646               << "Error:      " << error << std::endl
647               << prof << std::endl;
648 }
649