1 #ifndef VIENNACL_LINALG_CG_HPP_
2 #define VIENNACL_LINALG_CG_HPP_
3
4 /* =========================================================================
5 Copyright (c) 2010-2016, Institute for Microelectronics,
6 Institute for Analysis and Scientific Computing,
7 TU Wien.
8 Portions of this software are copyright by UChicago Argonne, LLC.
9
10 -----------------
11 ViennaCL - The Vienna Computing Library
12 -----------------
13
14 Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15
16 (A list of authors and contributors can be found in the manual)
17
18 License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20
21 /** @file viennacl/linalg/cg.hpp
22 @brief The conjugate gradient method is implemented here
23 */
24
25 #include <vector>
26 #include <map>
27 #include <cmath>
28 #include <numeric>
29
30 #include "viennacl/forwards.h"
31 #include "viennacl/tools/tools.hpp"
32 #include "viennacl/linalg/ilu.hpp"
33 #include "viennacl/linalg/prod.hpp"
34 #include "viennacl/linalg/inner_prod.hpp"
35 #include "viennacl/linalg/norm_2.hpp"
36 #include "viennacl/traits/clear.hpp"
37 #include "viennacl/traits/size.hpp"
38 #include "viennacl/meta/result_of.hpp"
39 #include "viennacl/linalg/iterative_operations.hpp"
40
41 namespace viennacl
42 {
43 namespace linalg
44 {
45
46 /** @brief A tag for the conjugate gradient Used for supplying solver parameters and for dispatching the solve() function
47 */
48 class cg_tag
49 {
50 public:
51 /** @brief The constructor
52 *
53 * @param tol Relative tolerance for the residual (solver quits if ||r|| < tol * ||r_initial||)
54 * @param max_iterations The maximum number of iterations
55 */
cg_tag(double tol=1e-8,unsigned int max_iterations=300)56 cg_tag(double tol = 1e-8, unsigned int max_iterations = 300) : tol_(tol), abs_tol_(0), iterations_(max_iterations) {}
57
58 /** @brief Returns the relative tolerance */
tolerance() const59 double tolerance() const { return tol_; }
60
61 /** @brief Returns the absolute tolerance */
abs_tolerance() const62 double abs_tolerance() const { return abs_tol_; }
63 /** @brief Sets the absolute tolerance */
abs_tolerance(double new_tol)64 void abs_tolerance(double new_tol) { if (new_tol >= 0) abs_tol_ = new_tol; }
65
66 /** @brief Returns the maximum number of iterations */
max_iterations() const67 unsigned int max_iterations() const { return iterations_; }
68
69 /** @brief Return the number of solver iterations: */
iters() const70 unsigned int iters() const { return iters_taken_; }
iters(unsigned int i) const71 void iters(unsigned int i) const { iters_taken_ = i; }
72
73 /** @brief Returns the estimated relative error at the end of the solver run */
error() const74 double error() const { return last_error_; }
75 /** @brief Sets the estimated relative error at the end of the solver run */
error(double e) const76 void error(double e) const { last_error_ = e; }
77
78
79 private:
80 double tol_;
81 double abs_tol_;
82 unsigned int iterations_;
83
84 //return values from solver
85 mutable unsigned int iters_taken_;
86 mutable double last_error_;
87 };
88
89 namespace detail
90 {
91
92 /** @brief handles the no_precond case at minimal overhead */
93 template<typename VectorT, typename PreconditionerT>
94 class z_handler{
95 public:
z_handler(VectorT & residual)96 z_handler(VectorT & residual) : z_(residual){ }
get()97 VectorT & get() { return z_; }
98 private:
99 VectorT z_;
100 };
101
102 template<typename VectorT>
103 class z_handler<VectorT, viennacl::linalg::no_precond>{
104 public:
z_handler(VectorT & residual)105 z_handler(VectorT & residual) : presidual_(&residual){ }
get()106 VectorT & get() { return *presidual_; }
107 private:
108 VectorT * presidual_;
109 };
110
111 }
112
113 namespace detail
114 {
115
116 /** @brief Implementation of a pipelined conjugate gradient algorithm (no preconditioner), specialized for ViennaCL types.
117 *
118 * Pipelined version from A. T. Chronopoulos and C. W. Gear, J. Comput. Appl. Math. 25(2), 153–168 (1989)
119 *
120 * @param A The system matrix
121 * @param rhs The load vector
122 * @param tag Solver configuration tag
123 * @param monitor A callback routine which is called at each GMRES restart
124 * @param monitor_data Data pointer to be passed to the callback routine to pass on user-specific data
125 * @return The result vector
126 */
127 //template<typename MatrixType, typename ScalarType>
128 template<typename MatrixT, typename NumericT>
pipelined_solve(MatrixT const & A,viennacl::vector<NumericT> const & rhs,cg_tag const & tag,viennacl::linalg::no_precond,bool (* monitor)(viennacl::vector<NumericT> const &,NumericT,void *)=NULL,void * monitor_data=NULL)129 viennacl::vector<NumericT> pipelined_solve(MatrixT const & A, //MatrixType const & A,
130 viennacl::vector<NumericT> const & rhs,
131 cg_tag const & tag,
132 viennacl::linalg::no_precond,
133 bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
134 void *monitor_data = NULL)
135 {
136 typedef typename viennacl::vector<NumericT>::difference_type difference_type;
137
138 viennacl::vector<NumericT> result(rhs);
139 viennacl::traits::clear(result);
140
141 viennacl::vector<NumericT> residual(rhs);
142 viennacl::vector<NumericT> p(rhs);
143 viennacl::vector<NumericT> Ap = viennacl::linalg::prod(A, p);
144 viennacl::vector<NumericT> inner_prod_buffer = viennacl::zero_vector<NumericT>(3*256, viennacl::traits::context(rhs)); // temporary buffer
145 std::vector<NumericT> host_inner_prod_buffer(inner_prod_buffer.size());
146 vcl_size_t buffer_size_per_vector = inner_prod_buffer.size() / 3;
147 difference_type buffer_offset_per_vector = static_cast<difference_type>(buffer_size_per_vector);
148
149 NumericT norm_rhs_squared = viennacl::linalg::norm_2(residual); norm_rhs_squared *= norm_rhs_squared;
150
151 if (norm_rhs_squared <= tag.abs_tolerance() * tag.abs_tolerance()) //check for early convergence of A*x = 0
152 return result;
153
154 NumericT inner_prod_rr = norm_rhs_squared;
155 NumericT alpha = inner_prod_rr / viennacl::linalg::inner_prod(p, Ap);
156 NumericT beta = viennacl::linalg::norm_2(Ap); beta = (alpha * alpha * beta * beta - inner_prod_rr) / inner_prod_rr;
157 NumericT inner_prod_ApAp = 0;
158 NumericT inner_prod_pAp = 0;
159
160 for (unsigned int i = 0; i < tag.max_iterations(); ++i)
161 {
162 tag.iters(i+1);
163
164 viennacl::linalg::pipelined_cg_vector_update(result, alpha, p, residual, Ap, beta, inner_prod_buffer);
165 viennacl::linalg::pipelined_cg_prod(A, p, Ap, inner_prod_buffer);
166
167 // bring back the partial results to the host:
168 viennacl::fast_copy(inner_prod_buffer.begin(), inner_prod_buffer.end(), host_inner_prod_buffer.begin());
169
170 inner_prod_rr = std::accumulate(host_inner_prod_buffer.begin(), host_inner_prod_buffer.begin() + buffer_offset_per_vector, NumericT(0));
171 inner_prod_ApAp = std::accumulate(host_inner_prod_buffer.begin() + buffer_offset_per_vector, host_inner_prod_buffer.begin() + 2 * buffer_offset_per_vector, NumericT(0));
172 inner_prod_pAp = std::accumulate(host_inner_prod_buffer.begin() + 2 * buffer_offset_per_vector, host_inner_prod_buffer.begin() + 3 * buffer_offset_per_vector, NumericT(0));
173
174 if (monitor && monitor(result, std::sqrt(std::fabs(inner_prod_rr / norm_rhs_squared)), monitor_data))
175 break;
176 if (std::fabs(inner_prod_rr / norm_rhs_squared) < tag.tolerance() * tag.tolerance() || std::fabs(inner_prod_rr) < tag.abs_tolerance() * tag.abs_tolerance()) //squared norms involved here
177 break;
178
179 alpha = inner_prod_rr / inner_prod_pAp;
180 beta = (alpha*alpha*inner_prod_ApAp - inner_prod_rr) / inner_prod_rr;
181 }
182
183 //store last error estimate:
184 tag.error(std::sqrt(std::fabs(inner_prod_rr) / norm_rhs_squared));
185
186 return result;
187 }
188
189
190 /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */
191 template<typename NumericT>
solve_impl(viennacl::compressed_matrix<NumericT> const & A,viennacl::vector<NumericT> const & rhs,cg_tag const & tag,viennacl::linalg::no_precond,bool (* monitor)(viennacl::vector<NumericT> const &,NumericT,void *)=NULL,void * monitor_data=NULL)192 viennacl::vector<NumericT> solve_impl(viennacl::compressed_matrix<NumericT> const & A,
193 viennacl::vector<NumericT> const & rhs,
194 cg_tag const & tag,
195 viennacl::linalg::no_precond,
196 bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
197 void *monitor_data = NULL)
198 {
199 return pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data);
200 }
201
202
203 /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */
204 template<typename NumericT>
solve_impl(viennacl::coordinate_matrix<NumericT> const & A,viennacl::vector<NumericT> const & rhs,cg_tag const & tag,viennacl::linalg::no_precond,bool (* monitor)(viennacl::vector<NumericT> const &,NumericT,void *)=NULL,void * monitor_data=NULL)205 viennacl::vector<NumericT> solve_impl(viennacl::coordinate_matrix<NumericT> const & A,
206 viennacl::vector<NumericT> const & rhs,
207 cg_tag const & tag,
208 viennacl::linalg::no_precond,
209 bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
210 void *monitor_data = NULL)
211 {
212 return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data);
213 }
214
215
216
217 /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */
218 template<typename NumericT>
solve_impl(viennacl::ell_matrix<NumericT> const & A,viennacl::vector<NumericT> const & rhs,cg_tag const & tag,viennacl::linalg::no_precond,bool (* monitor)(viennacl::vector<NumericT> const &,NumericT,void *)=NULL,void * monitor_data=NULL)219 viennacl::vector<NumericT> solve_impl(viennacl::ell_matrix<NumericT> const & A,
220 viennacl::vector<NumericT> const & rhs,
221 cg_tag const & tag,
222 viennacl::linalg::no_precond,
223 bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
224 void *monitor_data = NULL)
225 {
226 return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data);
227 }
228
229
230
231 /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */
232 template<typename NumericT>
solve_impl(viennacl::sliced_ell_matrix<NumericT> const & A,viennacl::vector<NumericT> const & rhs,cg_tag const & tag,viennacl::linalg::no_precond,bool (* monitor)(viennacl::vector<NumericT> const &,NumericT,void *)=NULL,void * monitor_data=NULL)233 viennacl::vector<NumericT> solve_impl(viennacl::sliced_ell_matrix<NumericT> const & A,
234 viennacl::vector<NumericT> const & rhs,
235 cg_tag const & tag,
236 viennacl::linalg::no_precond,
237 bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
238 void *monitor_data = NULL)
239 {
240 return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data);
241 }
242
243
244 /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */
245 template<typename NumericT>
solve_impl(viennacl::hyb_matrix<NumericT> const & A,viennacl::vector<NumericT> const & rhs,cg_tag const & tag,viennacl::linalg::no_precond,bool (* monitor)(viennacl::vector<NumericT> const &,NumericT,void *)=NULL,void * monitor_data=NULL)246 viennacl::vector<NumericT> solve_impl(viennacl::hyb_matrix<NumericT> const & A,
247 viennacl::vector<NumericT> const & rhs,
248 cg_tag const & tag,
249 viennacl::linalg::no_precond,
250 bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
251 void *monitor_data = NULL)
252 {
253 return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data);
254 }
255
256
257 template<typename MatrixT, typename VectorT, typename PreconditionerT>
258 VectorT solve_impl(MatrixT const & matrix,
259 VectorT const & rhs,
260 cg_tag const & tag,
261 PreconditionerT const & precond,
262 bool (*monitor)(VectorT const &, typename viennacl::result_of::cpu_value_type<typename viennacl::result_of::value_type<VectorT>::type>::type, void*) = NULL,
263 void *monitor_data = NULL)
264 {
265 typedef typename viennacl::result_of::value_type<VectorT>::type NumericType;
266 typedef typename viennacl::result_of::cpu_value_type<NumericType>::type CPU_NumericType;
267
268 VectorT result = rhs;
269 viennacl::traits::clear(result);
270
271 VectorT residual = rhs;
272 VectorT tmp = rhs;
273 detail::z_handler<VectorT, PreconditionerT> zhandler(residual);
274 VectorT & z = zhandler.get();
275
276 precond.apply(z);
277 VectorT p = z;
278
279 CPU_NumericType ip_rr = viennacl::linalg::inner_prod(residual, z);
280 CPU_NumericType alpha;
281 CPU_NumericType new_ip_rr = 0;
282 CPU_NumericType beta;
283 CPU_NumericType norm_rhs_squared = ip_rr;
284 CPU_NumericType new_ipp_rr_over_norm_rhs;
285
286 if (norm_rhs_squared <= tag.abs_tolerance() * tag.abs_tolerance()) //solution is zero if RHS norm (squared) is zero
287 return result;
288
289 for (unsigned int i = 0; i < tag.max_iterations(); ++i)
290 {
291 tag.iters(i+1);
292 tmp = viennacl::linalg::prod(matrix, p);
293
294 alpha = ip_rr / viennacl::linalg::inner_prod(tmp, p);
295
296 result += alpha * p;
297 residual -= alpha * tmp;
298 z = residual;
299 precond.apply(z);
300
301 if (static_cast<VectorT*>(&residual)==static_cast<VectorT*>(&z))
302 new_ip_rr = std::pow(viennacl::linalg::norm_2(residual),2);
303 else
304 new_ip_rr = viennacl::linalg::inner_prod(residual, z);
305
306 new_ipp_rr_over_norm_rhs = new_ip_rr / norm_rhs_squared;
307 if (monitor && monitor(result, std::sqrt(std::fabs(new_ipp_rr_over_norm_rhs)), monitor_data))
308 break;
309 if (std::fabs(new_ipp_rr_over_norm_rhs) < tag.tolerance() * tag.tolerance() || std::fabs(new_ip_rr) < tag.abs_tolerance() * tag.abs_tolerance()) //squared norms involved here
310 break;
311
312 beta = new_ip_rr / ip_rr;
313 ip_rr = new_ip_rr;
314
315 p = z + beta*p;
316 }
317
318 //store last error estimate:
319 tag.error(std::sqrt(std::fabs(new_ip_rr / norm_rhs_squared)));
320
321 return result;
322 }
323
324 }
325
326
327
328 /** @brief Implementation of the preconditioned conjugate gradient solver, generic implementation for non-ViennaCL types.
329 *
330 * Following Algorithm 9.1 in "Iterative Methods for Sparse Linear Systems" by Y. Saad
331 *
332 * @param matrix The system matrix
333 * @param rhs The load vector
334 * @param tag Solver configuration tag
335 * @param precond A preconditioner. Precondition operation is done via member function apply()
336 * @return The result vector
337 */
338 template<typename MatrixT, typename VectorT, typename PreconditionerT>
339 VectorT solve(MatrixT const & matrix, VectorT const & rhs, cg_tag const & tag, PreconditionerT const & precond)
340 {
341 return detail::solve_impl(matrix, rhs, tag, precond);
342 }
343
344 /** @brief Convenience overload for calling the CG solver using types from the C++ STL.
345 *
346 * A std::vector<std::map<T, U> > matrix is convenient for e.g. finite element assembly.
347 * It is not the fastest option for setting up a system, but often it is fast enough - particularly for just trying things out.
348 */
349 template<typename IndexT, typename NumericT, typename PreconditionerT>
solve(std::vector<std::map<IndexT,NumericT>> const & A,std::vector<NumericT> const & rhs,cg_tag const & tag,PreconditionerT const & precond)350 std::vector<NumericT> solve(std::vector< std::map<IndexT, NumericT> > const & A, std::vector<NumericT> const & rhs, cg_tag const & tag, PreconditionerT const & precond)
351 {
352 viennacl::compressed_matrix<NumericT> vcl_A;
353 viennacl::copy(A, vcl_A);
354
355 viennacl::vector<NumericT> vcl_rhs(rhs.size());
356 viennacl::copy(rhs, vcl_rhs);
357
358 viennacl::vector<NumericT> vcl_result = solve(vcl_A, vcl_rhs, tag, precond);
359
360 std::vector<NumericT> result(vcl_result.size());
361 viennacl::copy(vcl_result, result);
362 return result;
363 }
364
365 /** @brief Entry point for the unpreconditioned CG method.
366 *
367 * @param matrix The system matrix
368 * @param rhs Right hand side vector (load vector)
369 * @param tag A BiCGStab tag providing relative tolerances, etc.
370 */
371 template<typename MatrixT, typename VectorT>
solve(MatrixT const & matrix,VectorT const & rhs,cg_tag const & tag)372 VectorT solve(MatrixT const & matrix, VectorT const & rhs, cg_tag const & tag)
373 {
374 return solve(matrix, rhs, tag, viennacl::linalg::no_precond());
375 }
376
377
378
379 template<typename VectorT>
380 class cg_solver
381 {
382 public:
383 typedef typename viennacl::result_of::cpu_value_type<VectorT>::type numeric_type;
384
cg_solver(cg_tag const & tag)385 cg_solver(cg_tag const & tag) : tag_(tag), monitor_callback_(NULL), user_data_(NULL) {}
386
387 template<typename MatrixT, typename PreconditionerT>
operator ()(MatrixT const & A,VectorT const & b,PreconditionerT const & precond) const388 VectorT operator()(MatrixT const & A, VectorT const & b, PreconditionerT const & precond) const
389 {
390 if (viennacl::traits::size(init_guess_) > 0) // take initial guess into account
391 {
392 VectorT mod_rhs = viennacl::linalg::prod(A, init_guess_);
393 mod_rhs = b - mod_rhs;
394 VectorT y = detail::solve_impl(A, mod_rhs, tag_, precond, monitor_callback_, user_data_);
395 return init_guess_ + y;
396 }
397 return detail::solve_impl(A, b, tag_, precond, monitor_callback_, user_data_);
398 }
399
400
401 template<typename MatrixT>
operator ()(MatrixT const & A,VectorT const & b) const402 VectorT operator()(MatrixT const & A, VectorT const & b) const
403 {
404 return operator()(A, b, viennacl::linalg::no_precond());
405 }
406
407 /** @brief Specifies an initial guess for the iterative solver.
408 *
409 * An iterative solver for Ax = b with initial guess x_0 is equivalent to an iterative solver for Ay = b' := b - Ax_0, where x = x_0 + y.
410 */
set_initial_guess(VectorT const & x)411 void set_initial_guess(VectorT const & x) { init_guess_ = x; }
412
413 /** @brief Sets a monitor function pointer to be called in each iteration. Set to NULL to run without monitor.
414 *
415 * The monitor function is called with the current guess for the result as first argument and the current relative residual estimate as second argument.
416 * The third argument is a pointer to user-defined data, through which additional information can be passed.
417 * This pointer needs to be set with set_monitor_data. If not set, NULL is passed.
418 * If the montior function returns true, the solver terminates (either convergence or divergence).
419 */
set_monitor(bool (* monitor_fun)(VectorT const &,numeric_type,void *),void * user_data)420 void set_monitor(bool (*monitor_fun)(VectorT const &, numeric_type, void *), void *user_data)
421 {
422 monitor_callback_ = monitor_fun;
423 user_data_ = user_data;
424 }
425
426 /** @brief Returns the solver tag containing basic configuration such as tolerances, etc. */
tag() const427 cg_tag const & tag() const { return tag_; }
428
429 private:
430 cg_tag tag_;
431 VectorT init_guess_;
432 bool (*monitor_callback_)(VectorT const &, numeric_type, void *);
433 void *user_data_;
434 };
435
436
437 }
438 }
439
440 #endif
441