1 #ifndef VIENNACL_LINALG_CG_HPP_
2 #define VIENNACL_LINALG_CG_HPP_
3 
4 /* =========================================================================
5    Copyright (c) 2010-2016, Institute for Microelectronics,
6                             Institute for Analysis and Scientific Computing,
7                             TU Wien.
8    Portions of this software are copyright by UChicago Argonne, LLC.
9 
10                             -----------------
11                   ViennaCL - The Vienna Computing Library
12                             -----------------
13 
14    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
15 
16    (A list of authors and contributors can be found in the manual)
17 
18    License:         MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
21 /** @file viennacl/linalg/cg.hpp
22     @brief The conjugate gradient method is implemented here
23 */
24 
25 #include <vector>
26 #include <map>
27 #include <cmath>
28 #include <numeric>
29 
30 #include "viennacl/forwards.h"
31 #include "viennacl/tools/tools.hpp"
32 #include "viennacl/linalg/ilu.hpp"
33 #include "viennacl/linalg/prod.hpp"
34 #include "viennacl/linalg/inner_prod.hpp"
35 #include "viennacl/linalg/norm_2.hpp"
36 #include "viennacl/traits/clear.hpp"
37 #include "viennacl/traits/size.hpp"
38 #include "viennacl/meta/result_of.hpp"
39 #include "viennacl/linalg/iterative_operations.hpp"
40 
41 namespace viennacl
42 {
43 namespace linalg
44 {
45 
46 /** @brief A tag for the conjugate gradient Used for supplying solver parameters and for dispatching the solve() function
47 */
48 class cg_tag
49 {
50 public:
51   /** @brief The constructor
52   *
53   * @param tol              Relative tolerance for the residual (solver quits if ||r|| < tol * ||r_initial||)
54   * @param max_iterations   The maximum number of iterations
55   */
cg_tag(double tol=1e-8,unsigned int max_iterations=300)56   cg_tag(double tol = 1e-8, unsigned int max_iterations = 300) : tol_(tol), abs_tol_(0), iterations_(max_iterations) {}
57 
58   /** @brief Returns the relative tolerance */
tolerance() const59   double tolerance() const { return tol_; }
60 
61   /** @brief Returns the absolute tolerance */
abs_tolerance() const62   double abs_tolerance() const { return abs_tol_; }
63   /** @brief Sets the absolute tolerance */
abs_tolerance(double new_tol)64   void abs_tolerance(double new_tol) { if (new_tol >= 0) abs_tol_ = new_tol; }
65 
66   /** @brief Returns the maximum number of iterations */
max_iterations() const67   unsigned int max_iterations() const { return iterations_; }
68 
69   /** @brief Return the number of solver iterations: */
iters() const70   unsigned int iters() const { return iters_taken_; }
iters(unsigned int i) const71   void iters(unsigned int i) const { iters_taken_ = i; }
72 
73   /** @brief Returns the estimated relative error at the end of the solver run */
error() const74   double error() const { return last_error_; }
75   /** @brief Sets the estimated relative error at the end of the solver run */
error(double e) const76   void error(double e) const { last_error_ = e; }
77 
78 
79 private:
80   double tol_;
81   double abs_tol_;
82   unsigned int iterations_;
83 
84   //return values from solver
85   mutable unsigned int iters_taken_;
86   mutable double last_error_;
87 };
88 
89 namespace detail
90 {
91 
92   /** @brief handles the no_precond case at minimal overhead */
93   template<typename VectorT, typename PreconditionerT>
94   class z_handler{
95   public:
z_handler(VectorT & residual)96     z_handler(VectorT & residual) : z_(residual){ }
get()97     VectorT & get() { return z_; }
98   private:
99     VectorT z_;
100   };
101 
102   template<typename VectorT>
103   class z_handler<VectorT, viennacl::linalg::no_precond>{
104   public:
z_handler(VectorT & residual)105     z_handler(VectorT & residual) : presidual_(&residual){ }
get()106     VectorT & get() { return *presidual_; }
107   private:
108     VectorT * presidual_;
109   };
110 
111 }
112 
113 namespace detail
114 {
115 
116   /** @brief Implementation of a pipelined conjugate gradient algorithm (no preconditioner), specialized for ViennaCL types.
117   *
118   * Pipelined version from A. T. Chronopoulos and C. W. Gear, J. Comput. Appl. Math. 25(2), 153–168 (1989)
119   *
120   * @param A            The system matrix
121   * @param rhs          The load vector
122   * @param tag          Solver configuration tag
123   * @param monitor      A callback routine which is called at each GMRES restart
124   * @param monitor_data Data pointer to be passed to the callback routine to pass on user-specific data
125   * @return The result vector
126   */
127   //template<typename MatrixType, typename ScalarType>
128   template<typename MatrixT, typename NumericT>
pipelined_solve(MatrixT const & A,viennacl::vector<NumericT> const & rhs,cg_tag const & tag,viennacl::linalg::no_precond,bool (* monitor)(viennacl::vector<NumericT> const &,NumericT,void *)=NULL,void * monitor_data=NULL)129   viennacl::vector<NumericT> pipelined_solve(MatrixT const & A, //MatrixType const & A,
130                                              viennacl::vector<NumericT> const & rhs,
131                                              cg_tag const & tag,
132                                              viennacl::linalg::no_precond,
133                                              bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
134                                              void *monitor_data = NULL)
135   {
136     typedef typename viennacl::vector<NumericT>::difference_type   difference_type;
137 
138     viennacl::vector<NumericT> result(rhs);
139     viennacl::traits::clear(result);
140 
141     viennacl::vector<NumericT> residual(rhs);
142     viennacl::vector<NumericT> p(rhs);
143     viennacl::vector<NumericT> Ap = viennacl::linalg::prod(A, p);
144     viennacl::vector<NumericT> inner_prod_buffer = viennacl::zero_vector<NumericT>(3*256, viennacl::traits::context(rhs)); // temporary buffer
145     std::vector<NumericT>      host_inner_prod_buffer(inner_prod_buffer.size());
146     vcl_size_t                 buffer_size_per_vector = inner_prod_buffer.size() / 3;
147     difference_type            buffer_offset_per_vector = static_cast<difference_type>(buffer_size_per_vector);
148 
149     NumericT norm_rhs_squared = viennacl::linalg::norm_2(residual); norm_rhs_squared *= norm_rhs_squared;
150 
151     if (norm_rhs_squared <= tag.abs_tolerance() * tag.abs_tolerance()) //check for early convergence of A*x = 0
152       return result;
153 
154     NumericT inner_prod_rr = norm_rhs_squared;
155     NumericT alpha = inner_prod_rr / viennacl::linalg::inner_prod(p, Ap);
156     NumericT beta  = viennacl::linalg::norm_2(Ap); beta = (alpha * alpha * beta * beta - inner_prod_rr) / inner_prod_rr;
157     NumericT inner_prod_ApAp = 0;
158     NumericT inner_prod_pAp  = 0;
159 
160     for (unsigned int i = 0; i < tag.max_iterations(); ++i)
161     {
162       tag.iters(i+1);
163 
164       viennacl::linalg::pipelined_cg_vector_update(result, alpha, p, residual, Ap, beta, inner_prod_buffer);
165       viennacl::linalg::pipelined_cg_prod(A, p, Ap, inner_prod_buffer);
166 
167       // bring back the partial results to the host:
168       viennacl::fast_copy(inner_prod_buffer.begin(), inner_prod_buffer.end(), host_inner_prod_buffer.begin());
169 
170       inner_prod_rr   = std::accumulate(host_inner_prod_buffer.begin(),                                host_inner_prod_buffer.begin() +     buffer_offset_per_vector, NumericT(0));
171       inner_prod_ApAp = std::accumulate(host_inner_prod_buffer.begin() +     buffer_offset_per_vector, host_inner_prod_buffer.begin() + 2 * buffer_offset_per_vector, NumericT(0));
172       inner_prod_pAp  = std::accumulate(host_inner_prod_buffer.begin() + 2 * buffer_offset_per_vector, host_inner_prod_buffer.begin() + 3 * buffer_offset_per_vector, NumericT(0));
173 
174       if (monitor && monitor(result, std::sqrt(std::fabs(inner_prod_rr / norm_rhs_squared)), monitor_data))
175         break;
176       if (std::fabs(inner_prod_rr / norm_rhs_squared) < tag.tolerance() *  tag.tolerance() || std::fabs(inner_prod_rr) < tag.abs_tolerance() * tag.abs_tolerance())    //squared norms involved here
177         break;
178 
179       alpha = inner_prod_rr / inner_prod_pAp;
180       beta  = (alpha*alpha*inner_prod_ApAp - inner_prod_rr) / inner_prod_rr;
181     }
182 
183     //store last error estimate:
184     tag.error(std::sqrt(std::fabs(inner_prod_rr) / norm_rhs_squared));
185 
186     return result;
187   }
188 
189 
190   /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */
191   template<typename NumericT>
solve_impl(viennacl::compressed_matrix<NumericT> const & A,viennacl::vector<NumericT> const & rhs,cg_tag const & tag,viennacl::linalg::no_precond,bool (* monitor)(viennacl::vector<NumericT> const &,NumericT,void *)=NULL,void * monitor_data=NULL)192   viennacl::vector<NumericT> solve_impl(viennacl::compressed_matrix<NumericT> const & A,
193                                         viennacl::vector<NumericT> const & rhs,
194                                         cg_tag const & tag,
195                                         viennacl::linalg::no_precond,
196                                         bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
197                                         void *monitor_data = NULL)
198   {
199     return pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data);
200   }
201 
202 
203   /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */
204   template<typename NumericT>
solve_impl(viennacl::coordinate_matrix<NumericT> const & A,viennacl::vector<NumericT> const & rhs,cg_tag const & tag,viennacl::linalg::no_precond,bool (* monitor)(viennacl::vector<NumericT> const &,NumericT,void *)=NULL,void * monitor_data=NULL)205   viennacl::vector<NumericT> solve_impl(viennacl::coordinate_matrix<NumericT> const & A,
206                                         viennacl::vector<NumericT> const & rhs,
207                                         cg_tag const & tag,
208                                         viennacl::linalg::no_precond,
209                                         bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
210                                         void *monitor_data = NULL)
211   {
212     return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data);
213   }
214 
215 
216 
217   /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */
218   template<typename NumericT>
solve_impl(viennacl::ell_matrix<NumericT> const & A,viennacl::vector<NumericT> const & rhs,cg_tag const & tag,viennacl::linalg::no_precond,bool (* monitor)(viennacl::vector<NumericT> const &,NumericT,void *)=NULL,void * monitor_data=NULL)219   viennacl::vector<NumericT> solve_impl(viennacl::ell_matrix<NumericT> const & A,
220                                         viennacl::vector<NumericT> const & rhs,
221                                         cg_tag const & tag,
222                                         viennacl::linalg::no_precond,
223                                         bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
224                                         void *monitor_data = NULL)
225   {
226     return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data);
227   }
228 
229 
230 
231   /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */
232   template<typename NumericT>
solve_impl(viennacl::sliced_ell_matrix<NumericT> const & A,viennacl::vector<NumericT> const & rhs,cg_tag const & tag,viennacl::linalg::no_precond,bool (* monitor)(viennacl::vector<NumericT> const &,NumericT,void *)=NULL,void * monitor_data=NULL)233   viennacl::vector<NumericT> solve_impl(viennacl::sliced_ell_matrix<NumericT> const & A,
234                                         viennacl::vector<NumericT> const & rhs,
235                                         cg_tag const & tag,
236                                         viennacl::linalg::no_precond,
237                                         bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
238                                         void *monitor_data = NULL)
239   {
240     return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data);
241   }
242 
243 
244   /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */
245   template<typename NumericT>
solve_impl(viennacl::hyb_matrix<NumericT> const & A,viennacl::vector<NumericT> const & rhs,cg_tag const & tag,viennacl::linalg::no_precond,bool (* monitor)(viennacl::vector<NumericT> const &,NumericT,void *)=NULL,void * monitor_data=NULL)246   viennacl::vector<NumericT> solve_impl(viennacl::hyb_matrix<NumericT> const & A,
247                                         viennacl::vector<NumericT> const & rhs,
248                                         cg_tag const & tag,
249                                         viennacl::linalg::no_precond,
250                                         bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
251                                         void *monitor_data = NULL)
252   {
253     return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data);
254   }
255 
256 
257   template<typename MatrixT, typename VectorT, typename PreconditionerT>
258   VectorT solve_impl(MatrixT const & matrix,
259                      VectorT const & rhs,
260                      cg_tag const & tag,
261                      PreconditionerT const & precond,
262                      bool (*monitor)(VectorT const &, typename viennacl::result_of::cpu_value_type<typename viennacl::result_of::value_type<VectorT>::type>::type, void*) = NULL,
263                      void *monitor_data = NULL)
264   {
265     typedef typename viennacl::result_of::value_type<VectorT>::type           NumericType;
266     typedef typename viennacl::result_of::cpu_value_type<NumericType>::type   CPU_NumericType;
267 
268     VectorT result = rhs;
269     viennacl::traits::clear(result);
270 
271     VectorT residual = rhs;
272     VectorT tmp = rhs;
273     detail::z_handler<VectorT, PreconditionerT> zhandler(residual);
274     VectorT & z = zhandler.get();
275 
276     precond.apply(z);
277     VectorT p = z;
278 
279     CPU_NumericType ip_rr = viennacl::linalg::inner_prod(residual, z);
280     CPU_NumericType alpha;
281     CPU_NumericType new_ip_rr = 0;
282     CPU_NumericType beta;
283     CPU_NumericType norm_rhs_squared = ip_rr;
284     CPU_NumericType new_ipp_rr_over_norm_rhs;
285 
286     if (norm_rhs_squared <= tag.abs_tolerance() * tag.abs_tolerance()) //solution is zero if RHS norm (squared) is zero
287       return result;
288 
289     for (unsigned int i = 0; i < tag.max_iterations(); ++i)
290     {
291       tag.iters(i+1);
292       tmp = viennacl::linalg::prod(matrix, p);
293 
294       alpha = ip_rr / viennacl::linalg::inner_prod(tmp, p);
295 
296       result += alpha * p;
297       residual -= alpha * tmp;
298       z = residual;
299       precond.apply(z);
300 
301       if (static_cast<VectorT*>(&residual)==static_cast<VectorT*>(&z))
302         new_ip_rr = std::pow(viennacl::linalg::norm_2(residual),2);
303       else
304         new_ip_rr = viennacl::linalg::inner_prod(residual, z);
305 
306       new_ipp_rr_over_norm_rhs = new_ip_rr / norm_rhs_squared;
307       if (monitor && monitor(result, std::sqrt(std::fabs(new_ipp_rr_over_norm_rhs)), monitor_data))
308         break;
309       if (std::fabs(new_ipp_rr_over_norm_rhs) < tag.tolerance() *  tag.tolerance() || std::fabs(new_ip_rr) < tag.abs_tolerance() * tag.abs_tolerance())    //squared norms involved here
310         break;
311 
312       beta = new_ip_rr / ip_rr;
313       ip_rr = new_ip_rr;
314 
315       p = z + beta*p;
316     }
317 
318     //store last error estimate:
319     tag.error(std::sqrt(std::fabs(new_ip_rr / norm_rhs_squared)));
320 
321     return result;
322   }
323 
324 }
325 
326 
327 
328 /** @brief Implementation of the preconditioned conjugate gradient solver, generic implementation for non-ViennaCL types.
329 *
330 * Following Algorithm 9.1 in "Iterative Methods for Sparse Linear Systems" by Y. Saad
331 *
332 * @param matrix     The system matrix
333 * @param rhs        The load vector
334 * @param tag        Solver configuration tag
335 * @param precond    A preconditioner. Precondition operation is done via member function apply()
336 * @return The result vector
337 */
338 template<typename MatrixT, typename VectorT, typename PreconditionerT>
339 VectorT solve(MatrixT const & matrix, VectorT const & rhs, cg_tag const & tag, PreconditionerT const & precond)
340 {
341   return detail::solve_impl(matrix, rhs, tag, precond);
342 }
343 
344 /** @brief Convenience overload for calling the CG solver using types from the C++ STL.
345   *
346   * A std::vector<std::map<T, U> > matrix is convenient for e.g. finite element assembly.
347   * It is not the fastest option for setting up a system, but often it is fast enough - particularly for just trying things out.
348   */
349 template<typename IndexT, typename NumericT, typename PreconditionerT>
solve(std::vector<std::map<IndexT,NumericT>> const & A,std::vector<NumericT> const & rhs,cg_tag const & tag,PreconditionerT const & precond)350 std::vector<NumericT> solve(std::vector< std::map<IndexT, NumericT> > const & A, std::vector<NumericT> const & rhs, cg_tag const & tag, PreconditionerT const & precond)
351 {
352   viennacl::compressed_matrix<NumericT> vcl_A;
353   viennacl::copy(A, vcl_A);
354 
355   viennacl::vector<NumericT> vcl_rhs(rhs.size());
356   viennacl::copy(rhs, vcl_rhs);
357 
358   viennacl::vector<NumericT> vcl_result = solve(vcl_A, vcl_rhs, tag, precond);
359 
360   std::vector<NumericT> result(vcl_result.size());
361   viennacl::copy(vcl_result, result);
362   return result;
363 }
364 
365 /** @brief Entry point for the unpreconditioned CG method.
366  *
367  *  @param matrix    The system matrix
368  *  @param rhs       Right hand side vector (load vector)
369  *  @param tag       A BiCGStab tag providing relative tolerances, etc.
370  */
371 template<typename MatrixT, typename VectorT>
solve(MatrixT const & matrix,VectorT const & rhs,cg_tag const & tag)372 VectorT solve(MatrixT const & matrix, VectorT const & rhs, cg_tag const & tag)
373 {
374   return solve(matrix, rhs, tag, viennacl::linalg::no_precond());
375 }
376 
377 
378 
379 template<typename VectorT>
380 class cg_solver
381 {
382 public:
383   typedef typename viennacl::result_of::cpu_value_type<VectorT>::type   numeric_type;
384 
cg_solver(cg_tag const & tag)385   cg_solver(cg_tag const & tag) : tag_(tag), monitor_callback_(NULL), user_data_(NULL) {}
386 
387   template<typename MatrixT, typename PreconditionerT>
operator ()(MatrixT const & A,VectorT const & b,PreconditionerT const & precond) const388   VectorT operator()(MatrixT const & A, VectorT const & b, PreconditionerT const & precond) const
389   {
390     if (viennacl::traits::size(init_guess_) > 0) // take initial guess into account
391     {
392       VectorT mod_rhs = viennacl::linalg::prod(A, init_guess_);
393       mod_rhs = b - mod_rhs;
394       VectorT y = detail::solve_impl(A, mod_rhs, tag_, precond, monitor_callback_, user_data_);
395       return init_guess_ + y;
396     }
397     return detail::solve_impl(A, b, tag_, precond, monitor_callback_, user_data_);
398   }
399 
400 
401   template<typename MatrixT>
operator ()(MatrixT const & A,VectorT const & b) const402   VectorT operator()(MatrixT const & A, VectorT const & b) const
403   {
404     return operator()(A, b, viennacl::linalg::no_precond());
405   }
406 
407   /** @brief Specifies an initial guess for the iterative solver.
408     *
409     * An iterative solver for Ax = b with initial guess x_0 is equivalent to an iterative solver for Ay = b' := b - Ax_0, where x = x_0 + y.
410     */
set_initial_guess(VectorT const & x)411   void set_initial_guess(VectorT const & x) { init_guess_ = x; }
412 
413   /** @brief Sets a monitor function pointer to be called in each iteration. Set to NULL to run without monitor.
414    *
415    *  The monitor function is called with the current guess for the result as first argument and the current relative residual estimate as second argument.
416    *  The third argument is a pointer to user-defined data, through which additional information can be passed.
417    *  This pointer needs to be set with set_monitor_data. If not set, NULL is passed.
418    *  If the montior function returns true, the solver terminates (either convergence or divergence).
419    */
set_monitor(bool (* monitor_fun)(VectorT const &,numeric_type,void *),void * user_data)420   void set_monitor(bool (*monitor_fun)(VectorT const &, numeric_type, void *), void *user_data)
421   {
422     monitor_callback_ = monitor_fun;
423     user_data_ = user_data;
424   }
425 
426   /** @brief Returns the solver tag containing basic configuration such as tolerances, etc. */
tag() const427   cg_tag const & tag() const { return tag_; }
428 
429 private:
430   cg_tag   tag_;
431   VectorT  init_guess_;
432   bool     (*monitor_callback_)(VectorT const &, numeric_type, void *);
433   void     *user_data_;
434 };
435 
436 
437 }
438 }
439 
440 #endif
441