1 #ifndef AMGCL_SOLVER_BICGSTABL_HPP
2 #define AMGCL_SOLVER_BICGSTABL_HPP
3 
4 /*
5 The MIT License
6 
7 Copyright (c) 2012-2021 Denis Demidov <dennis.demidov@gmail.com>
8 
9 Permission is hereby granted, free of charge, to any person obtaining a copy
10 of this software and associated documentation files (the "Software"), to deal
11 in the Software without restriction, including without limitation the rights
12 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13 copies of the Software, and to permit persons to whom the Software is
14 furnished to do so, subject to the following conditions:
15 
16 The above copyright notice and this permission notice shall be included in
17 all copies or substantial portions of the Software.
18 
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
22 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 THE SOFTWARE.
26 */
27 
28 /**
29 \file   amgcl/solver/bicgstabl.hpp
30 \author Denis Demidov <dennis.demidov@gmail.com>
31 \brief  BiCGStab(L) iterative method.
32 
33 The code is ported from PETSC BCGSL [1] and is based on [2].
34 
35 [1] http://www.mcs.anl.gov/petsc/petsc-current/docs/manualpages/KSP/KSPBCGSL.html
36 [2] Fokkema, Diederik R. Enhanced implementation of BiCGstab (l) for solving
37     linear systems of equations. Universiteit Utrecht. Mathematisch Instituut,
38     1996.
39 
40 The original code came with the following license:
41 
42 \verbatim
43 Copyright (c) 1991-2014, UChicago Argonne, LLC and the PETSc Development Team
44 All rights reserved.
45 
46 Redistribution and use in source and binary forms, with or without modification,
47 are permitted provided that the following conditions are met:
48 
49 * Redistributions of source code must retain the above copyright notice, this
50   list of conditions and the following disclaimer.
51 * Redistributions in binary form must reproduce the above copyright notice, this
52   list of conditions and the following disclaimer in the documentation and/or
53   other materials provided with the distribution.
54 
55 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
56 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
57 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
58 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
59 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
60 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
61 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
62 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
63 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
64 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
65  \endverbatim
66  */
67 
68 #include <tuple>
69 #include <iostream>
70 
71 #include <amgcl/backend/interface.hpp>
72 #include <amgcl/solver/detail/default_inner_product.hpp>
73 #include <amgcl/solver/precond_side.hpp>
74 #include <amgcl/detail/qr.hpp>
75 #include <amgcl/util.hpp>
76 
77 namespace amgcl {
78 namespace solver {
79 
80 /** BiCGStab(L) method.
81  * \rst
82  * Generalization of BiCGStab method [SlDi93]_.
83  * \endrst
84  */
85 template <
86     class Backend,
87     class InnerProduct = detail::default_inner_product
88     >
89 class bicgstabl {
90     public:
91         typedef Backend backend_type;
92 
93         typedef typename Backend::vector     vector;
94         typedef typename Backend::value_type value_type;
95         typedef typename Backend::params     backend_params;
96 
97         typedef typename math::scalar_of<value_type>::type scalar_type;
98 
99         typedef typename math::inner_product_impl<
100             typename math::rhs_of<value_type>::type
101             >::return_type coef_type;
102 
103 
104         /// Solver parameters.
105         struct params {
106             // Order of the method.
107             int L;
108 
109             // Threshold used to decide when to refresh computed residuals.
110             scalar_type delta;
111 
112             // Use a convex function of the MinRes and OR polynomials
113             // after the BiCG step instead of default MinRes
114             bool convex;
115 
116             // Preconditioning kind (left/right).
117             preconditioner::side::type pside;
118 
119             // Maximum number of iterations.
120             size_t maxiter;
121 
122             // Target relative residual error.
123             scalar_type tol;
124 
125             // Target absolute residual error.
126             scalar_type abstol;
127 
128             /// Ignore the trivial solution x=0 when rhs is zero.
129             //** Useful for searching for the null-space vectors of the system */
130             bool ns_search;
131 
132             /// Verbose output (show iterations and error)
133             bool verbose;
134 
paramsamgcl::solver::bicgstabl::params135             params()
136                 : L(2), delta(0), convex(true),
137                   pside(preconditioner::side::right), maxiter(100), tol(1e-8),
138                   abstol(std::numeric_limits<scalar_type>::min()),
139                   ns_search(false), verbose(false)
140             {
141             }
142 
143 #ifndef AMGCL_NO_BOOST
paramsamgcl::solver::bicgstabl::params144             params(const boost::property_tree::ptree &p)
145                 : AMGCL_PARAMS_IMPORT_VALUE(p, L),
146                   AMGCL_PARAMS_IMPORT_VALUE(p, delta),
147                   AMGCL_PARAMS_IMPORT_VALUE(p, convex),
148                   AMGCL_PARAMS_IMPORT_VALUE(p, pside),
149                   AMGCL_PARAMS_IMPORT_VALUE(p, maxiter),
150                   AMGCL_PARAMS_IMPORT_VALUE(p, tol),
151                   AMGCL_PARAMS_IMPORT_VALUE(p, abstol),
152                   AMGCL_PARAMS_IMPORT_VALUE(p, ns_search),
153                   AMGCL_PARAMS_IMPORT_VALUE(p, verbose)
154             {
155                 check_params(p, {"L", "delta", "convex", "pside", "maxiter",
156                         "tol", "abstol", "ns_search", "verbose"});
157             }
158 
getamgcl::solver::bicgstabl::params159             void get(boost::property_tree::ptree &p, const std::string &path) const {
160                 AMGCL_PARAMS_EXPORT_VALUE(p, path, L);
161                 AMGCL_PARAMS_EXPORT_VALUE(p, path, delta);
162                 AMGCL_PARAMS_EXPORT_VALUE(p, path, convex);
163                 AMGCL_PARAMS_EXPORT_VALUE(p, path, pside);
164                 AMGCL_PARAMS_EXPORT_VALUE(p, path, maxiter);
165                 AMGCL_PARAMS_EXPORT_VALUE(p, path, tol);
166                 AMGCL_PARAMS_EXPORT_VALUE(p, path, abstol);
167                 AMGCL_PARAMS_EXPORT_VALUE(p, path, ns_search);
168                 AMGCL_PARAMS_EXPORT_VALUE(p, path, verbose);
169             }
170 #endif
171         };
172 
173         /// Preallocates necessary data structures for the system of size \p n.
bicgstabl(size_t n,const params & prm=params (),const backend_params & backend_prm=backend_params (),const InnerProduct & inner_product=InnerProduct ())174         bicgstabl(
175                 size_t n,
176                 const params &prm = params(),
177                 const backend_params &backend_prm = backend_params(),
178                 const InnerProduct &inner_product = InnerProduct()
179                 )
180             : prm(prm), n(n),
181               Rt( Backend::create_vector(n, backend_prm) ),
182               X ( Backend::create_vector(n, backend_prm) ),
183               B ( Backend::create_vector(n, backend_prm) ),
184               T ( Backend::create_vector(n, backend_prm) ),
185               R(prm.L + 1), U(prm.L + 1),
186               MZa(prm.L + 1, prm.L + 1),
187               MZb(prm.L + 1, prm.L + 1),
188               Y0(prm.L + 1), YL(prm.L + 1),
189               inner_product(inner_product)
190         {
191             precondition(prm.L > 0, "L in BiCGStab(L) should be >=1");
192 
193             for(int i = 0; i <= prm.L; ++i) {
194                 R[i] = Backend::create_vector(n, backend_prm);
195                 U[i] = Backend::create_vector(n, backend_prm);
196             }
197         }
198 
199         /* Computes the solution for the given system matrix \p A and the
200          * right-hand side \p rhs.  Returns the number of iterations made and
201          * the achieved residual as a ``std::tuple``. The solution vector
202          * \p x provides initial approximation in input and holds the computed
203          * solution on output.
204          *
205          * The system matrix may differ from the matrix used during
206          * initialization. This may be used for the solution of non-stationary
207          * problems with slowly changing coefficients. There is a strong chance
208          * that a preconditioner built for a time step will act as a reasonably
209          * good preconditioner for several subsequent time steps [DeSh12]_.
210          */
211         template <class Matrix, class Precond, class Vec1, class Vec2>
operator ()(const Matrix & A,const Precond & P,const Vec1 & rhs,Vec2 && x) const212         std::tuple<size_t, scalar_type> operator()(
213                 const Matrix &A, const Precond &P, const Vec1 &rhs, Vec2 &&x) const
214         {
215             namespace side = preconditioner::side;
216 
217             static const coef_type one  = math::identity<coef_type>();
218             static const coef_type zero = math::zero<coef_type>();
219 
220             const int L = prm.L;
221 
222             ios_saver ss(std::cout);
223 
224             scalar_type norm_rhs = norm(rhs);
225 
226             // Check if there is a trivial solution
227             if (norm_rhs < amgcl::detail::eps<scalar_type>(1)) {
228                 if (prm.ns_search) {
229                     norm_rhs = math::identity<scalar_type>();
230                 } else {
231                     backend::clear(x);
232                     return std::make_tuple(0, norm_rhs);
233                 }
234             }
235 
236             if (prm.pside == side::left) {
237                 backend::residual(rhs, A, x, *T);
238                 P.apply(*T, *B);
239             } else {
240                 backend::residual(rhs, A, x, *B);
241             }
242 
243             scalar_type zeta0 = norm(*B);
244             scalar_type eps = std::max(prm.tol * norm_rhs, prm.abstol);
245 
246             coef_type alpha = zero;
247             coef_type rho0  = one;
248             coef_type omega = one;
249 
250             // Go
251             backend::copy(*B, *R[0]);
252             backend::copy(*B, *Rt);
253             backend::clear(*X);
254             backend::clear(*U[0]);
255 
256             scalar_type zeta           = zeta0;
257             scalar_type rnmax_computed = zeta0;
258             scalar_type rnmax_true     = zeta0;
259 
260             size_t iter = 0;
261             for(; iter < prm.maxiter && zeta >= eps; iter += L) {
262                 // BiCG part
263                 rho0 = -omega * rho0;
264 
265                 for(int j = 0; j < L; ++j) {
266                     coef_type rho1 = inner_product(*R[j], *Rt);
267                     precondition(!math::is_zero(rho1),
268                             "BiCGStab(L) breakdown: diverged (zero rho)");
269 
270                     coef_type beta = alpha * (rho1 / rho0);
271                     rho0 = rho1;
272 
273                     for(int i = 0; i <= j; ++i)
274                         backend::axpby(one, *R[i], -beta, *U[i]);
275 
276                     preconditioner::spmv(prm.pside, P, A, *U[j], *U[j+1], *T);
277 
278                     coef_type sigma = inner_product(*U[j+1], *Rt);
279                     precondition(!math::is_zero(sigma),
280                             "BiCGStab(L) breakdown: diverged (zero sigma)");
281                     alpha = rho1 / sigma;
282 
283                     backend::axpby(alpha, *U[0], one, *X);
284 
285                     for(int i = 0; i <= j; ++i)
286                         backend::axpby(-alpha, *U[i+1], one, *R[i]);
287 
288                     preconditioner::spmv(prm.pside, P, A, *R[j], *R[j+1], *T);
289 
290                     zeta = norm(*R[0]);
291 
292                     rnmax_computed = std::max(zeta, rnmax_computed);
293                     rnmax_true     = std::max(zeta, rnmax_true);
294 
295                     // Check for early exit
296                     if (zeta < eps) {
297                         iter += j+1;
298                         goto done;
299                     }
300                 }
301 
302                 // Polynomial part
303                 for(int i = 0; i <= L; ++i) {
304                     for(int j = 0; j <= i; ++j) {
305                         MZa(i, j) = inner_product(*R[i], *R[j]);
306                     }
307                 }
308 
309                 // Symmetrize MZa
310                 for (int i = 0; i <= L; ++i) {
311                     for (int j = i+1; j <= L; ++j) {
312                         MZa(i, j) = MZa(j, i) = math::adjoint(MZa(j, i));
313                     }
314                 }
315 
316                 std::copy(MZa.data(), MZa.data() + MZa.size(), MZb.data());
317 
318                 if (prm.convex || L == 1) {
319                     Y0[0] = -one;
320 
321                     qr.solve(L, L, MZa.stride(0), MZa.stride(1),
322                             &MZa(1, 1), &MZb(0, 1), &Y0[1]);
323                 } else {
324                     Y0[0] = -one;
325                     Y0[L] = zero;
326                     qr.solve(L-1, L-1, MZa.stride(0), MZa.stride(1),
327                             &MZa(1, 1), &MZb(0, 1), &Y0[1]);
328 
329                     YL[0] = zero;
330                     YL[L] = -one;
331                     qr.solve(L-1, L-1, MZa.stride(0), MZa.stride(1),
332                             &MZa(1, 1), &MZb(L, 1), &YL[1], /*computed=*/true);
333 
334                     coef_type dot0 = zero;
335                     coef_type dot1 = zero;
336                     coef_type dotA = zero;
337                     for(int i = 0; i <= L; ++i) {
338                         coef_type s0 = zero;
339                         coef_type sL = zero;
340 
341                         for(int j = 0; j <= L; ++j) {
342                             coef_type M = MZb(i, j);
343                             s0 += M * Y0[j];
344                             sL += M * YL[j];
345                         }
346 
347                         dot0 += Y0[i] * s0;
348                         dotA += YL[i] * s0;
349                         dot1 += YL[i] * sL;
350                     }
351 
352                     scalar_type kappa0 = sqrt(std::abs(std::real(dot0)));
353                     scalar_type kappa1 = sqrt(std::abs(std::real(dot1)));
354                     scalar_type kappaA = std::real(dotA);
355 
356                     if (!math::is_zero(kappa0) && !math::is_zero(kappa1)) {
357                         scalar_type ghat;
358                         if (kappaA < 0.7 * kappa0 * kappa1) {
359                             ghat = (kappaA < 0) ? -0.7 * kappa0 / kappa1 : 0.7 * kappa0 / kappa1;
360                         } else {
361                             ghat = kappaA / (kappa1 * kappa1);
362                         }
363 
364                         for (int i = 0; i <= L; ++i)
365                             Y0[i] -= ghat * YL[i];
366                     }
367                 }
368 
369                 omega = Y0[L];
370                 for(int h = L; h > 0 && math::is_zero(omega); --h)
371                     omega = Y0[h];
372                 precondition(!math::is_zero(omega),
373                         "BiCGStab(L) breakdown: diverged (zero omega)");
374 
375                 backend::lin_comb(L, &Y0[1], &R[0], one, *X);
376 
377                 for(int i = 1; i <= L; ++i) Y0[i] = -one * Y0[i];
378 
379                 backend::lin_comb(L, &Y0[1], &U[1], one, *U[0]);
380                 backend::lin_comb(L, &Y0[1], &R[1], one, *R[0]);
381 
382                 for(int i = 1; i <= L; ++i) Y0[i] = -one * Y0[i];
383 
384                 zeta = norm(*R[0]);
385 
386                 // Accurate update
387                 if (prm.delta > 0) {
388                     rnmax_computed = std::max(zeta, rnmax_computed);
389                     rnmax_true     = std::max(zeta, rnmax_true);
390 
391                     bool update_x = zeta < prm.delta * zeta0 && zeta0 <= rnmax_computed;
392 
393                     if ((zeta < prm.delta * rnmax_true && zeta <= rnmax_true) || update_x) {
394                         preconditioner::spmv(prm.pside, P, A, *X, *R[0], *T);
395                         backend::axpby(one, *B, -one, *R[0]);
396                         rnmax_true = zeta;
397 
398                         if (update_x) {
399                             if (prm.pside == side::left) {
400                                 backend::axpby(one, *X, one, x);
401                             } else {
402                                 backend::axpby(one, *T, one, x);
403                             }
404                             backend::clear(*X);
405                             backend::copy(*R[0], *B);
406 
407                             rnmax_computed = zeta;
408                         }
409                     }
410                 }
411                 if (prm.verbose && iter % 5 == 0)
412                     std::cout << iter << "\t" << std::scientific << zeta / norm_rhs << std::endl;
413             }
414 
415 done:
416             if (prm.pside == side::left) {
417                 backend::axpby(one, *X, one, x);
418             } else {
419                 P.apply(*X, *T);
420                 backend::axpby(one, *T, one, x);
421             }
422 
423             return std::make_tuple(iter, zeta / norm_rhs);
424         }
425 
426         /* Computes the solution for the given right-hand side \p rhs. The
427          * system matrix is the same that was used for the setup of the
428          * preconditioner \p P.  Returns the number of iterations made and the
429          * achieved residual as a ``std::tuple``. The solution vector \p x
430          * provides initial approximation in input and holds the computed
431          * solution on output.
432          */
433         template <class Precond, class Vec1, class Vec2>
operator ()(const Precond & P,const Vec1 & rhs,Vec2 && x) const434         std::tuple<size_t, scalar_type> operator()(
435                 const Precond &P, const Vec1 &rhs, Vec2 &&x) const
436         {
437             return (*this)(P.system_matrix(), P, rhs, x);
438         }
439 
bytes() const440         size_t bytes() const {
441             size_t b = 0;
442 
443             b += backend::bytes(*Rt);
444             b += backend::bytes(*X);
445             b += backend::bytes(*B);
446             b += backend::bytes(*T);
447 
448             for(const auto &v : R) b += backend::bytes(*v);
449             for(const auto &v : U) b += backend::bytes(*v);
450 
451             b += MZa.size() * sizeof(coef_type);
452             b += MZb.size() * sizeof(coef_type);
453 
454             b += backend::bytes(Y0);
455             b += backend::bytes(YL);
456 
457             b += qr.bytes();
458 
459             return b;
460         }
461 
operator <<(std::ostream & os,const bicgstabl & s)462         friend std::ostream& operator<<(std::ostream &os, const bicgstabl &s) {
463             return os
464                 << "Type:             BiCGStab(" << s.prm.L << ")"
465                 << "\nUnknowns:         " << s.n
466                 << "\nMemory footprint: " << human_readable_memory(s.bytes())
467                 << std::endl;
468         }
469     public:
470         params prm;
471 
472     private:
473         size_t n;
474 
475         mutable std::shared_ptr< vector > Rt;
476         mutable std::shared_ptr< vector > X;
477         mutable std::shared_ptr< vector > B;
478         mutable std::shared_ptr< vector > T;
479 
480         mutable std::vector< std::shared_ptr< vector > > R;
481         mutable std::vector< std::shared_ptr< vector > > U;
482 
483         mutable multi_array<coef_type, 2> MZa, MZb;
484         mutable std::vector<coef_type> Y0, YL;
485         mutable amgcl::detail::QR<coef_type> qr;
486 
487         InnerProduct inner_product;
488 
489         template <class Vec>
norm(const Vec & x) const490         scalar_type norm(const Vec &x) const {
491             return sqrt(math::norm(inner_product(x, x)));
492         }
493 };
494 
495 } // namespace solver
496 } // namespace amgcl
497 
498 
499 #endif
500