1 #ifndef AMGCL_SOLVER_BICGSTABL_HPP 2 #define AMGCL_SOLVER_BICGSTABL_HPP 3 4 /* 5 The MIT License 6 7 Copyright (c) 2012-2021 Denis Demidov <dennis.demidov@gmail.com> 8 9 Permission is hereby granted, free of charge, to any person obtaining a copy 10 of this software and associated documentation files (the "Software"), to deal 11 in the Software without restriction, including without limitation the rights 12 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 copies of the Software, and to permit persons to whom the Software is 14 furnished to do so, subject to the following conditions: 15 16 The above copyright notice and this permission notice shall be included in 17 all copies or substantial portions of the Software. 18 19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 THE SOFTWARE. 26 */ 27 28 /** 29 \file amgcl/solver/bicgstabl.hpp 30 \author Denis Demidov <dennis.demidov@gmail.com> 31 \brief BiCGStab(L) iterative method. 32 33 The code is ported from PETSC BCGSL [1] and is based on [2]. 34 35 [1] http://www.mcs.anl.gov/petsc/petsc-current/docs/manualpages/KSP/KSPBCGSL.html 36 [2] Fokkema, Diederik R. Enhanced implementation of BiCGstab (l) for solving 37 linear systems of equations. Universiteit Utrecht. Mathematisch Instituut, 38 1996. 39 40 The original code came with the following license: 41 42 \verbatim 43 Copyright (c) 1991-2014, UChicago Argonne, LLC and the PETSc Development Team 44 All rights reserved. 45 46 Redistribution and use in source and binary forms, with or without modification, 47 are permitted provided that the following conditions are met: 48 49 * Redistributions of source code must retain the above copyright notice, this 50 list of conditions and the following disclaimer. 51 * Redistributions in binary form must reproduce the above copyright notice, this 52 list of conditions and the following disclaimer in the documentation and/or 53 other materials provided with the distribution. 54 55 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 56 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 57 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 58 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 59 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 60 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 61 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 62 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 63 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 64 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 65 \endverbatim 66 */ 67 68 #include <tuple> 69 #include <iostream> 70 71 #include <amgcl/backend/interface.hpp> 72 #include <amgcl/solver/detail/default_inner_product.hpp> 73 #include <amgcl/solver/precond_side.hpp> 74 #include <amgcl/detail/qr.hpp> 75 #include <amgcl/util.hpp> 76 77 namespace amgcl { 78 namespace solver { 79 80 /** BiCGStab(L) method. 81 * \rst 82 * Generalization of BiCGStab method [SlDi93]_. 83 * \endrst 84 */ 85 template < 86 class Backend, 87 class InnerProduct = detail::default_inner_product 88 > 89 class bicgstabl { 90 public: 91 typedef Backend backend_type; 92 93 typedef typename Backend::vector vector; 94 typedef typename Backend::value_type value_type; 95 typedef typename Backend::params backend_params; 96 97 typedef typename math::scalar_of<value_type>::type scalar_type; 98 99 typedef typename math::inner_product_impl< 100 typename math::rhs_of<value_type>::type 101 >::return_type coef_type; 102 103 104 /// Solver parameters. 105 struct params { 106 // Order of the method. 107 int L; 108 109 // Threshold used to decide when to refresh computed residuals. 110 scalar_type delta; 111 112 // Use a convex function of the MinRes and OR polynomials 113 // after the BiCG step instead of default MinRes 114 bool convex; 115 116 // Preconditioning kind (left/right). 117 preconditioner::side::type pside; 118 119 // Maximum number of iterations. 120 size_t maxiter; 121 122 // Target relative residual error. 123 scalar_type tol; 124 125 // Target absolute residual error. 126 scalar_type abstol; 127 128 /// Ignore the trivial solution x=0 when rhs is zero. 129 //** Useful for searching for the null-space vectors of the system */ 130 bool ns_search; 131 132 /// Verbose output (show iterations and error) 133 bool verbose; 134 paramsamgcl::solver::bicgstabl::params135 params() 136 : L(2), delta(0), convex(true), 137 pside(preconditioner::side::right), maxiter(100), tol(1e-8), 138 abstol(std::numeric_limits<scalar_type>::min()), 139 ns_search(false), verbose(false) 140 { 141 } 142 143 #ifndef AMGCL_NO_BOOST paramsamgcl::solver::bicgstabl::params144 params(const boost::property_tree::ptree &p) 145 : AMGCL_PARAMS_IMPORT_VALUE(p, L), 146 AMGCL_PARAMS_IMPORT_VALUE(p, delta), 147 AMGCL_PARAMS_IMPORT_VALUE(p, convex), 148 AMGCL_PARAMS_IMPORT_VALUE(p, pside), 149 AMGCL_PARAMS_IMPORT_VALUE(p, maxiter), 150 AMGCL_PARAMS_IMPORT_VALUE(p, tol), 151 AMGCL_PARAMS_IMPORT_VALUE(p, abstol), 152 AMGCL_PARAMS_IMPORT_VALUE(p, ns_search), 153 AMGCL_PARAMS_IMPORT_VALUE(p, verbose) 154 { 155 check_params(p, {"L", "delta", "convex", "pside", "maxiter", 156 "tol", "abstol", "ns_search", "verbose"}); 157 } 158 getamgcl::solver::bicgstabl::params159 void get(boost::property_tree::ptree &p, const std::string &path) const { 160 AMGCL_PARAMS_EXPORT_VALUE(p, path, L); 161 AMGCL_PARAMS_EXPORT_VALUE(p, path, delta); 162 AMGCL_PARAMS_EXPORT_VALUE(p, path, convex); 163 AMGCL_PARAMS_EXPORT_VALUE(p, path, pside); 164 AMGCL_PARAMS_EXPORT_VALUE(p, path, maxiter); 165 AMGCL_PARAMS_EXPORT_VALUE(p, path, tol); 166 AMGCL_PARAMS_EXPORT_VALUE(p, path, abstol); 167 AMGCL_PARAMS_EXPORT_VALUE(p, path, ns_search); 168 AMGCL_PARAMS_EXPORT_VALUE(p, path, verbose); 169 } 170 #endif 171 }; 172 173 /// Preallocates necessary data structures for the system of size \p n. bicgstabl(size_t n,const params & prm=params (),const backend_params & backend_prm=backend_params (),const InnerProduct & inner_product=InnerProduct ())174 bicgstabl( 175 size_t n, 176 const params &prm = params(), 177 const backend_params &backend_prm = backend_params(), 178 const InnerProduct &inner_product = InnerProduct() 179 ) 180 : prm(prm), n(n), 181 Rt( Backend::create_vector(n, backend_prm) ), 182 X ( Backend::create_vector(n, backend_prm) ), 183 B ( Backend::create_vector(n, backend_prm) ), 184 T ( Backend::create_vector(n, backend_prm) ), 185 R(prm.L + 1), U(prm.L + 1), 186 MZa(prm.L + 1, prm.L + 1), 187 MZb(prm.L + 1, prm.L + 1), 188 Y0(prm.L + 1), YL(prm.L + 1), 189 inner_product(inner_product) 190 { 191 precondition(prm.L > 0, "L in BiCGStab(L) should be >=1"); 192 193 for(int i = 0; i <= prm.L; ++i) { 194 R[i] = Backend::create_vector(n, backend_prm); 195 U[i] = Backend::create_vector(n, backend_prm); 196 } 197 } 198 199 /* Computes the solution for the given system matrix \p A and the 200 * right-hand side \p rhs. Returns the number of iterations made and 201 * the achieved residual as a ``std::tuple``. The solution vector 202 * \p x provides initial approximation in input and holds the computed 203 * solution on output. 204 * 205 * The system matrix may differ from the matrix used during 206 * initialization. This may be used for the solution of non-stationary 207 * problems with slowly changing coefficients. There is a strong chance 208 * that a preconditioner built for a time step will act as a reasonably 209 * good preconditioner for several subsequent time steps [DeSh12]_. 210 */ 211 template <class Matrix, class Precond, class Vec1, class Vec2> operator ()(const Matrix & A,const Precond & P,const Vec1 & rhs,Vec2 && x) const212 std::tuple<size_t, scalar_type> operator()( 213 const Matrix &A, const Precond &P, const Vec1 &rhs, Vec2 &&x) const 214 { 215 namespace side = preconditioner::side; 216 217 static const coef_type one = math::identity<coef_type>(); 218 static const coef_type zero = math::zero<coef_type>(); 219 220 const int L = prm.L; 221 222 ios_saver ss(std::cout); 223 224 scalar_type norm_rhs = norm(rhs); 225 226 // Check if there is a trivial solution 227 if (norm_rhs < amgcl::detail::eps<scalar_type>(1)) { 228 if (prm.ns_search) { 229 norm_rhs = math::identity<scalar_type>(); 230 } else { 231 backend::clear(x); 232 return std::make_tuple(0, norm_rhs); 233 } 234 } 235 236 if (prm.pside == side::left) { 237 backend::residual(rhs, A, x, *T); 238 P.apply(*T, *B); 239 } else { 240 backend::residual(rhs, A, x, *B); 241 } 242 243 scalar_type zeta0 = norm(*B); 244 scalar_type eps = std::max(prm.tol * norm_rhs, prm.abstol); 245 246 coef_type alpha = zero; 247 coef_type rho0 = one; 248 coef_type omega = one; 249 250 // Go 251 backend::copy(*B, *R[0]); 252 backend::copy(*B, *Rt); 253 backend::clear(*X); 254 backend::clear(*U[0]); 255 256 scalar_type zeta = zeta0; 257 scalar_type rnmax_computed = zeta0; 258 scalar_type rnmax_true = zeta0; 259 260 size_t iter = 0; 261 for(; iter < prm.maxiter && zeta >= eps; iter += L) { 262 // BiCG part 263 rho0 = -omega * rho0; 264 265 for(int j = 0; j < L; ++j) { 266 coef_type rho1 = inner_product(*R[j], *Rt); 267 precondition(!math::is_zero(rho1), 268 "BiCGStab(L) breakdown: diverged (zero rho)"); 269 270 coef_type beta = alpha * (rho1 / rho0); 271 rho0 = rho1; 272 273 for(int i = 0; i <= j; ++i) 274 backend::axpby(one, *R[i], -beta, *U[i]); 275 276 preconditioner::spmv(prm.pside, P, A, *U[j], *U[j+1], *T); 277 278 coef_type sigma = inner_product(*U[j+1], *Rt); 279 precondition(!math::is_zero(sigma), 280 "BiCGStab(L) breakdown: diverged (zero sigma)"); 281 alpha = rho1 / sigma; 282 283 backend::axpby(alpha, *U[0], one, *X); 284 285 for(int i = 0; i <= j; ++i) 286 backend::axpby(-alpha, *U[i+1], one, *R[i]); 287 288 preconditioner::spmv(prm.pside, P, A, *R[j], *R[j+1], *T); 289 290 zeta = norm(*R[0]); 291 292 rnmax_computed = std::max(zeta, rnmax_computed); 293 rnmax_true = std::max(zeta, rnmax_true); 294 295 // Check for early exit 296 if (zeta < eps) { 297 iter += j+1; 298 goto done; 299 } 300 } 301 302 // Polynomial part 303 for(int i = 0; i <= L; ++i) { 304 for(int j = 0; j <= i; ++j) { 305 MZa(i, j) = inner_product(*R[i], *R[j]); 306 } 307 } 308 309 // Symmetrize MZa 310 for (int i = 0; i <= L; ++i) { 311 for (int j = i+1; j <= L; ++j) { 312 MZa(i, j) = MZa(j, i) = math::adjoint(MZa(j, i)); 313 } 314 } 315 316 std::copy(MZa.data(), MZa.data() + MZa.size(), MZb.data()); 317 318 if (prm.convex || L == 1) { 319 Y0[0] = -one; 320 321 qr.solve(L, L, MZa.stride(0), MZa.stride(1), 322 &MZa(1, 1), &MZb(0, 1), &Y0[1]); 323 } else { 324 Y0[0] = -one; 325 Y0[L] = zero; 326 qr.solve(L-1, L-1, MZa.stride(0), MZa.stride(1), 327 &MZa(1, 1), &MZb(0, 1), &Y0[1]); 328 329 YL[0] = zero; 330 YL[L] = -one; 331 qr.solve(L-1, L-1, MZa.stride(0), MZa.stride(1), 332 &MZa(1, 1), &MZb(L, 1), &YL[1], /*computed=*/true); 333 334 coef_type dot0 = zero; 335 coef_type dot1 = zero; 336 coef_type dotA = zero; 337 for(int i = 0; i <= L; ++i) { 338 coef_type s0 = zero; 339 coef_type sL = zero; 340 341 for(int j = 0; j <= L; ++j) { 342 coef_type M = MZb(i, j); 343 s0 += M * Y0[j]; 344 sL += M * YL[j]; 345 } 346 347 dot0 += Y0[i] * s0; 348 dotA += YL[i] * s0; 349 dot1 += YL[i] * sL; 350 } 351 352 scalar_type kappa0 = sqrt(std::abs(std::real(dot0))); 353 scalar_type kappa1 = sqrt(std::abs(std::real(dot1))); 354 scalar_type kappaA = std::real(dotA); 355 356 if (!math::is_zero(kappa0) && !math::is_zero(kappa1)) { 357 scalar_type ghat; 358 if (kappaA < 0.7 * kappa0 * kappa1) { 359 ghat = (kappaA < 0) ? -0.7 * kappa0 / kappa1 : 0.7 * kappa0 / kappa1; 360 } else { 361 ghat = kappaA / (kappa1 * kappa1); 362 } 363 364 for (int i = 0; i <= L; ++i) 365 Y0[i] -= ghat * YL[i]; 366 } 367 } 368 369 omega = Y0[L]; 370 for(int h = L; h > 0 && math::is_zero(omega); --h) 371 omega = Y0[h]; 372 precondition(!math::is_zero(omega), 373 "BiCGStab(L) breakdown: diverged (zero omega)"); 374 375 backend::lin_comb(L, &Y0[1], &R[0], one, *X); 376 377 for(int i = 1; i <= L; ++i) Y0[i] = -one * Y0[i]; 378 379 backend::lin_comb(L, &Y0[1], &U[1], one, *U[0]); 380 backend::lin_comb(L, &Y0[1], &R[1], one, *R[0]); 381 382 for(int i = 1; i <= L; ++i) Y0[i] = -one * Y0[i]; 383 384 zeta = norm(*R[0]); 385 386 // Accurate update 387 if (prm.delta > 0) { 388 rnmax_computed = std::max(zeta, rnmax_computed); 389 rnmax_true = std::max(zeta, rnmax_true); 390 391 bool update_x = zeta < prm.delta * zeta0 && zeta0 <= rnmax_computed; 392 393 if ((zeta < prm.delta * rnmax_true && zeta <= rnmax_true) || update_x) { 394 preconditioner::spmv(prm.pside, P, A, *X, *R[0], *T); 395 backend::axpby(one, *B, -one, *R[0]); 396 rnmax_true = zeta; 397 398 if (update_x) { 399 if (prm.pside == side::left) { 400 backend::axpby(one, *X, one, x); 401 } else { 402 backend::axpby(one, *T, one, x); 403 } 404 backend::clear(*X); 405 backend::copy(*R[0], *B); 406 407 rnmax_computed = zeta; 408 } 409 } 410 } 411 if (prm.verbose && iter % 5 == 0) 412 std::cout << iter << "\t" << std::scientific << zeta / norm_rhs << std::endl; 413 } 414 415 done: 416 if (prm.pside == side::left) { 417 backend::axpby(one, *X, one, x); 418 } else { 419 P.apply(*X, *T); 420 backend::axpby(one, *T, one, x); 421 } 422 423 return std::make_tuple(iter, zeta / norm_rhs); 424 } 425 426 /* Computes the solution for the given right-hand side \p rhs. The 427 * system matrix is the same that was used for the setup of the 428 * preconditioner \p P. Returns the number of iterations made and the 429 * achieved residual as a ``std::tuple``. The solution vector \p x 430 * provides initial approximation in input and holds the computed 431 * solution on output. 432 */ 433 template <class Precond, class Vec1, class Vec2> operator ()(const Precond & P,const Vec1 & rhs,Vec2 && x) const434 std::tuple<size_t, scalar_type> operator()( 435 const Precond &P, const Vec1 &rhs, Vec2 &&x) const 436 { 437 return (*this)(P.system_matrix(), P, rhs, x); 438 } 439 bytes() const440 size_t bytes() const { 441 size_t b = 0; 442 443 b += backend::bytes(*Rt); 444 b += backend::bytes(*X); 445 b += backend::bytes(*B); 446 b += backend::bytes(*T); 447 448 for(const auto &v : R) b += backend::bytes(*v); 449 for(const auto &v : U) b += backend::bytes(*v); 450 451 b += MZa.size() * sizeof(coef_type); 452 b += MZb.size() * sizeof(coef_type); 453 454 b += backend::bytes(Y0); 455 b += backend::bytes(YL); 456 457 b += qr.bytes(); 458 459 return b; 460 } 461 operator <<(std::ostream & os,const bicgstabl & s)462 friend std::ostream& operator<<(std::ostream &os, const bicgstabl &s) { 463 return os 464 << "Type: BiCGStab(" << s.prm.L << ")" 465 << "\nUnknowns: " << s.n 466 << "\nMemory footprint: " << human_readable_memory(s.bytes()) 467 << std::endl; 468 } 469 public: 470 params prm; 471 472 private: 473 size_t n; 474 475 mutable std::shared_ptr< vector > Rt; 476 mutable std::shared_ptr< vector > X; 477 mutable std::shared_ptr< vector > B; 478 mutable std::shared_ptr< vector > T; 479 480 mutable std::vector< std::shared_ptr< vector > > R; 481 mutable std::vector< std::shared_ptr< vector > > U; 482 483 mutable multi_array<coef_type, 2> MZa, MZb; 484 mutable std::vector<coef_type> Y0, YL; 485 mutable amgcl::detail::QR<coef_type> qr; 486 487 InnerProduct inner_product; 488 489 template <class Vec> norm(const Vec & x) const490 scalar_type norm(const Vec &x) const { 491 return sqrt(math::norm(inner_product(x, x))); 492 } 493 }; 494 495 } // namespace solver 496 } // namespace amgcl 497 498 499 #endif 500