/* -*- c++ -*- (enables emacs c++ mode) */
/*===========================================================================

 Copyright (C) 2002-2020 Yves Renard

 This file is a part of GetFEM

 GetFEM is free software;  you  can  redistribute  it  and/or modify it
 under  the  terms  of the  GNU  Lesser General Public License as published
 by  the  Free Software Foundation;  either version 3 of the License,  or
 (at your option) any later version along with the GCC Runtime Library
 Exception either version 3.1 or (at your option) any later version.
 This program  is  distributed  in  the  hope  that it will be useful,  but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 License and GCC Runtime Library Exception for more details.
 You  should  have received a copy of the GNU Lesser General Public License
 along  with  this program;  if not, write to the Free Software Foundation,
 Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.

 As a special exception, you  may use  this file  as it is a part of a free
 software  library  without  restriction.  Specifically,  if   other  files
 instantiate  templates  or  use macros or inline functions from this file,
 or  you compile this  file  and  link  it  with other files  to produce an
 executable, this file  does  not  by itself cause the resulting executable
 to be covered  by the GNU Lesser General Public License.  This   exception
 does not  however  invalidate  any  other  reasons why the executable file
 might be covered by the GNU Lesser General Public License.

===========================================================================*/

/**@file gmm_solver_Schwarz_additive.h
   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
   @author  Michel Fournie <fournie@mip.ups-tlse.fr>
   @date October 13, 2002.
   @brief Additive Schwarz domain-decomposition solver (linear and
          nonlinear variants), optionally parallelized with MPI when
          GMM_USES_MPI is defined.
*/

#ifndef GMM_SOLVERS_SCHWARZ_ADDITIVE_H__
#define GMM_SOLVERS_SCHWARZ_ADDITIVE_H__

#include "gmm_kernel.h"
#include "gmm_superlu_interface.h"
#include "gmm_solver_cg.h"
#include "gmm_solver_gmres.h"
#include "gmm_solver_bicgstab.h"
#include "gmm_solver_qmr.h"

namespace gmm {

  /* ******************************************************************** */
  /*		Additive Schwarz interfaced local solvers                 */
  /* ******************************************************************** */

  // Empty tag types used to select, at compile time, which iterative
  // solver is applied to the local (sub-domain) and global problems.
  struct using_cg {};
  struct using_gmres {};
  struct using_bicgstab {};
  struct using_qmr {};

  // Maps the user-supplied preconditioner type P to the preconditioner
  // actually stored for each sub-domain.  The generic case is the identity
  // mapping; it is specialized below for SuperLU, where the "preconditioner"
  // is a complete LU factorization built per sub-domain.
  template <typename P, typename local_solver, typename Matrix>
  struct actual_precond {
    typedef P APrecond;
    static const APrecond &transform(const P &PP) { return PP; }
  };

  // AS_local_solve: dispatch on the local-solver tag to run one local
  // solve A x = b with preconditioner P.  Each overload simply forwards
  // to the corresponding gmm solver.
  template <typename Matrix1, typename Precond, typename Vector>
  void AS_local_solve(using_cg, const Matrix1 &A, Vector &x, const Vector &b,
                      const Precond &P, iteration &iter)
  { cg(A, x, b, P, iter); }

  template <typename Matrix1, typename Precond, typename Vector>
  void AS_local_solve(using_gmres, const Matrix1 &A, Vector &x,
                      const Vector &b, const Precond &P, iteration &iter)
  { gmres(A, x, b, P, 100, iter); } // restart parameter fixed to 100

  template <typename Matrix1, typename Precond, typename Vector>
  void AS_local_solve(using_bicgstab, const Matrix1 &A, Vector &x,
                      const Vector &b, const Precond &P, iteration &iter)
  { bicgstab(A, x, b, P, iter); }

  template <typename Matrix1, typename Precond, typename Vector>
  void AS_local_solve(using_qmr, const Matrix1 &A, Vector &x,
                      const Vector &b, const Precond &P, iteration &iter)
  { qmr(A, x, b, P, iter); }

#if defined(GMM_USES_SUPERLU)
  // Tag selecting a direct SuperLU factorization as "local solver".
  struct using_superlu {};

  // With SuperLU the stored per-sub-domain object is a SuperLU_factor,
  // regardless of the preconditioner type the caller passed in.
  template <typename P, typename Matrix>
  struct actual_precond<P, using_superlu, Matrix> {
    typedef typename linalg_traits<Matrix>::value_type value_type;
    typedef SuperLU_factor<value_type> APrecond;
    template <typename PR>
    static APrecond transform(const PR &) { return APrecond(); }
    static const APrecond &transform(const APrecond &PP) { return PP; }
  };

  // Direct local solve: P holds the factorization, so one triangular
  // solve suffices; the iteration count is reported as 1.
  template <typename Matrix1, typename Precond, typename Vector>
  void AS_local_solve(using_superlu, const Matrix1 &, Vector &x,
                      const Vector &b, const Precond &P, iteration &iter)
  { P.solve(x, b); iter.set_iteration(1); }
#endif

  /* ******************************************************************** */
  /*		Additive Schwarz Linear system                            */
  /* ******************************************************************** */

  /** Virtual "preconditioned operator" for the additive Schwarz method.
   *  Holds the global matrix A, the sub-domain restriction/prolongation
   *  matrices vB (one column block per sub-domain), the local matrices
   *  vAloc = Bi^T A Bi, and one local preconditioner per sub-domain.
   *  Applying this object (see mult below) performs one additive Schwarz
   *  preconditioned matrix-vector product.
   */
  template <typename Matrix1, typename Matrix2, typename Precond,
	    typename local_solver>
  struct add_schwarz_mat{
    typedef typename linalg_traits<Matrix1>::value_type value_type;

    const Matrix1 *A;                  // global system matrix (not owned)
    const std::vector<Matrix2> *vB;    // sub-domain basis matrices (not owned)
    std::vector<Matrix2> vAloc;        // local matrices Bi^T A Bi
    mutable iteration iter;            // iteration control reused by each local solve
    double residual;                   // target residual of the outer iteration
    mutable size_type itebilan;        // accumulated max local iteration count
    mutable std::vector<std::vector<value_type> > gi, fi; // local solutions / rhs
    std::vector<typename actual_precond<Precond, local_solver,
					Matrix1>::APrecond>  precond1; // one per sub-domain

    void init(const Matrix1 &A_, const std::vector<Matrix2> &vB_,
	      iteration iter_, const Precond &P, double residual_);

    add_schwarz_mat(void) {}
    add_schwarz_mat(const Matrix1 &A_, const std::vector<Matrix2> &vB_,
		    iteration iter_, const Precond &P, double residual_)
    { init(A_, vB_, iter_, P, residual_); }
  };

  // Builds the local matrices vAloc[i] = Bi^T A Bi and the local
  // preconditioners.  In the MPI build, each process handles the slice
  // [borne_inf, borne_sup) of sub-domains and the CSR copy of A is
  // circulated around a ring so every process accumulates the
  // contributions of all the others.
  // NOTE(review): the #ifdef branches open/close braces across
  // preprocessor arms; the loop structure differs between the MPI and
  // sequential builds on purpose.
  template <typename Matrix1, typename Matrix2, typename Precond,
	    typename local_solver>
  void add_schwarz_mat<Matrix1, Matrix2, Precond, local_solver>::init(
       const Matrix1 &A_, const std::vector<Matrix2> &vB_,
       iteration iter_, const Precond &P, double residual_) {

    vB = &vB_; A = &A_; iter = iter_;
    residual = residual_;

    size_type nb_sub = vB->size();
    vAloc.resize(nb_sub);
    gi.resize(nb_sub); fi.resize(nb_sub);
    precond1.resize(nb_sub);
    std::fill(precond1.begin(), precond1.end(),
	      actual_precond<Precond, local_solver, Matrix1>::transform(P));
    itebilan = 0;

    if (iter.get_noisy()) cout << "Init pour sub dom ";
#ifdef GMM_USES_MPI
    int size,tranche,borne_sup,borne_inf,rank,tag1=11,tag2=12,tag3=13,sizepr = 0;
    // int tab[4];
    double t_ref,t_final;
    MPI_Status status;
    t_ref=MPI_Wtime();
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    // static block partition of the sub-domains over the processes;
    // NOTE(review): the remainder nb_sub % size is not assigned to the
    // last rank (the corrective line below is commented out) -- confirm
    // callers always use nb_sub that is a multiple of the process count.
    tranche=nb_sub/size;
    borne_inf=rank*tranche;
    borne_sup=(rank+1)*tranche;
    // if (rank==size-1) borne_sup = nb_sub;

    cout << "Nombre de sous domaines " << borne_sup - borne_inf << endl;

    int sizeA = mat_nrows(*A);
    gmm::csr_matrix<value_type> Acsr(sizeA, sizeA), Acsrtemp(sizeA, sizeA);
    gmm::copy(gmm::eff_matrix(*A), Acsr);
    int next = (rank + 1) % size;
    int previous = (rank + size - 1) % size;
    //communication of local information on ring pattern
    //Each process receive Nproc-1 contributions

    for (int nproc = 0; nproc < size; ++nproc) {
      for (size_type i = size_type(borne_inf); i < size_type(borne_sup); ++i) {
	// for (size_type i = 0; i < nb_sub/size; ++i) {
	// for (size_type i = 0; i < nb_sub; ++i) {
	// size_type i=(rank+size*(j-1)+nb_sub)%nb_sub;

	cout << "Sous domaines " << i << " : " << mat_ncols((*vB)[i]) << endl;
#else
    for (size_type i = 0; i < nb_sub; ++i) {
#endif

      if (iter.get_noisy()) cout << i << " " << std::flush;
      Matrix2 Maux(mat_ncols((*vB)[i]), mat_nrows((*vB)[i]));

#ifdef GMM_USES_MPI
      // accumulate the contribution of the currently-held slice of A:
      // vAloc[i] += Bi^T Acsr Bi
      Matrix2 Maux2(mat_ncols((*vB)[i]), mat_ncols((*vB)[i]));
      if (nproc == 0) {
	gmm::resize(vAloc[i], mat_ncols((*vB)[i]), mat_ncols((*vB)[i]));
	gmm::clear(vAloc[i]);
      }
      gmm::mult(gmm::transposed((*vB)[i]), Acsr, Maux);
      gmm::mult(Maux, (*vB)[i], Maux2);
      gmm::add(Maux2, vAloc[i]);
#else
      // sequential build: vAloc[i] = Bi^T A Bi in one shot
      gmm::resize(vAloc[i], mat_ncols((*vB)[i]), mat_ncols((*vB)[i]));
      gmm::mult(gmm::transposed((*vB)[i]), *A, Maux);
      gmm::mult(Maux, (*vB)[i], vAloc[i]);
#endif

#ifdef GMM_USES_MPI
      // the local preconditioner is only built once vAloc[i] is complete,
      // i.e. after the last ring step
      if (nproc == size - 1 ) {
#endif
	precond1[i].build_with(vAloc[i]);
	gmm::resize(fi[i], mat_ncols((*vB)[i]));
	gmm::resize(gi[i], mat_ncols((*vB)[i]));
#ifdef GMM_USES_MPI
      }
#else
    }
#endif
#ifdef GMM_USES_MPI
    }
      // ring exchange of the CSR structure/values of A (jc, ir, pr arrays)
      // with the neighbouring processes, except after the last step
      if (nproc != size - 1) {
	MPI_Sendrecv(&(Acsr.jc[0]), sizeA+1, MPI_INT, next, tag2,
		     &(Acsrtemp.jc[0]), sizeA+1, MPI_INT, previous, tag2,
		     MPI_COMM_WORLD, &status);
	if (Acsrtemp.jc[sizeA] > size_type(sizepr)) {
	  sizepr = Acsrtemp.jc[sizeA];
	  gmm::resize(Acsrtemp.pr, sizepr);
	  gmm::resize(Acsrtemp.ir, sizepr);
	}
	MPI_Sendrecv(&(Acsr.ir[0]), Acsr.jc[sizeA], MPI_INT, next, tag1,
		     &(Acsrtemp.ir[0]), Acsrtemp.jc[sizeA], MPI_INT, previous, tag1,
		     MPI_COMM_WORLD, &status);

	MPI_Sendrecv(&(Acsr.pr[0]), Acsr.jc[sizeA], mpi_type(value_type()), next, tag3,
		     &(Acsrtemp.pr[0]), Acsrtemp.jc[sizeA], mpi_type(value_type()), previous, tag3,
		     MPI_COMM_WORLD, &status);
	gmm::copy(Acsrtemp, Acsr);
      }
    }
    t_final=MPI_Wtime();
    cout<<"temps boucle precond "<< t_final-t_ref<<endl;
#endif
    if (iter.get_noisy()) cout << "\n";
  }

  /** Additive Schwarz preconditioned matrix-vector product:
   *  q = sum_i Bi (Bi^T A Bi)^{-1} Bi^T A p (local solves are iterative).
   *  In the MPI build each process computes its slice and the partial
   *  results are combined with MPI_Allreduce.
   *  NOTE(review): relies on add_schwarz_mat's mutable members (iter, gi,
   *  fi, itebilan), which is why M can be taken const.
   */
  template <typename Matrix1, typename Matrix2, typename Precond,
	    typename Vector2, typename Vector3, typename local_solver>
  void mult(const add_schwarz_mat<Matrix1, Matrix2, Precond, local_solver> &M,
	    const Vector2 &p, Vector3 &q) {
    size_type itebilan = 0;
#ifdef GMM_USES_MPI
    static double tmult_tot = 0.0;
    double t_ref = MPI_Wtime();
#endif
    // cout << "tmult AS begin " << endl;
    mult(*(M.A), p, q);              // q = A p
#ifdef GMM_USES_MPI
    tmult_tot += MPI_Wtime()-t_ref;
    cout << "tmult_tot = " << tmult_tot << endl;
#endif
    std::vector<double> qbis(gmm::vect_size(q));
    std::vector<double> qter(gmm::vect_size(q));
#ifdef GMM_USES_MPI
    // MPI_Status status;
    // MPI_Request request,request1;
    // int tag=111;
    int size,tranche,borne_sup,borne_inf,rank;
    size_type nb_sub=M.fi.size();
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    tranche=nb_sub/size;
    borne_inf=rank*tranche;
    borne_sup=(rank+1)*tranche;
    // if (rank==size-1) borne_sup=nb_sub;
    // int next = (rank + 1) % size;
    // int previous = (rank + size - 1) % size;
    t_ref = MPI_Wtime();
    for (size_type i = size_type(borne_inf); i < size_type(borne_sup); ++i)
    // for (size_type i = 0; i < nb_sub/size; ++i)
    // for (size_type j = 0; j < nb_sub; ++j)
#else
    for (size_type i = 0; i < M.fi.size(); ++i)
#endif
      {
#ifdef GMM_USES_MPI
	// size_type i=j; // (rank+size*(j-1)+nb_sub)%nb_sub;
#endif
	// local solve on sub-domain i: gi = (Bi^T A Bi)^{-1} (Bi^T q)
	gmm::mult(gmm::transposed((*(M.vB))[i]), q, M.fi[i]);
	M.iter.init();
	AS_local_solve(local_solver(), (M.vAloc)[i], (M.gi)[i],
		       (M.fi)[i],(M.precond1)[i],M.iter);
	itebilan = std::max(itebilan, M.iter.get_iteration());
      }

#ifdef GMM_USES_MPI
    cout << "First AS loop time " << MPI_Wtime() - t_ref << endl;
#endif

    // recombination: q = sum_i Bi gi
    gmm::clear(q);
#ifdef GMM_USES_MPI
    t_ref = MPI_Wtime();
    // for (size_type j = 0; j < nb_sub; ++j)
    for (size_type i = size_type(borne_inf); i < size_type(borne_sup); ++i)

#else
    for (size_type i = 0; i < M.gi.size(); ++i)
#endif
      {

#ifdef GMM_USES_MPI
	// size_type i=j; // (rank+size*(j-1)+nb_sub)%nb_sub;
	// gmm::mult((*(M.vB))[i], M.gi[i], qbis,qbis);
	gmm::mult((*(M.vB))[i], M.gi[i], qter);
	add(qter,qbis,qbis);
#else
	gmm::mult((*(M.vB))[i], M.gi[i], q, q);
#endif
      }
#ifdef GMM_USES_MPI
    //WARNING this add only if you use  the ring pattern below
    // need to do this below if using a n explicit ring pattern communication

    // add(qbis,q,q);
    cout << "Second AS loop time " << MPI_Wtime() - t_ref << endl;
#endif


#ifdef GMM_USES_MPI
    // int tag1=11;
    static double t_tot = 0.0;
    double t_final;
    t_ref=MPI_Wtime();
    // int next = (rank + 1) % size;
    // int previous = (rank + size - 1) % size;
    //communication of local information on ring pattern
    //Each process receive Nproc-1 contributions

    // if (size > 1) {
    //    for (int nproc = 0; nproc < size-1; ++nproc)
    //     {

    //       MPI_Sendrecv(&(qbis[0]), gmm::vect_size(q), MPI_DOUBLE, next, tag1,
    // 		   &(qter[0]), gmm::vect_size(q),MPI_DOUBLE,previous,tag1,
    // 		   MPI_COMM_WORLD,&status);
    //       gmm::copy(qter, qbis);
    //       add(qbis,q,q);
    //     }
    // }
    // sum the per-process partial results into q on every process
    MPI_Allreduce(&(qbis[0]), &(q[0]),gmm::vect_size(q), MPI_DOUBLE,
		  MPI_SUM,MPI_COMM_WORLD);
    t_final=MPI_Wtime();
    t_tot += t_final-t_ref;
    cout<<"["<< rank<<"] temps reduce Resol "<< t_final-t_ref << " t_tot = " << t_tot << endl;
#endif

    if (M.iter.get_noisy() > 0) cout << "itebloc = " << itebilan << endl;
    M.itebilan += itebilan;
    // adaptive tightening of the local-solver tolerance towards M.residual
    M.iter.set_resmax((M.iter.get_resmax() + M.residual) * 0.5);
  }

  // const-output overload: some generic solvers pass temporaries as const;
  // forwards to the mutable-output version above.
  template <typename Matrix1, typename Matrix2, typename Precond,
	    typename Vector2, typename Vector3, typename local_solver>
  void mult(const add_schwarz_mat<Matrix1, Matrix2, Precond, local_solver> &M,
	    const Vector2 &p, const Vector3 &q) {
    mult(M, p, const_cast<Vector3 &>(q));
  }

  // q = M p + p2 (used by solvers that need an affine update)
  template <typename Matrix1, typename Matrix2, typename Precond,
	    typename Vector2, typename Vector3, typename Vector4,
	    typename local_solver>
  void mult(const add_schwarz_mat<Matrix1, Matrix2, Precond, local_solver> &M,
	    const Vector2 &p, const Vector3 &p2, Vector4 &q)
  { mult(M, p, q); add(p2, q); }

  // const-output variant of the affine update above
  template <typename Matrix1, typename Matrix2, typename Precond,
	    typename Vector2, typename Vector3, typename Vector4,
	    typename local_solver>
  void mult(const add_schwarz_mat<Matrix1, Matrix2, Precond, local_solver> &M,
	    const Vector2 &p, const Vector3 &p2, const Vector4 &q)
  { mult(M, p, const_cast<Vector4 &>(q)); add(p2, q); }

  /* ******************************************************************** */
  /*		Additive Schwarz interfaced global solvers                */
  /* ******************************************************************** */

  // AS_global_solve: dispatch on the global-solver tag; the operator is
  // the add_schwarz_mat ASM (applied through the mult overloads above).
  template <typename ASM_type, typename Vect>
  void AS_global_solve(using_cg, const ASM_type &ASM, Vect &x,
		       const Vect &b, iteration &iter)
  { cg(ASM, x, b, *(ASM.A), identity_matrix(), iter); }

  template <typename ASM_type, typename Vect>
  void AS_global_solve(using_gmres, const ASM_type &ASM, Vect &x,
		       const Vect &b, iteration &iter)
  { gmres(ASM, x, b, identity_matrix(), 100, iter); }

  template <typename ASM_type, typename Vect>
  void AS_global_solve(using_bicgstab, const ASM_type &ASM, Vect &x,
		       const Vect &b, iteration &iter)
  { bicgstab(ASM, x, b, identity_matrix(), iter); }

  template <typename ASM_type, typename Vect>
  void AS_global_solve(using_qmr,const ASM_type &ASM, Vect &x,
		       const Vect &b, iteration &iter)
  { qmr(ASM, x, b, identity_matrix(), iter); }

#if defined(GMM_USES_SUPERLU)
  // SuperLU is a direct local solver only; it cannot drive the global
  // (matrix-free) iteration, so this overload always fails.
  template <typename ASM_type, typename Vect>
  void AS_global_solve(using_superlu, const ASM_type &, Vect &,
		       const Vect &, iteration &) {
    GMM_ASSERT1(false, "You cannot use SuperLU as "
		"global solver in additive Schwarz meethod");
  }
#endif

  /* ******************************************************************** */
  /*	        Linear Additive Schwarz method                            */
  /* ******************************************************************** */
  /* ref : Domain decomposition algorithms for the p-version finite       */
  /*       element method for elliptic problems, Luca F. Pavarino,        */
  /*       PhD thesis, Courant Institute of Mathematical Sciences, 1992.  */
  /* ******************************************************************** */

  /** Function to call if the ASM matrix is precomputed for successive solve
   * with the same system.
   */
  template <typename Matrix1, typename Matrix2,
	    typename Vector2, typename Vector3, typename Precond,
	    typename local_solver, typename global_solver>
  void additive_schwarz(
    add_schwarz_mat<Matrix1, Matrix2, Precond, local_solver> &ASM, Vector3 &u,
    const Vector2 &f, iteration &iter, const global_solver&) {

    typedef typename linalg_traits<Matrix1>::value_type value_type;

    size_type nb_sub = ASM.vB->size(), nb_dof = gmm::vect_size(f);
    ASM.itebilan = 0;
    // g accumulates the preconditioned rhs: g = sum_i Bi (Bi^T A Bi)^{-1} Bi^T f
    std::vector<value_type> g(nb_dof);
    std::vector<value_type> gbis(nb_dof);
#ifdef GMM_USES_MPI
    double t_init=MPI_Wtime();
    int size,tranche,borne_sup,borne_inf,rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    tranche=nb_sub/size;
    borne_inf=rank*tranche;
    borne_sup=(rank+1)*tranche;
    // if (rank==size-1) borne_sup=nb_sub*size;
    for (size_type i = size_type(borne_inf); i < size_type(borne_sup); ++i)
    // for (size_type i = 0; i < nb_sub/size; ++i)
    // for (size_type j = 0; j < nb_sub; ++j)
    // for (size_type i = rank; i < nb_sub; i+=size)
#else
    for (size_type i = 0; i < nb_sub; ++i)
#endif
      {

#ifdef GMM_USES_MPI
	// size_type i=j; // (rank+size*(j-1)+nb_sub)%nb_sub;
#endif
	gmm::mult(gmm::transposed((*(ASM.vB))[i]), f, ASM.fi[i]);
	ASM.iter.init();
	AS_local_solve(local_solver(), ASM.vAloc[i], ASM.gi[i], ASM.fi[i],
		       ASM.precond1[i], ASM.iter);
	ASM.itebilan = std::max(ASM.itebilan, ASM.iter.get_iteration());
#ifdef GMM_USES_MPI
	gmm::mult((*(ASM.vB))[i], ASM.gi[i], gbis,gbis);
#else
	gmm::mult((*(ASM.vB))[i], ASM.gi[i], g, g);
#endif
      }
#ifdef GMM_USES_MPI
    cout<<"temps boucle init "<< MPI_Wtime()-t_init<<endl;
    double t_ref,t_final;
    t_ref=MPI_Wtime();
    // combine the per-process partial sums into the global rhs g
    MPI_Allreduce(&(gbis[0]), &(g[0]),gmm::vect_size(g), MPI_DOUBLE,
		  MPI_SUM,MPI_COMM_WORLD);
    t_final=MPI_Wtime();
    cout<<"temps reduce init "<< t_final-t_ref<<endl;
#endif
#ifdef GMM_USES_MPI
    t_ref=MPI_Wtime();
    cout<<"begin global AS"<<endl;
#endif
    // solve the preconditioned system (ASM operator) for u with rhs g
    AS_global_solve(global_solver(), ASM, u, g, iter);
#ifdef GMM_USES_MPI
    t_final=MPI_Wtime();
    cout<<"temps AS Global Solve "<< t_final-t_ref<<endl;
#endif
    if (iter.get_noisy())
      cout << "Total number of internal iterations : " << ASM.itebilan << endl;
  }

  /** Global function. Compute the ASM matrix and call the previous function.
   * The ASM matrix represent the preconditionned linear system.
   */
  template <typename Matrix1, typename Matrix2,
	    typename Vector2, typename Vector3, typename Precond,
	    typename local_solver, typename global_solver>
  void additive_schwarz(const Matrix1 &A, Vector3 &u,
			const Vector2 &f, const Precond &P,
			const std::vector<Matrix2> &vB,
			iteration &iter, local_solver,
			global_solver) {
    iter.set_rhsnorm(vect_norm2(f));
    if (iter.get_rhsnorm() == 0.0) { gmm::clear(u); return; }
    // local solves use a quieter, unlimited-iteration copy of iter
    iteration iter2 = iter; iter2.reduce_noisy();
    iter2.set_maxiter(size_type(-1));
    add_schwarz_mat<Matrix1, Matrix2, Precond, local_solver>
      ASM(A, vB, iter2, P, iter.get_resmax());
    additive_schwarz(ASM, u, f, iter, global_solver());
  }

  /* ******************************************************************** */
  /*		Sequential Non-Linear Additive Schwarz method             */
  /* ******************************************************************** */
  /* ref : Nonlinearly Preconditionned Inexact Newton Algorithms,         */
  /*       Xiao-Chuan Cai, David E. Keyes,                                */
  /*       SIAM J. Sci. Comp. 24: p183-200. l                             */
  /* ******************************************************************** */

  /** Abstract interface the user implements to describe the nonlinear
   *  problem F(x) = 0 and its sub-domain restrictions for the nonlinear
   *  additive Schwarz / inexact Newton driver below.
   */
  template <typename Matrixt, typename MatrixBi>
  class NewtonAS_struct {

  public :
    typedef Matrixt tangent_matrix_type;
    typedef MatrixBi B_matrix_type;
    typedef typename linalg_traits<Matrixt>::value_type value_type;
    typedef std::vector<value_type> Vector;

    virtual size_type size(void) = 0;                         // global dof count
    virtual const std::vector<MatrixBi> &get_vB() = 0;        // sub-domain bases

    virtual void compute_F(Vector &f, Vector &x) = 0;         // f = F(x)
    virtual void compute_tangent_matrix(Matrixt &M, Vector &x) = 0;
    // compute Bi^T grad(F(X)) Bi
    virtual void compute_sub_tangent_matrix(Matrixt &Mloc, Vector &x,
					    size_type i) = 0;
    // compute Bi^T F(X)
    virtual void compute_sub_F(Vector &fi, Vector &x, size_type i) = 0;

    virtual ~NewtonAS_struct() {}
  };

  /** Exact-gradient operator used by the nonlinear driver: stores one
   *  tangent matrix vM[i] per sub-domain (evaluated at that sub-domain's
   *  local iterate) and the corresponding local matrices
   *  vMloc[i] = Bi^T vM[i] Bi built by init().
   */
  template <typename Matrixt, typename MatrixBi>
  struct AS_exact_gradient {
    const std::vector<MatrixBi> &vB;
    std::vector<Matrixt> vM;
    std::vector<Matrixt> vMloc;

    // (re)build vMloc from the current vM; call after filling vM
    void init(void) {
      for (size_type i = 0; i < vB.size(); ++i) {
	Matrixt aux(gmm::mat_ncols(vB[i]), gmm::mat_ncols(vM[i]));
	gmm::resize(vMloc[i], gmm::mat_ncols(vB[i]), gmm::mat_ncols(vB[i]));
	gmm::mult(gmm::transposed(vB[i]), vM[i], aux);
	gmm::mult(aux, vB[i], vMloc[i]);
      }
    }
    AS_exact_gradient(const std::vector<MatrixBi> &vB_) : vB(vB_) {
      vM.resize(vB.size()); vMloc.resize(vB.size());
      for (size_type i = 0; i < vB.size(); ++i) {
	gmm::resize(vM[i], gmm::mat_nrows(vB[i]), gmm::mat_nrows(vB[i]));
      }
    }
  };

  // q = sum_i Bi vMloc[i]^{-1} Bi^T vM[i] p, with the local systems solved
  // directly by SuperLU.
  // NOTE(review): SuperLU_solve is called unconditionally here (no
  // GMM_USES_SUPERLU guard) -- this operator requires a SuperLU build.
  template <typename Matrixt, typename MatrixBi,
	    typename Vector2, typename Vector3>
  void mult(const AS_exact_gradient<Matrixt, MatrixBi> &M,
	    const Vector2 &p, Vector3 &q) {
    gmm::clear(q);
    typedef typename gmm::linalg_traits<Vector3>::value_type T;
    std::vector<T> v(gmm::vect_size(p)), w, x;
    for (size_type i = 0; i < M.vB.size(); ++i) {
      w.resize(gmm::mat_ncols(M.vB[i]));
      x.resize(gmm::mat_ncols(M.vB[i]));
      gmm::mult(M.vM[i], p, v);
      gmm::mult(gmm::transposed(M.vB[i]), v, w);
      double rcond;
      SuperLU_solve(M.vMloc[i], x, w, rcond);
      // gmm::iteration iter(1E-10, 0, 100000);
      //gmm::gmres(M.vMloc[i], x, w, gmm::identity_matrix(), 50, iter);
      gmm::mult_add(M.vB[i], x, q);
    }
  }

  // const-output overload, forwarding to the mutable-output version
  template <typename Matrixt, typename MatrixBi,
	    typename Vector2, typename Vector3>
  void mult(const AS_exact_gradient<Matrixt, MatrixBi> &M,
	    const Vector2 &p, const Vector3 &q) {
    mult(M, p, const_cast<Vector3 &>(q));
  }

  // q = M p + p2
  template <typename Matrixt, typename MatrixBi,
	    typename Vector2, typename Vector3, typename Vector4>
  void mult(const AS_exact_gradient<Matrixt, MatrixBi> &M,
	    const Vector2 &p, const Vector3 &p2, Vector4 &q)
  { mult(M, p, q); add(p2, q); }

  // const-output variant of the affine update above
  template <typename Matrixt, typename MatrixBi,
	    typename Vector2, typename Vector3, typename Vector4>
  void mult(const AS_exact_gradient<Matrixt, MatrixBi> &M,
	    const Vector2 &p, const Vector3 &p2, const Vector4 &q)
  { mult(M, p, const_cast<Vector4 &>(q)); add(p2, q); }

  /** Backtracking line search used by Newton_additive_Schwarz.
   *  init_search() is called with the initial residual; next_try() returns
   *  the step to test; is_converged() judges the resulting residual and
   *  records the best (step, residual) pair seen so far.
   *  NOTE(review): has virtual member functions but no virtual destructor;
   *  do not delete instances through a base pointer.
   */
  struct S_default_newton_line_search {

    double conv_alpha, conv_r;        // best accepted step and its residual
    size_t it, itmax, glob_it;

    double alpha, alpha_old, alpha_mult, first_res, alpha_max_ratio;
    double alpha_min_ratio, alpha_min;
    size_type count, count_pat;
    bool max_ratio_reached;
    double alpha_max_ratio_reached, r_max_ratio_reached;
    size_type it_max_ratio_reached;


    double converged_value(void) { return conv_alpha; };
    double converged_residual(void) { return conv_r; };

    // reset the search state for a new Newton step with residual r
    virtual void init_search(double r, size_t git, double = 0.0) {
      alpha_min_ratio = 0.9;
      alpha_min = 1e-10;
      alpha_max_ratio = 10.0;
      alpha_mult = 0.25;
      itmax = size_type(-1);
      glob_it = git; if (git <= 1) count_pat = 0;
      conv_alpha = alpha = alpha_old = 1.;
      conv_r = first_res = r; it = 0;
      count = 0;
      max_ratio_reached = false;
    }
    // propose the next (smaller) step length; returns the step to test
    virtual double next_try(void) {
      alpha_old = alpha;
      // halve large steps, shrink faster (alpha_mult) once below 0.4
      if (alpha >= 0.4) alpha *= 0.5; else alpha *= alpha_mult; ++it;
      return alpha_old;
    }
    // decide whether residual r is acceptable; tracks the best pair seen
    virtual bool is_converged(double r, double = 0.0) {
      // cout << "r = " << r << " alpha = " << alpha / alpha_mult << " count_pat = " << count_pat << endl;
      if (!max_ratio_reached && r < first_res * alpha_max_ratio) {
	alpha_max_ratio_reached = alpha_old; r_max_ratio_reached = r;
	it_max_ratio_reached = it; max_ratio_reached = true;
      }
      if (max_ratio_reached && r < r_max_ratio_reached * 0.5
	  && r > first_res * 1.1 && it <= it_max_ratio_reached+1) {
	alpha_max_ratio_reached = alpha_old; r_max_ratio_reached = r;
	it_max_ratio_reached = it;
      }
      if (count == 0 || r < conv_r)
	{ conv_r = r; conv_alpha = alpha_old; count = 1; }
      if (conv_r < first_res) ++count;

      if (r < first_res *  alpha_min_ratio)
	{ count_pat = 0; return true; }
      if (count >= 5 || (alpha < alpha_min && max_ratio_reached)) {
	if (conv_r < first_res * 0.99) count_pat = 0;
	if (/*gmm::random() * 50. < -log(conv_alpha)-4.0 ||*/ count_pat >= 3)
	  { conv_r=r_max_ratio_reached; conv_alpha=alpha_max_ratio_reached; }
	if (conv_r >= first_res * 0.9999) count_pat++;
	return true;
      }
      return false;
    }
    S_default_newton_line_search(void) { count_pat = 0; }
  };



  /** Nonlinear additive Schwarz / inexact Newton driver (Cai & Keyes).
   *  Outer loop: until iter converges on min(actual residual,
   *  preconditioned residual), (1) solve each sub-domain nonlinear
   *  problem with a local Newton + line search, (2) solve the global
   *  linearized system (exact-gradient path via gmres), (3) line-search
   *  the global update.
   *  NOTE(review): u_ is passed const but const_cast'ed and updated in
   *  place -- the caller's vector is the in/out unknown.
   */
  template <typename Matrixt, typename MatrixBi, typename Vector,
	    typename Precond, typename local_solver, typename global_solver>
  void Newton_additive_Schwarz(NewtonAS_struct<Matrixt, MatrixBi> &NS,
			       const Vector &u_,
			       iteration &iter, const Precond &P,
			       local_solver, global_solver) {
    Vector &u = const_cast<Vector &>(u_);
    typedef typename linalg_traits<Vector>::value_type value_type;
    typedef typename number_traits<value_type>::magnitude_type mtype;
    typedef actual_precond<Precond, local_solver, Matrixt> chgt_precond;

    double residual = iter.get_resmax();

    S_default_newton_line_search internal_ls;
    S_default_newton_line_search external_ls;

    typename chgt_precond::APrecond PP = chgt_precond::transform(P);
    iter.set_rhsnorm(mtype(1));
    // derived iteration controls with progressively tighter tolerances:
    // iternc: local nonlinear solves; iter3: local linear solves;
    // iter2: global linear system; iter4: optional ASM path
    iteration iternc(iter);
    iternc.reduce_noisy(); iternc.set_maxiter(size_type(-1));
    iteration iter2(iternc);
    iteration iter3(iter2); iter3.reduce_noisy();
    iteration iter4(iter3);
    iternc.set_name("Local Newton");
    iter2.set_name("Linear System for Global Newton");
    iternc.set_resmax(residual/100.0);
    iter3.set_resmax(residual/10000.0);
    iter2.set_resmax(residual/1000.0);
    iter4.set_resmax(residual/1000.0);
    std::vector<value_type> rhs(NS.size()), x(NS.size()), d(NS.size());
    std::vector<value_type> xi, xii, fi, di;

    std::vector< std::vector<value_type> > vx(NS.get_vB().size());
    for (size_type i = 0; i < NS.get_vB().size(); ++i) // for exact gradient
      vx[i].resize(NS.size());                         // for exact gradient

    Matrixt Mloc, M(NS.size(), NS.size());
    NS.compute_F(rhs, u);
    mtype act_res=gmm::vect_norm2(rhs), act_res_new(0), precond_res = act_res;
    mtype alpha;

    while(!iter.finished(std::min(act_res, precond_res))) {
      // SOR_step counts down; with the initial value 0 the nonlinear
      // preconditioning sweep runs exactly once per outer iteration
      for (int SOR_step = 0;  SOR_step >= 0; --SOR_step) {
	gmm::clear(rhs);
	for (size_type isd = 0; isd < NS.get_vB().size(); ++isd) {
	  const MatrixBi &Bi = (NS.get_vB())[isd];
	  size_type si = mat_ncols(Bi);
	  gmm::resize(Mloc, si, si);
	  xi.resize(si); xii.resize(si); fi.resize(si); di.resize(si);

	  iternc.init();
	  iternc.set_maxiter(30); // ?
	  if (iternc.get_noisy())
	    cout << "Non-linear local problem " << isd << endl;
	  gmm::clear(xi);
	  gmm::copy(u, x);
	  NS.compute_sub_F(fi, x, isd); gmm::scale(fi, value_type(-1));
	  mtype r = gmm::vect_norm2(fi), r_t(r);
	  if (r > value_type(0)) {
	    iternc.set_rhsnorm(std::max(r, mtype(1)));
	    // local Newton iteration on sub-domain isd
	    while(!iternc.finished(r)) {
	      NS.compute_sub_tangent_matrix(Mloc, x, isd);

	      PP.build_with(Mloc);
	      iter3.init();
	      AS_local_solve(local_solver(), Mloc, di, fi, PP, iter3);

	      // backtracking line search on the local correction di
	      internal_ls.init_search(r, iternc.get_iteration());
	      do {
		alpha = internal_ls.next_try();
		gmm::add(xi, gmm::scaled(di, -alpha), xii);
		gmm::mult(Bi, gmm::scaled(xii, -1.0), u, x);
		NS.compute_sub_F(fi, x, isd); gmm::scale(fi, value_type(-1));
		r_t = gmm::vect_norm2(fi);
	      } while (!internal_ls.is_converged(r_t));

	      // if the search settled on an earlier step, recompute with it
	      if (alpha != internal_ls.converged_value()) {
		alpha = internal_ls.converged_value();
		gmm::add(xi, gmm::scaled(di, -alpha), xii);
		gmm::mult(Bi, gmm::scaled(xii, -1.0), u, x);
		NS.compute_sub_F(fi, x, isd); gmm::scale(fi, value_type(-1));
		r_t = gmm::vect_norm2(fi);
	      }
	      gmm::copy(x, vx[isd]); // for exact gradient

	      if (iternc.get_noisy()) cout << "(step=" << alpha << ")\t";
	      ++iternc; r = r_t; gmm::copy(xii, xi);
	    }
	    if (SOR_step) gmm::mult(Bi, gmm::scaled(xii, -1.0), u, u);
	    // accumulate the nonlinearly preconditioned residual
	    gmm::mult(Bi, gmm::scaled(xii, -1.0), rhs, rhs);
	  }
	}
	precond_res = gmm::vect_norm2(rhs);
	if (SOR_step) cout << "SOR step residual = " << precond_res << endl;
	if (precond_res < residual) break;
	cout << "Precond residual = " << precond_res << endl;
      }

      iter2.init();
      // solving linear system for the global Newton method
      if (0) {  // disabled: ASM-preconditioned path kept for reference
	NS.compute_tangent_matrix(M, u);
	add_schwarz_mat<Matrixt, MatrixBi, Precond, local_solver>
	  ASM(M, NS.get_vB(), iter4, P, iter.get_resmax());
	AS_global_solve(global_solver(), ASM, d, rhs, iter2);
      }
      else {  // for exact gradient
	AS_exact_gradient<Matrixt, MatrixBi> eg(NS.get_vB());
	for (size_type i = 0; i < NS.get_vB().size(); ++i) {
	  NS.compute_tangent_matrix(eg.vM[i], vx[i]);
	}
	eg.init();
	gmres(eg, d, rhs, gmm::identity_matrix(), 50, iter2);
      }

      // gmm::add(gmm::scaled(rhs, 0.1), u); ++iter;
      // global line search on the Newton direction d
      external_ls.init_search(act_res, iter.get_iteration());
      do {
	alpha = external_ls.next_try();
	gmm::add(gmm::scaled(d, alpha), u, x);
	NS.compute_F(rhs, x);
	act_res_new = gmm::vect_norm2(rhs);
      } while (!external_ls.is_converged(act_res_new));

      if (alpha != external_ls.converged_value()) {
	alpha = external_ls.converged_value();
	gmm::add(gmm::scaled(d, alpha), u, x);
	NS.compute_F(rhs, x);
	act_res_new = gmm::vect_norm2(rhs);
      }

      if (iter.get_noisy() > 1) cout << endl;
      act_res = act_res_new;
      if (iter.get_noisy()) cout << "(step=" << alpha << ")\t unprecond res = " << act_res << " ";


      ++iter; gmm::copy(x, u);
    }
  }

}


#endif //  GMM_SOLVERS_SCHWARZ_ADDITIVE_H__