/* -*- c++ -*- (enables emacs c++ mode) */
/*===========================================================================

 Copyright (C) 2002-2020 Yves Renard

 This file is a part of GetFEM

 GetFEM  is  free software;  you  can  redistribute  it  and/or modify it
 under  the  terms  of the  GNU  Lesser General Public License as published
 by  the  Free Software Foundation;  either version 3 of the License,  or
 (at your option) any later version along with the GCC Runtime Library
 Exception either version 3.1 or (at your option) any later version.
 This program  is  distributed  in  the  hope  that it will be useful,  but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 License and GCC Runtime Library Exception for more details.
 You  should  have received a copy of the GNU Lesser General Public License
 along  with  this program;  if not, write to the Free Software Foundation,
 Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.

 As a special exception, you  may use  this file  as it is a part of a free
 software  library  without  restriction.  Specifically,  if   other  files
 instantiate  templates  or  use macros or inline functions from this file,
 or  you compile this  file  and  link  it  with other files  to produce an
 executable, this file  does  not  by itself cause the resulting executable
 to be covered  by the GNU Lesser General Public License.  This   exception
 does not  however  invalidate  any  other  reasons why the executable file
 might be covered by the GNU Lesser General Public License.

===========================================================================*/

/**@file gmm_solver_Schwarz_additive.h
   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
   @author  Michel Fournie <fournie@mip.ups-tlse.fr>
   @date October 13, 2002.
*/

#ifndef GMM_SOLVERS_SCHWARZ_ADDITIVE_H__
#define GMM_SOLVERS_SCHWARZ_ADDITIVE_H__

#include "gmm_kernel.h"
#include "gmm_superlu_interface.h"
#include "gmm_solver_cg.h"
#include "gmm_solver_gmres.h"
#include "gmm_solver_bicgstab.h"
#include "gmm_solver_qmr.h"

namespace gmm {
  /* ******************************************************************** */
  /*            Additive Schwarz interfaced local solvers                 */
  /* ******************************************************************** */

  struct using_cg {};
  struct using_gmres {};
  struct using_bicgstab {};
  struct using_qmr {};

  template <typename P, typename local_solver, typename Matrix>
  struct actual_precond {
    typedef P APrecond;
    static const APrecond &transform(const P &PP) { return PP; }
  };

  template <typename Matrix1, typename Precond, typename Vector>
  void AS_local_solve(using_cg, const Matrix1 &A, Vector &x, const Vector &b,
                      const Precond &P, iteration &iter)
  { cg(A, x, b, P, iter); }

  template <typename Matrix1, typename Precond, typename Vector>
  void AS_local_solve(using_gmres, const Matrix1 &A, Vector &x,
                      const Vector &b, const Precond &P, iteration &iter)
  { gmres(A, x, b, P, 100, iter); }

  template <typename Matrix1, typename Precond, typename Vector>
  void AS_local_solve(using_bicgstab, const Matrix1 &A, Vector &x,
                      const Vector &b, const Precond &P, iteration &iter)
  { bicgstab(A, x, b, P, iter); }

  template <typename Matrix1, typename Precond, typename Vector>
  void AS_local_solve(using_qmr, const Matrix1 &A, Vector &x,
                      const Vector &b, const Precond &P, iteration &iter)
  { qmr(A, x, b, P, iter); }

#if defined(GMM_USES_SUPERLU)
  struct using_superlu {};

  template <typename P, typename Matrix>
  struct actual_precond<P, using_superlu, Matrix> {
    typedef typename linalg_traits<Matrix>::value_type value_type;
    typedef SuperLU_factor<value_type> APrecond;
    template <typename PR>
    static APrecond transform(const PR &) { return APrecond(); }
    static const APrecond &transform(const APrecond &PP) { return PP; }
  };

  template <typename Matrix1, typename Precond, typename Vector>
  void AS_local_solve(using_superlu, const Matrix1 &, Vector &x,
                      const Vector &b, const Precond &P, iteration &iter)
  { P.solve(x, b); iter.set_iteration(1); }
#endif

  /* ******************************************************************** */
  /*            Additive Schwarz Linear system                            */
  /* ******************************************************************** */

  template <typename Matrix1, typename Matrix2, typename Precond,
            typename local_solver>
  struct add_schwarz_mat {
    typedef typename linalg_traits<Matrix1>::value_type value_type;

    const Matrix1 *A;
    const std::vector<Matrix2> *vB;
    std::vector<Matrix2> vAloc;
    mutable iteration iter;
    double residual;
    mutable size_type itebilan;
    mutable std::vector<std::vector<value_type> > gi, fi;
    std::vector<typename actual_precond<Precond, local_solver,
                                        Matrix1>::APrecond> precond1;

    void init(const Matrix1 &A_, const std::vector<Matrix2> &vB_,
              iteration iter_, const Precond &P, double residual_);

    add_schwarz_mat(void) {}
    add_schwarz_mat(const Matrix1 &A_, const std::vector<Matrix2> &vB_,
                    iteration iter_, const Precond &P, double residual_)
    { init(A_, vB_, iter_, P, residual_); }
  };

  template <typename Matrix1, typename Matrix2, typename Precond,
            typename local_solver>
  void add_schwarz_mat<Matrix1, Matrix2, Precond, local_solver>::init(
       const Matrix1 &A_, const std::vector<Matrix2> &vB_,
       iteration iter_, const Precond &P, double residual_) {

    vB = &vB_; A = &A_; iter = iter_;
    residual = residual_;

    size_type nb_sub = vB->size();
    vAloc.resize(nb_sub);
    gi.resize(nb_sub); fi.resize(nb_sub);
    precond1.resize(nb_sub);
    std::fill(precond1.begin(), precond1.end(),
              actual_precond<Precond, local_solver, Matrix1>::transform(P));
    itebilan = 0;

    if (iter.get_noisy()) cout << "Init for subdomain ";
#ifdef GMM_USES_MPI
    int size,tranche,borne_sup,borne_inf,rank,tag1=11,tag2=12,tag3=13,sizepr = 0;
    //    int tab[4];
    double t_ref,t_final;
    MPI_Status status;
    t_ref=MPI_Wtime();
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    tranche=nb_sub/size;
    borne_inf=rank*tranche;
    borne_sup=(rank+1)*tranche;
    // if (rank==size-1) borne_sup = nb_sub;

    cout << "Number of subdomains " << borne_sup - borne_inf << endl;

    int sizeA = mat_nrows(*A);
    gmm::csr_matrix<value_type> Acsr(sizeA, sizeA), Acsrtemp(sizeA, sizeA);
    gmm::copy(gmm::eff_matrix(*A), Acsr);
    int next = (rank + 1) % size;
    int previous = (rank + size - 1) % size;
    // communication of local information in a ring pattern:
    // each process receives Nproc-1 contributions

    for (int nproc = 0; nproc < size; ++nproc) {
      for (size_type i = size_type(borne_inf); i < size_type(borne_sup); ++i) {
        // for (size_type i = 0; i < nb_sub/size; ++i) {
        // for (size_type i = 0; i < nb_sub; ++i) {
        // size_type i=(rank+size*(j-1)+nb_sub)%nb_sub;

        cout << "Subdomain " << i << " : " << mat_ncols((*vB)[i]) << endl;
#else
        for (size_type i = 0; i < nb_sub; ++i) {
#endif

          if (iter.get_noisy()) cout << i << " " << std::flush;
          Matrix2 Maux(mat_ncols((*vB)[i]), mat_nrows((*vB)[i]));

#ifdef GMM_USES_MPI
          Matrix2 Maux2(mat_ncols((*vB)[i]), mat_ncols((*vB)[i]));
          if (nproc == 0) {
            gmm::resize(vAloc[i], mat_ncols((*vB)[i]), mat_ncols((*vB)[i]));
            gmm::clear(vAloc[i]);
          }
          gmm::mult(gmm::transposed((*vB)[i]), Acsr, Maux);
          gmm::mult(Maux, (*vB)[i], Maux2);
          gmm::add(Maux2, vAloc[i]);
#else
          gmm::resize(vAloc[i], mat_ncols((*vB)[i]), mat_ncols((*vB)[i]));
          gmm::mult(gmm::transposed((*vB)[i]), *A, Maux);
          gmm::mult(Maux, (*vB)[i], vAloc[i]);
#endif

#ifdef GMM_USES_MPI
          if (nproc == size - 1) {
#endif
            precond1[i].build_with(vAloc[i]);
            gmm::resize(fi[i], mat_ncols((*vB)[i]));
            gmm::resize(gi[i], mat_ncols((*vB)[i]));
#ifdef GMM_USES_MPI
          }
#else
        }
#endif
#ifdef GMM_USES_MPI
      }
      if (nproc != size - 1) {
        MPI_Sendrecv(&(Acsr.jc[0]), sizeA+1, MPI_INT, next, tag2,
                     &(Acsrtemp.jc[0]), sizeA+1, MPI_INT, previous, tag2,
                     MPI_COMM_WORLD, &status);
        if (Acsrtemp.jc[sizeA] > size_type(sizepr)) {
          sizepr = Acsrtemp.jc[sizeA];
          gmm::resize(Acsrtemp.pr, sizepr);
          gmm::resize(Acsrtemp.ir, sizepr);
        }
        MPI_Sendrecv(&(Acsr.ir[0]), Acsr.jc[sizeA], MPI_INT, next, tag1,
                     &(Acsrtemp.ir[0]), Acsrtemp.jc[sizeA], MPI_INT, previous, tag1,
                     MPI_COMM_WORLD, &status);

        MPI_Sendrecv(&(Acsr.pr[0]), Acsr.jc[sizeA], mpi_type(value_type()), next, tag3,
                     &(Acsrtemp.pr[0]), Acsrtemp.jc[sizeA], mpi_type(value_type()), previous, tag3,
                     MPI_COMM_WORLD, &status);
        gmm::copy(Acsrtemp, Acsr);
      }
    }
    t_final=MPI_Wtime();
    cout << "preconditioner loop time " << t_final-t_ref << endl;
#endif
    if (iter.get_noisy()) cout << "\n";
  }

  template <typename Matrix1, typename Matrix2, typename Precond,
            typename Vector2, typename Vector3, typename local_solver>
  void mult(const add_schwarz_mat<Matrix1, Matrix2, Precond, local_solver> &M,
            const Vector2 &p, Vector3 &q) {
    size_type itebilan = 0;
#ifdef GMM_USES_MPI
    static double tmult_tot = 0.0;
    double t_ref = MPI_Wtime();
#endif
    // cout << "tmult AS begin " << endl;
    mult(*(M.A), p, q);
#ifdef GMM_USES_MPI
    tmult_tot += MPI_Wtime()-t_ref;
    cout << "tmult_tot = " << tmult_tot << endl;
#endif
    std::vector<double> qbis(gmm::vect_size(q));
    std::vector<double> qter(gmm::vect_size(q));
#ifdef GMM_USES_MPI
    //    MPI_Status status;
    //    MPI_Request request,request1;
    //    int tag=111;
    int size,tranche,borne_sup,borne_inf,rank;
    size_type nb_sub=M.fi.size();
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    tranche=nb_sub/size;
    borne_inf=rank*tranche;
    borne_sup=(rank+1)*tranche;
    // if (rank==size-1) borne_sup=nb_sub;
    //    int next = (rank + 1) % size;
    //    int previous = (rank + size - 1) % size;
    t_ref = MPI_Wtime();
    for (size_type i = size_type(borne_inf); i < size_type(borne_sup); ++i)
    // for (size_type i = 0; i < nb_sub/size; ++i)
    // for (size_type j = 0; j < nb_sub; ++j)
#else
    for (size_type i = 0; i < M.fi.size(); ++i)
#endif
      {
#ifdef GMM_USES_MPI
        // size_type i=j; // (rank+size*(j-1)+nb_sub)%nb_sub;
#endif
        gmm::mult(gmm::transposed((*(M.vB))[i]), q, M.fi[i]);
        M.iter.init();
        AS_local_solve(local_solver(), (M.vAloc)[i], (M.gi)[i],
                       (M.fi)[i], (M.precond1)[i], M.iter);
        itebilan = std::max(itebilan, M.iter.get_iteration());
      }

#ifdef GMM_USES_MPI
    cout << "First AS loop time " << MPI_Wtime() - t_ref << endl;
#endif

    gmm::clear(q);
#ifdef GMM_USES_MPI
    t_ref = MPI_Wtime();
    // for (size_type j = 0; j < nb_sub; ++j)
    for (size_type i = size_type(borne_inf); i < size_type(borne_sup); ++i)
#else
    for (size_type i = 0; i < M.gi.size(); ++i)
#endif
      {
#ifdef GMM_USES_MPI
        // size_type i=j; // (rank+size*(j-1)+nb_sub)%nb_sub;
        // gmm::mult((*(M.vB))[i], M.gi[i], qbis, qbis);
        gmm::mult((*(M.vB))[i], M.gi[i], qter);
        add(qter, qbis, qbis);
#else
        gmm::mult((*(M.vB))[i], M.gi[i], q, q);
#endif
      }
#ifdef GMM_USES_MPI
    // WARNING: the add below is needed only if the explicit ring-pattern
    // communication (commented out further down) is used instead of the
    // MPI_Allreduce.
    // add(qbis, q, q);
    cout << "Second AS loop time " << MPI_Wtime() - t_ref << endl;
#endif

#ifdef GMM_USES_MPI
    //    int tag1=11;
    static double t_tot = 0.0;
    double t_final;
    t_ref=MPI_Wtime();
    //     int next = (rank + 1) % size;
    //     int previous = (rank + size - 1) % size;
    // communication of local information in a ring pattern:
    // each process receives Nproc-1 contributions
    //     if (size > 1) {
    //     for (int nproc = 0; nproc < size-1; ++nproc)
    //       {
    //         MPI_Sendrecv(&(qbis[0]), gmm::vect_size(q), MPI_DOUBLE, next, tag1,
    //                      &(qter[0]), gmm::vect_size(q), MPI_DOUBLE, previous, tag1,
    //                      MPI_COMM_WORLD, &status);
    //         gmm::copy(qter, qbis);
    //         add(qbis,q,q);
    //       }
    //     }
    MPI_Allreduce(&(qbis[0]), &(q[0]), gmm::vect_size(q), MPI_DOUBLE,
                  MPI_SUM, MPI_COMM_WORLD);
    t_final=MPI_Wtime();
    t_tot += t_final-t_ref;
    cout << "[" << rank << "] solve reduce time " << t_final-t_ref << " t_tot = " << t_tot << endl;
#endif

    if (M.iter.get_noisy() > 0) cout << "itebloc = " << itebilan << endl;
    M.itebilan += itebilan;
    M.iter.set_resmax((M.iter.get_resmax() + M.residual) * 0.5);
  }

  template <typename Matrix1, typename Matrix2, typename Precond,
            typename Vector2, typename Vector3, typename local_solver>
  void mult(const add_schwarz_mat<Matrix1, Matrix2, Precond, local_solver> &M,
            const Vector2 &p, const Vector3 &q) {
    mult(M, p, const_cast<Vector3 &>(q));
  }

  template <typename Matrix1, typename Matrix2, typename Precond,
            typename Vector2, typename Vector3, typename Vector4,
            typename local_solver>
  void mult(const add_schwarz_mat<Matrix1, Matrix2, Precond, local_solver> &M,
            const Vector2 &p, const Vector3 &p2, Vector4 &q)
  { mult(M, p, q); add(p2, q); }

  template <typename Matrix1, typename Matrix2, typename Precond,
            typename Vector2, typename Vector3, typename Vector4,
            typename local_solver>
  void mult(const add_schwarz_mat<Matrix1, Matrix2, Precond, local_solver> &M,
            const Vector2 &p, const Vector3 &p2, const Vector4 &q)
  { mult(M, p, const_cast<Vector4 &>(q)); add(p2, q); }

  /* ******************************************************************** */
  /*            Additive Schwarz interfaced global solvers                */
  /* ******************************************************************** */

  template <typename ASM_type, typename Vect>
  void AS_global_solve(using_cg, const ASM_type &ASM, Vect &x,
                       const Vect &b, iteration &iter)
  { cg(ASM, x, b, *(ASM.A), identity_matrix(), iter); }

  template <typename ASM_type, typename Vect>
  void AS_global_solve(using_gmres, const ASM_type &ASM, Vect &x,
                       const Vect &b, iteration &iter)
  { gmres(ASM, x, b, identity_matrix(), 100, iter); }

  template <typename ASM_type, typename Vect>
  void AS_global_solve(using_bicgstab, const ASM_type &ASM, Vect &x,
                       const Vect &b, iteration &iter)
  { bicgstab(ASM, x, b, identity_matrix(), iter); }

  template <typename ASM_type, typename Vect>
  void AS_global_solve(using_qmr, const ASM_type &ASM, Vect &x,
                       const Vect &b, iteration &iter)
  { qmr(ASM, x, b, identity_matrix(), iter); }

#if defined(GMM_USES_SUPERLU)
  template <typename ASM_type, typename Vect>
  void AS_global_solve(using_superlu, const ASM_type &, Vect &,
                       const Vect &, iteration &) {
    GMM_ASSERT1(false, "You cannot use SuperLU as "
                "global solver in additive Schwarz method");
  }
#endif

  /* ******************************************************************** */
  /*                Linear Additive Schwarz method                        */
  /* ******************************************************************** */
  /* Ref: Domain decomposition algorithms for the p-version finite        */
  /*      element method for elliptic problems, Luca F. Pavarino,         */
  /*      PhD thesis, Courant Institute of Mathematical Sciences, 1992.   */
  /* ******************************************************************** */

  /** Function to call if the ASM matrix is precomputed for successive
   *  solves with the same system (a usage sketch follows the function
   *  below).
   */
  template <typename Matrix1, typename Matrix2,
            typename Vector2, typename Vector3, typename Precond,
            typename local_solver, typename global_solver>
  void additive_schwarz(
    add_schwarz_mat<Matrix1, Matrix2, Precond, local_solver> &ASM, Vector3 &u,
    const Vector2 &f, iteration &iter, const global_solver&) {

    typedef typename linalg_traits<Matrix1>::value_type value_type;

    size_type nb_sub = ASM.vB->size(), nb_dof = gmm::vect_size(f);
    ASM.itebilan = 0;
    std::vector<value_type> g(nb_dof);
    std::vector<value_type> gbis(nb_dof);
#ifdef GMM_USES_MPI
    double t_init=MPI_Wtime();
    int size,tranche,borne_sup,borne_inf,rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    tranche=nb_sub/size;
    borne_inf=rank*tranche;
    borne_sup=(rank+1)*tranche;
    // if (rank==size-1) borne_sup=nb_sub*size;
    for (size_type i = size_type(borne_inf); i < size_type(borne_sup); ++i)
    // for (size_type i = 0; i < nb_sub/size; ++i)
    // for (size_type j = 0; j < nb_sub; ++j)
    // for (size_type i = rank; i < nb_sub; i+=size)
#else
    for (size_type i = 0; i < nb_sub; ++i)
#endif
    {
#ifdef GMM_USES_MPI
      // size_type i=j; // (rank+size*(j-1)+nb_sub)%nb_sub;
#endif
      gmm::mult(gmm::transposed((*(ASM.vB))[i]), f, ASM.fi[i]);
      ASM.iter.init();
      AS_local_solve(local_solver(), ASM.vAloc[i], ASM.gi[i], ASM.fi[i],
                     ASM.precond1[i], ASM.iter);
      ASM.itebilan = std::max(ASM.itebilan, ASM.iter.get_iteration());
#ifdef GMM_USES_MPI
      gmm::mult((*(ASM.vB))[i], ASM.gi[i], gbis, gbis);
#else
      gmm::mult((*(ASM.vB))[i], ASM.gi[i], g, g);
#endif
    }
#ifdef GMM_USES_MPI
    cout << "init loop time " << MPI_Wtime()-t_init << endl;
    double t_ref,t_final;
    t_ref=MPI_Wtime();
    MPI_Allreduce(&(gbis[0]), &(g[0]), gmm::vect_size(g), MPI_DOUBLE,
                  MPI_SUM, MPI_COMM_WORLD);
    t_final=MPI_Wtime();
    cout << "init reduce time " << t_final-t_ref << endl;
#endif
#ifdef GMM_USES_MPI
    t_ref=MPI_Wtime();
    cout << "begin global AS" << endl;
#endif
    AS_global_solve(global_solver(), ASM, u, g, iter);
#ifdef GMM_USES_MPI
    t_final=MPI_Wtime();
    cout << "AS global solve time " << t_final-t_ref << endl;
#endif
    if (iter.get_noisy())
      cout << "Total number of internal iterations : " << ASM.itebilan << endl;
  }
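
  /* A minimal usage sketch for the overload above (not part of the original
   * sources): the add_schwarz_mat object is built once and then reused for
   * several right-hand sides.  The matrix type, the ilu_precond choice and
   * the subdomain prolongation matrices vB are assumptions of this example.
   *
   *   typedef gmm::row_matrix<gmm::rsvector<double> > sparse_mat;
   *   gmm::size_type n = 1000;            // number of global unknowns
   *   sparse_mat A(n, n);                 // assembled global matrix
   *   std::vector<sparse_mat> vB;         // one 0/1 prolongation Bi per subdomain
   *   // ... assemble A and the Bi ...
   *   gmm::ilu_precond<sparse_mat> P;     // prototype of the local preconditioner
   *   gmm::iteration local_iter(1E-10); local_iter.reduce_noisy();
   *   gmm::add_schwarz_mat<sparse_mat, sparse_mat,
   *                        gmm::ilu_precond<sparse_mat>, gmm::using_cg>
   *     ASM(A, vB, local_iter, P, 1E-10); // local matrices and preconditioners built here
   *   std::vector<double> u(n), f(n);
   *   for (int k = 0; k < 3; ++k) {       // successive solves with the same system
   *     // ... fill f for solve number k ...
   *     gmm::iteration iter(1E-10);
   *     gmm::additive_schwarz(ASM, u, f, iter, gmm::using_cg());
   *   }
   */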

  /** Global function. Computes the ASM matrix and calls the previous
   *  function. The ASM matrix represents the preconditioned linear system
   *  (a usage sketch follows the function below).
   */
  template <typename Matrix1, typename Matrix2,
            typename Vector2, typename Vector3, typename Precond,
            typename local_solver, typename global_solver>
  void additive_schwarz(const Matrix1 &A, Vector3 &u,
                        const Vector2 &f, const Precond &P,
                        const std::vector<Matrix2> &vB,
                        iteration &iter, local_solver,
                        global_solver) {
    iter.set_rhsnorm(vect_norm2(f));
    if (iter.get_rhsnorm() == 0.0) { gmm::clear(u); return; }
    iteration iter2 = iter; iter2.reduce_noisy();
    iter2.set_maxiter(size_type(-1));
    add_schwarz_mat<Matrix1, Matrix2, Precond, local_solver>
      ASM(A, vB, iter2, P, iter.get_resmax());
    additive_schwarz(ASM, u, f, iter, global_solver());
  }
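
  /* A minimal usage sketch for the convenience entry point above (not part
   * of the original sources).  Each Bi in vB is the 0/1 prolongation matrix
   * of one (possibly overlapping) subdomain; the matrix type, the
   * ilu_precond choice and the use of CG as both local and global solver
   * are assumptions of this example.
   *
   *   typedef gmm::row_matrix<gmm::rsvector<double> > sparse_mat;
   *   gmm::size_type n = 1000, nsd = 4;   // global unknowns, subdomains
   *   sparse_mat A(n, n);                 // assembled global matrix
   *   std::vector<double> u(n), f(n);     // solution and right-hand side
   *   std::vector<sparse_mat> vB(nsd);    // Bi of size n x (local size)
   *   // ... assemble A, f and the Bi ...
   *   gmm::iteration iter(1E-9);
   *   gmm::additive_schwarz(A, u, f, gmm::ilu_precond<sparse_mat>(), vB,
   *                         iter, gmm::using_cg(), gmm::using_cg());
   */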

  /* ******************************************************************** */
  /*            Sequential Non-Linear Additive Schwarz method             */
  /* ******************************************************************** */
  /* Ref: Nonlinearly Preconditioned Inexact Newton Algorithms,           */
  /*      Xiao-Chuan Cai, David E. Keyes,                                 */
  /*      SIAM J. Sci. Comput., 24(1):183-200, 2002.                      */
  /* ******************************************************************** */

  /* Interface to be implemented by the user's problem; a sketch of a
     concrete implementation follows the class below. */
  template <typename Matrixt, typename MatrixBi>
  class NewtonAS_struct {

  public:
    typedef Matrixt tangent_matrix_type;
    typedef MatrixBi B_matrix_type;
    typedef typename linalg_traits<Matrixt>::value_type value_type;
    typedef std::vector<value_type> Vector;

    virtual size_type size(void) = 0;
    virtual const std::vector<MatrixBi> &get_vB() = 0;

    virtual void compute_F(Vector &f, Vector &x) = 0;
    virtual void compute_tangent_matrix(Matrixt &M, Vector &x) = 0;
    // compute Bi^T grad(F(X)) Bi
    virtual void compute_sub_tangent_matrix(Matrixt &Mloc, Vector &x,
                                            size_type i) = 0;
    // compute Bi^T F(X)
    virtual void compute_sub_F(Vector &fi, Vector &x, size_type i) = 0;

    virtual ~NewtonAS_struct() {}
  };
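
  /* A minimal sketch (not part of the original sources) of a NewtonAS_struct
   * implementation, for the toy decoupled problem F(x)_i = x_i + x_i^3 - b_i.
   * The matrix type, the data member b and the subdomain matrices vB (0/1
   * column blocks of the identity) are assumptions of this example.
   *
   *   typedef gmm::row_matrix<gmm::wsvector<double> > mat;
   *   struct toy_NewtonAS : public gmm::NewtonAS_struct<mat, mat> {
   *     std::vector<mat> vB;  // one prolongation Bi per subdomain
   *     Vector b;             // data of the toy problem
   *
   *     gmm::size_type size(void) { return gmm::vect_size(b); }
   *     const std::vector<mat> &get_vB() { return vB; }
   *     void compute_F(Vector &f, Vector &x) {             // F(x)
   *       for (gmm::size_type k = 0; k < size(); ++k)
   *         f[k] = x[k] + x[k]*x[k]*x[k] - b[k];
   *     }
   *     void compute_tangent_matrix(mat &M, Vector &x) {   // grad F(x), diagonal
   *       gmm::resize(M, size(), size()); gmm::clear(M);
   *       for (gmm::size_type k = 0; k < size(); ++k)
   *         M(k, k) = 1.0 + 3.0*x[k]*x[k];
   *     }
   *     // Bi^T grad(F(x)) Bi ; Mloc is already sized by the caller
   *     void compute_sub_tangent_matrix(mat &Mloc, Vector &x, gmm::size_type i) {
   *       mat M, aux(gmm::mat_ncols(vB[i]), size());
   *       compute_tangent_matrix(M, x);
   *       gmm::mult(gmm::transposed(vB[i]), M, aux);
   *       gmm::mult(aux, vB[i], Mloc);
   *     }
   *     // Bi^T F(x) ; fi is already sized by the caller
   *     void compute_sub_F(Vector &fi, Vector &x, gmm::size_type i) {
   *       Vector f(size()); compute_F(f, x);
   *       gmm::mult(gmm::transposed(vB[i]), f, fi);
   *     }
   *   };
   */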

  template <typename Matrixt, typename MatrixBi>
  struct AS_exact_gradient {
    const std::vector<MatrixBi> &vB;
    std::vector<Matrixt> vM;
    std::vector<Matrixt> vMloc;

    void init(void) {
      for (size_type i = 0; i < vB.size(); ++i) {
        Matrixt aux(gmm::mat_ncols(vB[i]), gmm::mat_ncols(vM[i]));
        gmm::resize(vMloc[i], gmm::mat_ncols(vB[i]), gmm::mat_ncols(vB[i]));
        gmm::mult(gmm::transposed(vB[i]), vM[i], aux);
        gmm::mult(aux, vB[i], vMloc[i]);
      }
    }
    AS_exact_gradient(const std::vector<MatrixBi> &vB_) : vB(vB_) {
      vM.resize(vB.size()); vMloc.resize(vB.size());
      for (size_type i = 0; i < vB.size(); ++i) {
        gmm::resize(vM[i], gmm::mat_nrows(vB[i]), gmm::mat_nrows(vB[i]));
      }
    }
  };

  template <typename Matrixt, typename MatrixBi,
            typename Vector2, typename Vector3>
  void mult(const AS_exact_gradient<Matrixt, MatrixBi> &M,
            const Vector2 &p, Vector3 &q) {
    gmm::clear(q);
    typedef typename gmm::linalg_traits<Vector3>::value_type T;
    std::vector<T> v(gmm::vect_size(p)), w, x;
    for (size_type i = 0; i < M.vB.size(); ++i) {
      w.resize(gmm::mat_ncols(M.vB[i]));
      x.resize(gmm::mat_ncols(M.vB[i]));
      gmm::mult(M.vM[i], p, v);
      gmm::mult(gmm::transposed(M.vB[i]), v, w);
      double rcond;
      SuperLU_solve(M.vMloc[i], x, w, rcond);
      // gmm::iteration iter(1E-10, 0, 100000);
      // gmm::gmres(M.vMloc[i], x, w, gmm::identity_matrix(), 50, iter);
      gmm::mult_add(M.vB[i], x, q);
    }
  }

  template <typename Matrixt, typename MatrixBi,
            typename Vector2, typename Vector3>
  void mult(const AS_exact_gradient<Matrixt, MatrixBi> &M,
            const Vector2 &p, const Vector3 &q) {
    mult(M, p, const_cast<Vector3 &>(q));
  }

  template <typename Matrixt, typename MatrixBi,
            typename Vector2, typename Vector3, typename Vector4>
  void mult(const AS_exact_gradient<Matrixt, MatrixBi> &M,
            const Vector2 &p, const Vector3 &p2, Vector4 &q)
  { mult(M, p, q); add(p2, q); }

  template <typename Matrixt, typename MatrixBi,
            typename Vector2, typename Vector3, typename Vector4>
  void mult(const AS_exact_gradient<Matrixt, MatrixBi> &M,
            const Vector2 &p, const Vector3 &p2, const Vector4 &q)
  { mult(M, p, const_cast<Vector4 &>(q)); add(p2, q); }

  struct S_default_newton_line_search {

    double conv_alpha, conv_r;
    size_t it, itmax, glob_it;

    double alpha, alpha_old, alpha_mult, first_res, alpha_max_ratio;
    double alpha_min_ratio, alpha_min;
    size_type count, count_pat;
    bool max_ratio_reached;
    double alpha_max_ratio_reached, r_max_ratio_reached;
    size_type it_max_ratio_reached;

    double converged_value(void) { return conv_alpha; }
    double converged_residual(void) { return conv_r; }

    virtual void init_search(double r, size_t git, double = 0.0) {
      alpha_min_ratio = 0.9;
      alpha_min = 1e-10;
      alpha_max_ratio = 10.0;
      alpha_mult = 0.25;
      itmax = size_type(-1);
      glob_it = git; if (git <= 1) count_pat = 0;
      conv_alpha = alpha = alpha_old = 1.;
      conv_r = first_res = r; it = 0;
      count = 0;
      max_ratio_reached = false;
    }
    virtual double next_try(void) {
      alpha_old = alpha;
      if (alpha >= 0.4) alpha *= 0.5; else alpha *= alpha_mult; ++it;
      return alpha_old;
    }
    virtual bool is_converged(double r, double = 0.0) {
      // cout << "r = " << r << " alpha = " << alpha / alpha_mult << " count_pat = " << count_pat << endl;
      if (!max_ratio_reached && r < first_res * alpha_max_ratio) {
        alpha_max_ratio_reached = alpha_old; r_max_ratio_reached = r;
        it_max_ratio_reached = it; max_ratio_reached = true;
      }
      if (max_ratio_reached && r < r_max_ratio_reached * 0.5
          && r > first_res * 1.1 && it <= it_max_ratio_reached+1) {
        alpha_max_ratio_reached = alpha_old; r_max_ratio_reached = r;
        it_max_ratio_reached = it;
      }
      if (count == 0 || r < conv_r)
        { conv_r = r; conv_alpha = alpha_old; count = 1; }
      if (conv_r < first_res) ++count;

      if (r < first_res * alpha_min_ratio)
        { count_pat = 0; return true; }
      if (count >= 5 || (alpha < alpha_min && max_ratio_reached)) {
        if (conv_r < first_res * 0.99) count_pat = 0;
        if (/*gmm::random() * 50. < -log(conv_alpha)-4.0 ||*/ count_pat >= 3)
          { conv_r=r_max_ratio_reached; conv_alpha=alpha_max_ratio_reached; }
        if (conv_r >= first_res * 0.9999) count_pat++;
        return true;
      }
      return false;
    }
    S_default_newton_line_search(void) { count_pat = 0; }
  };


  template <typename Matrixt, typename MatrixBi, typename Vector,
            typename Precond, typename local_solver, typename global_solver>
  void Newton_additive_Schwarz(NewtonAS_struct<Matrixt, MatrixBi> &NS,
                               const Vector &u_,
                               iteration &iter, const Precond &P,
                               local_solver, global_solver) {
    Vector &u = const_cast<Vector &>(u_);
    typedef typename linalg_traits<Vector>::value_type value_type;
    typedef typename number_traits<value_type>::magnitude_type mtype;
    typedef actual_precond<Precond, local_solver, Matrixt> chgt_precond;

    double residual = iter.get_resmax();

    S_default_newton_line_search internal_ls;
    S_default_newton_line_search external_ls;

    typename chgt_precond::APrecond PP = chgt_precond::transform(P);
    iter.set_rhsnorm(mtype(1));
    iteration iternc(iter);
    iternc.reduce_noisy(); iternc.set_maxiter(size_type(-1));
    iteration iter2(iternc);
    iteration iter3(iter2); iter3.reduce_noisy();
    iteration iter4(iter3);
    iternc.set_name("Local Newton");
    iter2.set_name("Linear System for Global Newton");
    iternc.set_resmax(residual/100.0);
    iter3.set_resmax(residual/10000.0);
    iter2.set_resmax(residual/1000.0);
    iter4.set_resmax(residual/1000.0);
    std::vector<value_type> rhs(NS.size()), x(NS.size()), d(NS.size());
    std::vector<value_type> xi, xii, fi, di;

    std::vector< std::vector<value_type> > vx(NS.get_vB().size());
    for (size_type i = 0; i < NS.get_vB().size(); ++i) // for exact gradient
      vx[i].resize(NS.size()); // for exact gradient

    Matrixt Mloc, M(NS.size(), NS.size());
    NS.compute_F(rhs, u);
    mtype act_res=gmm::vect_norm2(rhs), act_res_new(0), precond_res = act_res;
    mtype alpha;

    while(!iter.finished(std::min(act_res, precond_res))) {
      for (int SOR_step = 0; SOR_step >= 0; --SOR_step) {
        gmm::clear(rhs);
        for (size_type isd = 0; isd < NS.get_vB().size(); ++isd) {
          const MatrixBi &Bi = (NS.get_vB())[isd];
          size_type si = mat_ncols(Bi);
          gmm::resize(Mloc, si, si);
          xi.resize(si); xii.resize(si); fi.resize(si); di.resize(si);

          iternc.init();
          iternc.set_maxiter(30); // ?
          if (iternc.get_noisy())
            cout << "Non-linear local problem " << isd << endl;
          gmm::clear(xi);
          gmm::copy(u, x);
          NS.compute_sub_F(fi, x, isd); gmm::scale(fi, value_type(-1));
          mtype r = gmm::vect_norm2(fi), r_t(r);
          if (r > value_type(0)) {
            iternc.set_rhsnorm(std::max(r, mtype(1)));
            while(!iternc.finished(r)) {
              NS.compute_sub_tangent_matrix(Mloc, x, isd);

              PP.build_with(Mloc);
              iter3.init();
              AS_local_solve(local_solver(), Mloc, di, fi, PP, iter3);

              internal_ls.init_search(r, iternc.get_iteration());
              do {
                alpha = internal_ls.next_try();
                gmm::add(xi, gmm::scaled(di, -alpha), xii);
                gmm::mult(Bi, gmm::scaled(xii, -1.0), u, x);
                NS.compute_sub_F(fi, x, isd); gmm::scale(fi, value_type(-1));
                r_t = gmm::vect_norm2(fi);
              } while (!internal_ls.is_converged(r_t));

              if (alpha != internal_ls.converged_value()) {
                alpha = internal_ls.converged_value();
                gmm::add(xi, gmm::scaled(di, -alpha), xii);
                gmm::mult(Bi, gmm::scaled(xii, -1.0), u, x);
                NS.compute_sub_F(fi, x, isd); gmm::scale(fi, value_type(-1));
                r_t = gmm::vect_norm2(fi);
              }
              gmm::copy(x, vx[isd]); // for exact gradient

              if (iternc.get_noisy()) cout << "(step=" << alpha << ")\t";
              ++iternc; r = r_t; gmm::copy(xii, xi);
            }
            if (SOR_step) gmm::mult(Bi, gmm::scaled(xii, -1.0), u, u);
            gmm::mult(Bi, gmm::scaled(xii, -1.0), rhs, rhs);
          }
        }
        precond_res = gmm::vect_norm2(rhs);
        if (SOR_step) cout << "SOR step residual = " << precond_res << endl;
        if (precond_res < residual) break;
        cout << "Precond residual = " << precond_res << endl;
      }

      iter2.init();
      // solving linear system for the global Newton method
      if (0) {
        NS.compute_tangent_matrix(M, u);
        add_schwarz_mat<Matrixt, MatrixBi, Precond, local_solver>
          ASM(M, NS.get_vB(), iter4, P, iter.get_resmax());
        AS_global_solve(global_solver(), ASM, d, rhs, iter2);
      }
      else { // for exact gradient
        AS_exact_gradient<Matrixt, MatrixBi> eg(NS.get_vB());
        for (size_type i = 0; i < NS.get_vB().size(); ++i) {
          NS.compute_tangent_matrix(eg.vM[i], vx[i]);
        }
        eg.init();
        gmres(eg, d, rhs, gmm::identity_matrix(), 50, iter2);
      }

      // gmm::add(gmm::scaled(rhs, 0.1), u); ++iter;
      external_ls.init_search(act_res, iter.get_iteration());
      do {
        alpha = external_ls.next_try();
        gmm::add(gmm::scaled(d, alpha), u, x);
        NS.compute_F(rhs, x);
        act_res_new = gmm::vect_norm2(rhs);
      } while (!external_ls.is_converged(act_res_new));

      if (alpha != external_ls.converged_value()) {
        alpha = external_ls.converged_value();
        gmm::add(gmm::scaled(d, alpha), u, x);
        NS.compute_F(rhs, x);
        act_res_new = gmm::vect_norm2(rhs);
      }

      if (iter.get_noisy() > 1) cout << endl;
      act_res = act_res_new;
      if (iter.get_noisy()) cout << "(step=" << alpha << ")\t unprecond res = " << act_res << " ";

      ++iter; gmm::copy(x, u);
    }
  }

}


#endif //  GMM_SOLVERS_SCHWARZ_ADDITIVE_H__