1 /*============================================================================
2  * Sparse Linear Equation Solvers
3  *============================================================================*/
4 
5 /*
6   This file is part of Code_Saturne, a general-purpose CFD tool.
7 
8   Copyright (C) 1998-2021 EDF S.A.
9 
10   This program is free software; you can redistribute it and/or modify it under
11   the terms of the GNU General Public License as published by the Free Software
12   Foundation; either version 2 of the License, or (at your option) any later
13   version.
14 
15   This program is distributed in the hope that it will be useful, but WITHOUT
16   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
18   details.
19 
20   You should have received a copy of the GNU General Public License along with
21   this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
22   Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 */
24 
25 /*----------------------------------------------------------------------------*/
26 
27 #include "cs_defs.h"
28 
29 /*----------------------------------------------------------------------------
30  * Standard C library headers
31  *----------------------------------------------------------------------------*/
32 
33 #include <stdarg.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <assert.h>
38 #include <math.h>
39 
40 #if defined(HAVE_MPI)
41 #include <mpi.h>
42 #endif
43 
44 /*----------------------------------------------------------------------------
45  * Local headers
46  *----------------------------------------------------------------------------*/
47 
48 /*----------------------------------------------------------------------------
49  *  Header for the current file
50  *----------------------------------------------------------------------------*/
51 
52 #include "cs_sles_it_priv.h"
53 
54 /*----------------------------------------------------------------------------*/
55 
56 BEGIN_C_DECLS
57 
58 /*=============================================================================
59  * Additional doxygen documentation
60  *============================================================================*/
61 
62 /*! \cond DOXYGEN_SHOULD_SKIP_THIS */
63 
64 /*=============================================================================
65  * Local Macro Definitions
66  *============================================================================*/
67 
68 /*============================================================================
69  * Private function definitions
70  *============================================================================*/
71 
72 /*----------------------------------------------------------------------------
73  * Compute inverses of dense 3*3 matrices.
74  *
75  * parameters:
76  *   n_blocks <-- number of blocks
77  *   ad       <-- diagonal part of linear equation matrix
78  *   ad_inv   --> inverse of the diagonal part of linear equation matrix
79  *----------------------------------------------------------------------------*/
80 
81 static void
_fact_lu33(cs_lnum_t n_blocks,const cs_real_t * ad,cs_real_t * ad_inv)82 _fact_lu33(cs_lnum_t         n_blocks,
83            const cs_real_t  *ad,
84            cs_real_t        *ad_inv)
85 {
86 # pragma omp parallel for if(n_blocks > CS_THR_MIN)
87   for (cs_lnum_t i = 0; i < n_blocks; i++) {
88 
89     cs_real_t *restrict _ad_inv = &ad_inv[9*i];
90     const cs_real_t *restrict  _ad = &ad[9*i];
91 
92     _ad_inv[0] = _ad[0];
93     _ad_inv[1] = _ad[1];
94     _ad_inv[2] = _ad[2];
95 
96     _ad_inv[3] = _ad[3]/_ad[0];
97     _ad_inv[4] = _ad[4] - _ad_inv[3]*_ad[1];
98     _ad_inv[5] = _ad[5] - _ad_inv[3]*_ad[2];
99 
100     _ad_inv[6] = _ad[6]/_ad[0];
101     _ad_inv[7] = (_ad[7] - _ad_inv[6]*_ad[1])/_ad_inv[4];
102     _ad_inv[8] = _ad[8] - _ad_inv[6]*_ad[2] - _ad_inv[7]*_ad_inv[5];
103 
104   }
105 }
106 
107 /*----------------------------------------------------------------------------
108  * Compute inverses of dense matrices.
109  *
110  * parameters:
111  *   n_blocks <-- number of blocks
112  *   ad       <-- diagonal part of linear equation matrix
113  *   ad_inv   --> inverse of the diagonal part of linear equation matrix
114  *----------------------------------------------------------------------------*/
115 
116 static void
_fact_lu(cs_lnum_t n_blocks,const int db_size,const cs_real_t * ad,cs_real_t * ad_inv)117 _fact_lu(cs_lnum_t         n_blocks,
118          const int         db_size,
119          const cs_real_t  *ad,
120          cs_real_t        *ad_inv)
121 {
122 # pragma omp parallel for if(n_blocks > CS_THR_MIN)
123   for (cs_lnum_t i = 0; i < n_blocks; i++) {
124 
125     cs_real_t *restrict _ad_inv = &ad_inv[db_size*db_size*i];
126     const cs_real_t *restrict  _ad = &ad[db_size*db_size*i];
127 
128     _ad_inv[0] = _ad[0];
129     // ad_inv(1,j) = ad(1,j)
130     // ad_inv(j,1) = ad(j,1)/a(1,1)
131     for (cs_lnum_t ii = 1; ii < db_size; ii++) {
132       _ad_inv[ii] = _ad[ii];
133       _ad_inv[ii*db_size] = _ad[ii*db_size]/_ad[0];
134     }
135     // ad_inv(i,i) = ad(i,i) - Sum( ad_inv(i,k)*ad_inv(k,i)) k=1 to i-1
136     for (cs_lnum_t ii = 1; ii < db_size - 1; ii++) {
137       _ad_inv[ii + ii*db_size] = _ad[ii + ii*db_size];
138       for (cs_lnum_t kk = 0; kk < ii; kk++) {
139         _ad_inv[ii + ii*db_size] -= _ad_inv[ii*db_size + kk]
140                                    *_ad_inv[kk*db_size + ii];
141       }
142 
143       for (cs_lnum_t jj = ii + 1; jj < db_size; jj++) {
144         _ad_inv[ii*db_size + jj] = _ad[ii*db_size + jj];
145         _ad_inv[jj*db_size + ii] =   _ad[jj*db_size + ii]
146                                    / _ad_inv[ii*db_size + ii];
147         for (cs_lnum_t kk = 0; kk < ii; kk++) {
148           _ad_inv[ii*db_size + jj] -=  _ad_inv[ii*db_size + kk]
149                                       *_ad_inv[kk*db_size + jj];
150           _ad_inv[jj*db_size + ii] -=  _ad_inv[jj*db_size + kk]
151                                       *_ad_inv[kk*db_size + ii]
152                                       /_ad_inv[ii*db_size + ii];
153         }
154       }
155     }
156     _ad_inv[db_size*db_size -1] = _ad[db_size*db_size - 1];
157     for (cs_lnum_t kk = 0; kk < db_size - 1; kk++) {
158       _ad_inv[db_size*db_size - 1] -=  _ad_inv[(db_size-1)*db_size + kk]
159                                       *_ad_inv[kk*db_size + db_size -1];
160     }
161   }
162 }
163 
164 /*============================================================================
165  * Private function definitions
166  *============================================================================*/
167 
168 /*----------------------------------------------------------------------------*/
169 /*!
170  * \brief Initialize or reset convergence info structure.
171  *        At this stage, the initial residue is set to HUGE_VAL, as it is
172  *        unknown.
173  *
174  * \param[in, out] convergence   convergence info structure
175  * \param[in]      solver_name   solver name
176  * \param[in]      verbosity     verbosity level
177  * \param[in]      n_iter_max    maximum number of iterations
178  * \param[in]      precision     precision limit
179  * \param[in]      r_norm        residue normalization
180  * \param[in, out] residue       initial residue
181  */
182 /*----------------------------------------------------------------------------*/
183 
184 void
cs_sles_it_convergence_init(cs_sles_it_convergence_t * convergence,const char * solver_name,int verbosity,unsigned n_iter_max,double precision,double r_norm,double * residue)185 cs_sles_it_convergence_init(cs_sles_it_convergence_t  *convergence,
186                             const char                *solver_name,
187                             int                        verbosity,
188                             unsigned                   n_iter_max,
189                             double                     precision,
190                             double                     r_norm,
191                             double                    *residue)
192 {
193   *residue = HUGE_VAL;  /* Unknown at this stage */
194 
195   convergence->name = solver_name;
196   convergence->verbosity = verbosity;
197 
198   convergence->n_iterations = 0;
199   convergence->n_iterations_max = n_iter_max;
200 
201   convergence->precision = precision;
202   convergence->r_norm = r_norm;
203   convergence->residue = *residue;
204 }
205 
206 /*----------------------------------------------------------------------------*/
207 /*!
208  * \brief Setup context for iterative linear solver.
209  *        This function is common to most solvers/smoothers
210  *
211  * \param[in, out] c                 pointer to solver context info
212  * \param[in]      name              pointer to system name
213  * \param[in]      a                 matrix
214  * \param[in]      verbosity         verbosity level
215  * \param[in]      diag_block_size   diagonal block size
216  * \param[in]      block_nn_inverse  if diagonal block size is 3 or 6, compute
217  *                                   inverse of block if true, inverse of block
218  *                                   diagonal otherwise
219  */
220 /*----------------------------------------------------------------------------*/
221 
222 void
cs_sles_it_setup_priv(cs_sles_it_t * c,const char * name,const cs_matrix_t * a,int verbosity,int diag_block_size,bool block_nn_inverse)223 cs_sles_it_setup_priv(cs_sles_it_t       *c,
224                       const char         *name,
225                       const cs_matrix_t  *a,
226                       int                 verbosity,
227                       int                 diag_block_size,
228                       bool                block_nn_inverse)
229 {
230   cs_sles_it_setup_t *sd = c->setup_data;
231 
232   if (sd == NULL) {
233     BFT_MALLOC(c->setup_data, 1, cs_sles_it_setup_t);
234     sd = c->setup_data;
235     sd->ad_inv = NULL;
236     sd->_ad_inv = NULL;
237     sd->pc_context = NULL;
238     sd->pc_apply = NULL;
239   }
240 
241   sd->n_rows = cs_matrix_get_n_rows(a) * diag_block_size;
242 
243   sd->initial_residue = -1;
244 
245   const cs_sles_it_t  *s = c->shared;
246 
247   if (c->pc != NULL) {
248 
249     if (s != NULL) {
250       if (s->setup_data == NULL)
251         s = NULL;
252     }
253 
254     if (s == NULL)
255       cs_sles_pc_setup(c->pc,
256                        name,
257                        a,
258                        verbosity);
259 
260     sd->pc_context = cs_sles_pc_get_context(c->pc);
261     sd->pc_apply = cs_sles_pc_get_apply_func(c->pc);
262 
263   }
264 
265   /* Setup diagonal inverse for Jacobi and Gauss-Seidel */
266 
267   else if (block_nn_inverse) {
268 
269     if (s != NULL) {
270       if (s->setup_data == NULL)
271         s = NULL;
272       else if (s->setup_data->ad_inv == NULL)
273         s = NULL;
274     }
275 
276     if (s != NULL) {
277       sd->ad_inv = s->setup_data->ad_inv;
278       BFT_FREE(sd->_ad_inv);
279     }
280     else {
281 
282       const cs_lnum_t n_rows = sd->n_rows;
283       if (diag_block_size < 3 || block_nn_inverse == false)
284         BFT_REALLOC(sd->_ad_inv, sd->n_rows, cs_real_t);
285       else
286         BFT_REALLOC(sd->_ad_inv, sd->n_rows*diag_block_size, cs_real_t);
287 
288       sd->ad_inv = sd->_ad_inv;
289 
290       if (diag_block_size == 1) {
291 
292         cs_matrix_copy_diagonal(a, sd->_ad_inv);
293 
294 #       pragma omp parallel for if(n_rows > CS_THR_MIN)
295         for (cs_lnum_t i = 0; i < n_rows; i++)
296           sd->_ad_inv[i] = 1.0 / sd->_ad_inv[i];
297 
298       }
299       else {
300 
301         const cs_real_t  *restrict ad = cs_matrix_get_diagonal(a);
302         const cs_lnum_t  n_blocks = sd->n_rows / diag_block_size;
303 
304         if (diag_block_size == 3)
305           _fact_lu33(n_blocks, ad, sd->_ad_inv);
306         else
307           _fact_lu(n_blocks, diag_block_size, ad, sd->_ad_inv);
308 
309       }
310 
311     }
312 
313   }
314 }
315 
316 /*! (DOXYGEN_SHOULD_SKIP_THIS) \endcond */
317 
318 /*----------------------------------------------------------------------------*/
319 
320 END_C_DECLS
321