1 /*============================================================================
2  * Multigrid solver.
3  *============================================================================*/
4 
5 /*
6   This file is part of Code_Saturne, a general-purpose CFD tool.
7 
8   Copyright (C) 1998-2021 EDF S.A.
9 
10   This program is free software; you can redistribute it and/or modify it under
11   the terms of the GNU General Public License as published by the Free Software
12   Foundation; either version 2 of the License, or (at your option) any later
13   version.
14 
15   This program is distributed in the hope that it will be useful, but WITHOUT
16   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
18   details.
19 
20   You should have received a copy of the GNU General Public License along with
21   this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
22   Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 */
24 
25 /*----------------------------------------------------------------------------*/
26 
27 #include "cs_defs.h"
28 
29 /*----------------------------------------------------------------------------
30  * Standard C library headers
31  *----------------------------------------------------------------------------*/
32 
33 #include <stdarg.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <assert.h>
38 #include <math.h>
39 
40 #if defined(HAVE_MPI)
41 #include <mpi.h>
42 #endif
43 
44 /*----------------------------------------------------------------------------
45  * Local headers
46  *----------------------------------------------------------------------------*/
47 
48 #include "bft_mem.h"
49 #include "bft_error.h"
50 #include "bft_printf.h"
51 
52 #include "cs_base.h"
53 #include "cs_blas.h"
54 #include "cs_file.h"
55 #include "cs_grid.h"
56 #include "cs_halo.h"
57 #include "cs_log.h"
58 #include "cs_matrix.h"
59 #include "cs_matrix_default.h"
60 #include "cs_matrix_util.h"
61 #include "cs_mesh.h"
62 #include "cs_mesh_quantities.h"
63 #include "cs_multigrid_smoother.h"
64 #include "cs_post.h"
65 #include "cs_sles.h"
66 #include "cs_sles_it.h"
67 #include "cs_sles_pc.h"
68 #include "cs_timer.h"
69 #include "cs_time_plot.h"
70 #include "cs_time_step.h"
71 
72 /*----------------------------------------------------------------------------
73  *  Header for the current file
74  *----------------------------------------------------------------------------*/
75 
76 #include "cs_multigrid.h"
77 
78 /*----------------------------------------------------------------------------*/
79 
80 BEGIN_C_DECLS
81 
82 /*! \cond DOXYGEN_SHOULD_SKIP_THIS */
83 
84 /*=============================================================================
85  * Local Macro Definitions
86  *============================================================================*/
87 
88 #define EPZERO  1.E-12
89 #define RINFIN  1.E+30
90 
91 #if !defined(HUGE_VAL)
92 #define HUGE_VAL  1.E+12
93 #endif
94 
95 /* Minimum size for OpenMP loops (needs benchmarking to adjust) */
96 #define CS_THR_MIN 128
97 
/* SIMD unit size to ensure SIMD alignment (2 to 4 required on most
99  * current architectures, so 16 should be enough on most architectures
100  * through at least 2012) */
101 
102 #define CS_SIMD_SIZE(s) (((s-1)/16+1)*16)
103 
104 /*=============================================================================
105  * Local Type Definitions
106  *============================================================================*/
107 
108 /*----------------------------------------------------------------------------
109  * Multigrid types
110  *----------------------------------------------------------------------------*/
111 
typedef enum {

  CS_MULTIGRID_MAIN,           /*!< Main (top-level) multigrid solver */
  CS_MULTIGRID_COARSE,         /*!< Coarse level solver */
  CS_MULTIGRID_BOTTOM,         /*!< Bottom level solver */
  CS_MULTIGRID_BOTTOM_SMOOTHE  /*!< Bottom level smoother (the spelling
                                    "smoothe" is used throughout this file) */

} cs_multigrid_subtype_t;
120 
121 /*----------------------------------------------------------------------------
122  * Local Structure Definitions
123  *----------------------------------------------------------------------------*/
124 
125 /* Basic linear system solver or smoother context and functions */
126 /*--------------------------------------------------------------*/
127 
typedef struct _cs_mg_sles_t {

  void                     *context;       /* solver context
                                              (options, state, logging) */

  cs_sles_setup_t          *setup_func;    /* solver setup function */
  cs_sles_solve_t          *solve_func;    /* solve function */
  cs_sles_destroy_t        *destroy_func;  /* context destruction function */

} cs_mg_sles_t;
138 
139 /* Basic per linear system options and logging */
140 /*---------------------------------------------*/
141 
typedef struct _cs_multigrid_info_t {

  /* Settings */

  cs_sles_it_type_t    type[3];             /* Solver type per stage:
                                               [0] descent smoother,
                                               [1] ascent smoother,
                                               [2] coarsest level solver */

  bool                 is_pc;               /* True if used as preconditioner */
  int                  n_max_cycles;        /* Maximum allowed cycles */

  int                  n_max_iter[3];       /* maximum iterations allowed
                                               (descent/ascent/coarse) */
  int                  poly_degree[3];      /* polynomial preconditioning degree
                                               (descent/ascent/coarse);
                                               0: Jacobi, < 0: none or
                                               nested multigrid (cf. lv_mg) */

  double               precision_mult[3];   /* solver precision multiplier
                                               (descent/ascent/coarse) */

  /* Logging */

  unsigned             n_calls[2];          /* Number of times grids built
                                               (0) or solved (1) */

  unsigned long long   n_levels_tot;        /* Total accumulated number of
                                               grid levels built */
  unsigned             n_levels[3];         /* Number of grid levels:
                                               [last, min, max] */

  unsigned             n_cycles[3];         /* Number of cycles for
                                               system resolution:
                                               [min, max, total] */

  cs_timer_counter_t   t_tot[2];            /* Total time used:
                                               [build, solve] */

} cs_multigrid_info_t;
178 
179 /* Per level info and logging */
180 /*----------------------------*/
181 
typedef struct _cs_multigrid_level_info_t {

  unsigned long long   n_ranks[4];          /* Number of ranks for this level:
                                               [last, min, max, total] */
  unsigned long long   n_g_rows[4];         /* Global number of rows:
                                               [last, min, max, total] */
  unsigned long long   n_elts[3][4];        /* Mean number of rows,
                                               rows + ghosts, and entries
                                               across ranks:
                                               [last, min, max, total] */
  double               imbalance[3][4];     /* Imbalance for rows, rows
                                               + ghosts, and entries:
                                               [last, min, max, total] */

  unsigned long long   n_it_solve[4];       /* Number of iterations for
                                               solving [last, min, max, total] */
  unsigned long long   n_it_ds_smoothe[4];  /* Number of iterations for
                                               descent smoothing:
                                                 [last, min, max, total] */
  unsigned long long   n_it_as_smoothe[4];  /* Number of iterations for
                                               ascent smoothing:
                                                 [last, min, max, total] */

  unsigned             n_calls[7];          /* Total number of calls:
                                               [build, solve, descent smoothe,
                                               ascent smoothe, restrict from
                                               finer, prolong to finer,
                                               BLAS] */

  cs_timer_counter_t   t_tot[7];            /* Total timers count:
                                               [build, solve, descent smoothe,
                                               ascent smoothe, restrict from
                                               finer, prolong to finer,
                                               BLAS] */

} cs_multigrid_level_info_t;
218 
219 /* Grid hierarchy */
220 /*----------------*/
221 
typedef struct _cs_multigrid_setup_data_t {

  /* Setup */

  unsigned        n_levels;           /* Current number of grid levels */
  unsigned        n_levels_alloc;     /* Allocated number of grid levels */

  cs_grid_t     **grid_hierarchy;     /* Array of grid pointers
                                         (fine to coarse) */
  cs_mg_sles_t   *sles_hierarchy;     /* Contexts for associated smoothers
                                         and solvers (i*2: descent, coarse;
                                         i*2+1: ascent) */

  /* Arrays used only for solving, but maintained until free,
     so as to be usable by convergence error handler. */

  double         exit_initial_residue;  /* Last level initial residue */
  double         exit_residue;          /* Last residue */
  int            exit_level;            /* Last level during solve */
  int            exit_cycle_id;         /* Last cycle id during solve */

  cs_real_t     *rhs_vx_buf;            /* Coarse grid "right hand sides"
                                           and corrections buffer */
  cs_real_t    **rhs_vx;                /* Coarse grid "right hand sides"
                                           and corrections (pointers into
                                           rhs_vx_buf) */

  /* Options used only when used as a preconditioner */

  char          *pc_name;               /* name of preconditioning system */

  int            pc_verbosity;          /* preconditioner verbosity */

  cs_real_t     *pc_aux;                /* preconditioner auxiliary array */


} cs_multigrid_setup_data_t;
257 
258 /* Grid hierarchy */
259 /*----------------*/
260 
struct _cs_multigrid_t {

  /* Settings */

  cs_multigrid_type_t     type;     /* Multigrid type */
  cs_multigrid_subtype_t  subtype;  /* Multigrid subtype */

  int        aggregation_limit;  /* Maximum allowed fine rows per coarse cell */
  int        coarsening_type;    /* Coarsening traversal type */
  int        n_levels_max;       /* Maximum number of grid levels */
  cs_gnum_t  n_g_rows_min;       /* Global number of rows on coarse grids
                                    under which no coarsening occurs */

  int        post_row_max;       /* If > 0, activates postprocessing of
                                    coarsening, projecting coarse cell
                                    numbers (modulo post_row_max)
                                    on the base grid */

  double     p0p1_relax;         /* p0/p1 relaxation_parameter */
  double     k_cycle_threshold;  /* threshold for k cycle */

  /* Setting for use as a preconditioner */

  double     pc_precision;       /* preconditioner precision */
  double     pc_r_norm;          /* preconditioner residue normalization */

  /* Data for postprocessing callback */

  int        post_location;      /* associated mesh location */
  int        n_levels_post;      /* Current number of postprocessed levels */

  int      **post_row_num;       /* If post_row_max > 0, array of
                                    (n_levels - 1) arrays of projected
                                    coarse cell numbers on the base grid */

  int      **post_row_rank;      /* If post_row_max > 0 and grid merging
                                    is active, array of (n_levels - 1) arrays
                                    of projected coarse cell ranks on the
                                    base grid */
  char      *post_name;          /* Name for postprocessing */

  /* Options and maintained state (statistics) */

  cs_multigrid_level_info_t  *lv_info;      /* Info for each level */
  cs_multigrid_t             *lv_mg[3];     /* Optional recursive multigrid
                                               descent, ascent, or NULL */
  cs_multigrid_t             *p_mg;         /* Optional parent multigrid,
                                               or NULL */

  cs_multigrid_info_t         info;         /* Base multigrid info */

  /* Communicator used for reduction operations
     (if left at NULL, main communicator will be used) */

# if defined(HAVE_MPI)

  MPI_Comm comm;
  MPI_Comm caller_comm;

# endif

  cs_gnum_t merge_mean_threshold;  /* mean coarse rows per rank threshold
                                      for rank merging */
  cs_gnum_t merge_glob_threshold;  /* total coarse rows threshold
                                      for rank merging */

  int      merge_stride;           /* rank stride for rank merging */
  int      caller_n_ranks;         /* number of ranks for the calling
                                      communicator (cf. caller_comm) */

  /* Data available between "setup" and "solve" states */

  cs_multigrid_setup_data_t  *setup_data;   /* setup data */

  cs_time_plot_t             *cycle_plot;       /* plotting of cycles */
  int                         plot_time_stamp;  /* plotting time stamp;
                                                   if < 0, use wall clock */
};
336 
/*============================================================================
 *  Global variables
 *============================================================================*/

/* Multigrid type names (indexed by cs_multigrid_type_t) */

const char *cs_multigrid_type_name[]
  = {N_("V-cycle"),
     N_("K-cycle"),
     N_("K-cycle, HPC variant"),
     N_("K-cycle, HPC coarse level")};

/* Tunable settings */

static int _k_cycle_hpc_merge_stride = 512;      /* rank merge stride for the
                                                    K-cycle HPC variant —
                                                    presumably; confirm in
                                                    _setup_k_cycle_hpc_sub */
static int _k_cycle_hpc_recurse_threshold = 256; /* under this size, coarsest
                                                    level solver does not
                                                    use k-cycle preconditioning */
355 
/*============================================================================
 * Private function prototypes (declared early to allow recursive calls)
 *============================================================================*/

static void
_setup_k_cycle_hpc_sub(cs_multigrid_t     *mg,
                       const char         *name,
                       const cs_matrix_t  *a,
                       int                 n_ranks,
                       int                 verbosity);
366 
367 /*============================================================================
368  * Private function definitions
369  *============================================================================*/
370 
371 /*----------------------------------------------------------------------------
372  * Initialize multigrid info structure.
373  *
374  * parameters:
375  *   name <-- system name
376  *   info <-- pointer to multigrid info structure
377  *----------------------------------------------------------------------------*/
378 
379 static void
_multigrid_info_init(cs_multigrid_info_t * info)380 _multigrid_info_init(cs_multigrid_info_t *info)
381 {
382   int i;
383 
384   /* Options */
385 
386   info->type[0] = CS_SLES_PCG;
387   info->type[1] = CS_SLES_PCG;
388   info->type[2] = CS_SLES_PCG;
389 
390   info->is_pc        = false;
391   info->n_max_cycles = 100;
392 
393   info->n_max_iter[0] = 2;
394   info->n_max_iter[1] = 10;
395   info->n_max_iter[2] = 10000;
396 
397   info->poly_degree[0] = -1;
398   info->poly_degree[1] = -1;
399   info->poly_degree[2] = -1;
400 
401   /* In theory, one should increase precision on coarsest mesh,
402      but in practice, it is more efficient to have a lower precision,
403      so we choose coarse_precision = global_precision; */
404 
405   info->precision_mult[0] = -1.;
406   info->precision_mult[1] = -1.;
407   info->precision_mult[2] = 1.;
408 
409   /* Counting and timing */
410 
411   for (i = 0; i < 2; i++)
412     info->n_calls[i] = 0;
413 
414   info->n_levels_tot = 0;
415 
416   for (i = 0; i < 3; i++) {
417     info->n_levels[i] = 0;
418     info->n_cycles[i] = 0;
419   }
420 
421   for (i = 0; i < 2; i++)
422     CS_TIMER_COUNTER_INIT(info->t_tot[i]);
423 }
424 
425 /*----------------------------------------------------------------------------
426  * Initialize multigrid level info structure.
427  *
428  * parameters:
429  *   name <-- system name
430  *   info <-- pointer to multigrid info structure
431  *----------------------------------------------------------------------------*/
432 
433 static void
_multigrid_level_info_init(cs_multigrid_level_info_t * info)434 _multigrid_level_info_init(cs_multigrid_level_info_t *info)
435 {
436   int i;
437 
438   memset(info, 0, sizeof(cs_multigrid_level_info_t));
439 
440   for (i = 0; i < 3; i++) {
441     info->imbalance[i][0] = HUGE_VALF;
442     info->imbalance[i][1] = 0.;
443   }
444 
445   for (i = 0; i < 7; i++)
446     CS_TIMER_COUNTER_INIT(info->t_tot[i]);
447 }
448 
/*----------------------------------------------------------------------------
 * Output information regarding multigrid options.
 *
 * Logs coarsening and cycle parameters, rank merge parameters (in parallel
 * builds), then settings for each of the three stages (descent smoother,
 * ascent smoother, coarsest level solver), recursing into nested multigrid
 * solvers where present.
 *
 * parameters:
 *   mg <-- pointer to multigrid structure
 *----------------------------------------------------------------------------*/

static void
_multigrid_setup_log(const cs_multigrid_t *mg)
{
  if (mg->info.is_pc == false)
    cs_log_printf(CS_LOG_SETUP,
                  _("  Solver type:                       multigrid\n"));
  else
    cs_log_printf(CS_LOG_SETUP,
                  _("  Multigrid preconditioner parameters:\n"));

  cs_log_printf(CS_LOG_SETUP,
                _("  Coarsening type:                   %s\n"
                  "    Max fine rows per coarse row:    %d\n"
                  "    Maximum number of levels :       %d\n"
                  "    Minimum number of coarse rows:   %llu\n"
                  "    P0/P1 relaxation parameter:      %g\n"
                  "  Maximum number of cycles:          %d\n"),
                _(cs_grid_coarsening_type_name[mg->coarsening_type]),
                mg->aggregation_limit,
                mg->n_levels_max, (unsigned long long)(mg->n_g_rows_min),
                mg->p0p1_relax, mg->info.n_max_cycles);

#if defined(HAVE_MPI)
  if (cs_glob_n_ranks > 1)
    cs_log_printf(CS_LOG_SETUP,
                  _("\n"
                    "  Rank merge parameters:\n"
                    "    merge rank stride:               %d\n"
                    "    mean  coarse rows threshold:    %d\n"
                    "    total coarse rows threshold:    %llu\n"),
                  mg->merge_stride,
                  (int)(mg->merge_mean_threshold),
                  (unsigned long long)(mg->merge_glob_threshold));
#endif

  cs_log_printf(CS_LOG_SETUP,
                _("  Cycle type:                        %s\n"),
                _(cs_multigrid_type_name[mg->type]));

  const char *stage_name[] = {"Descent smoother",
                              "Ascent smoother",
                              "Coarsest level solver"};

  /* Per-stage settings */

  for (int i = 0; i < 3; i++) {

    /* Stage uses a basic iterative solver/smoother */

    if (   mg->info.type[i] != CS_SLES_N_IT_TYPES
        && mg->info.type[i] < CS_SLES_N_SMOOTHER_TYPES) {
      cs_log_printf(CS_LOG_SETUP,
                    _("  %s:\n"
                      "    Type:                            %s\n"),
                    _(stage_name[i]),
                    _(cs_sles_it_type_name[mg->info.type[i]]));

      if (mg->info.poly_degree[i] > -1) {
        cs_log_printf(CS_LOG_SETUP,
                      _("    Preconditioning:                 "));
        if (mg->info.poly_degree[i] == 0)
          cs_log_printf(CS_LOG_SETUP, _("Jacobi\n"));
        else if (mg->info.poly_degree[i] < 0) {
          /* NOTE(review): this branch is unreachable — it is guarded by
             poly_degree[i] > -1 above; the nested-MG/"None" reporting
             presumably intended negative degrees. Confirm intent. */
          if (mg->lv_mg[i] != NULL) {
            cs_log_printf(CS_LOG_SETUP, "%s\n",
                          _(cs_multigrid_type_name[mg->lv_mg[i]->type]));
          }
          else
            cs_log_printf(CS_LOG_SETUP, _("None\n"));
        }
        else
          cs_log_printf(CS_LOG_SETUP, _("polynomial, degree %d\n"),
                        mg->info.poly_degree[i]);
      }
      cs_log_printf(CS_LOG_SETUP,
                    _("    Maximum number of iterations:    %d\n"
                      "    Precision multiplier:            %g\n"),
                    mg->info.n_max_iter[i],
                    mg->info.precision_mult[i]);
    }

    /* Otherwise, a nested multigrid solver handles this stage */

    else if (mg->lv_mg[i] != NULL) {
      cs_log_printf(CS_LOG_SETUP, "  %s:\n", _(stage_name[i]));
      _multigrid_setup_log(mg->lv_mg[i]);
    }

  }

  cs_log_printf(CS_LOG_SETUP,
                _("  Postprocess coarsening:            %d\n"),
                mg->post_row_max);
}
543 
/*----------------------------------------------------------------------------
 * Output information regarding multigrid resolution performance.
 *
 * Logs mean/min/max statistics for levels and cycles, per-level grid sizes,
 * imbalance, iteration counts, and timers, then recurses into any nested
 * multigrid solvers.
 *
 * parameters:
 *   mg <-- pointer to multigrid structure
 *----------------------------------------------------------------------------*/

static void
_multigrid_performance_log(const cs_multigrid_t *mg)
{
  unsigned i;

  /* Denominators for mean statistics, clamped to 1 to avoid
     division by zero when no build/solve has occurred yet */

  unsigned long long n_builds_denom = CS_MAX(mg->info.n_calls[0], 1);
  unsigned long long n_solves_denom = CS_MAX(mg->info.n_calls[1], 1);
  int n_lv_min = mg->info.n_levels[1];
  int n_lv_max = mg->info.n_levels[2];
  int n_lv_mean = (int)(mg->info.n_levels_tot / n_builds_denom);
  int n_cy_mean = (int)(mg->info.n_cycles[2] / n_solves_denom);

  char tmp_s[7][64] =  {"", "", "", "", "", "", ""};
  const char *stage_name[2] = {N_("Construction:"), N_("Resolution:")};
  const char *lv_stage_name[7] = {N_("build:"), N_("solve:"),
                                  N_("descent smoothe:"), N_("ascent smoothe:"),
                                  N_("restrict:"), N_("prolong:"),
                                  N_("BLAS")};

  cs_log_printf(CS_LOG_PERFORMANCE,
                 _("\n"
                   "  Multigrid:\n"
                   "    %s\n"
                   "    Coarsening: %s\n"),
                _(cs_multigrid_type_name[mg->type]),
                _(cs_grid_coarsening_type_name[mg->coarsening_type]));

  /* Smoother/solver names (only when basic iterative types are used) */

  if (   mg->info.type[0] != CS_SLES_N_IT_TYPES
      && mg->info.type[0] < CS_SLES_N_SMOOTHER_TYPES) {

    const char *descent_smoother_name = cs_sles_it_type_name[mg->info.type[0]];
    const char *ascent_smoother_name = cs_sles_it_type_name[mg->info.type[1]];

    if (mg->info.type[0] == mg->info.type[1])
      cs_log_printf(CS_LOG_PERFORMANCE,
                    _("    Smoother: %s\n"),
                    _(descent_smoother_name));
    else
      cs_log_printf(CS_LOG_PERFORMANCE,
                    _("    Descent smoother:     %s\n"
                      "    Ascent smoother:      %s\n"),
                    _(descent_smoother_name), _(ascent_smoother_name));

    cs_log_printf(CS_LOG_PERFORMANCE,
                  _("    Coarsest level solver:       %s\n"),
                  _(cs_sles_it_type_name[mg->info.type[2]]));

  }

  /* Header and table for global level/cycle counts (mean/min/max) */

  sprintf(tmp_s[0], "%-36s", "");
  cs_log_strpadl(tmp_s[1], _(" mean"), 12, 64);
  cs_log_strpadl(tmp_s[2], _("minimum"), 12, 64);
  cs_log_strpadl(tmp_s[3], _("maximum"), 12, 64);

  cs_log_printf(CS_LOG_PERFORMANCE,
                "\n  %s %s %s %s\n",
                tmp_s[0], tmp_s[1], tmp_s[2], tmp_s[3]);

  cs_log_strpad(tmp_s[0], _("Number of levels:"), 36, 64);
  cs_log_strpad(tmp_s[1], _("Number of cycles:"), 36, 64);

  cs_log_printf(CS_LOG_PERFORMANCE,
                "  %s %12d %12d %12d\n",
                tmp_s[0], n_lv_mean, n_lv_min, n_lv_max);
  cs_log_printf(CS_LOG_PERFORMANCE,
                "  %s %12d %12d %12d\n\n",
                tmp_s[1], n_cy_mean,
                (int)(mg->info.n_cycles[0]), (int)(mg->info.n_cycles[1]));

  /* Global build/solve timers */

  cs_log_timer_array_header(CS_LOG_PERFORMANCE,
                            2,                  /* indent, */
                            "",                 /* header title */
                            true);              /* calls column */
  cs_log_timer_array(CS_LOG_PERFORMANCE,
                     2,                  /* indent, */
                     2,                  /* n_lines */
                     stage_name,
                     mg->info.n_calls,
                     mg->info.t_tot);

  /* Header for the per-level tables */

  sprintf(tmp_s[0], "%-36s", "");
  cs_log_strpadl(tmp_s[1], _(" mean"), 12, 64);
  cs_log_strpadl(tmp_s[2], _("minimum"), 12, 64);
  cs_log_strpadl(tmp_s[3], _("maximum"), 12, 64);

  cs_log_printf(CS_LOG_PERFORMANCE,
                "\n  %s %s %s %s\n",
                tmp_s[0], tmp_s[1], tmp_s[2], tmp_s[3]);

  /* Per-level statistics (skip levels never built) */

  for (i = 0; i < mg->info.n_levels[2]; i++) {

    const cs_multigrid_level_info_t *lv_info = mg->lv_info + i;
    unsigned long long n_lv_builds = lv_info->n_calls[0];

    if (n_lv_builds < 1)
      continue;

    cs_log_strpad(tmp_s[0], _("Number of rows:"), 34, 64);
    cs_log_printf(CS_LOG_PERFORMANCE,
                  _("  Grid level %d:\n"
                    "    %s %12llu %12llu %12llu\n"),
                  i, tmp_s[0],
                  lv_info->n_g_rows[3] / n_lv_builds,
                  lv_info->n_g_rows[1], lv_info->n_g_rows[2]);

    if (mg->caller_n_ranks == 1) {
      cs_log_strpad(tmp_s[1], _("Number of entries:"), 34, 64);
      cs_log_printf(CS_LOG_PERFORMANCE,
                    "    %s %12llu %12llu %12llu\n",
                    tmp_s[1],
                    lv_info->n_elts[2][3] / n_lv_builds,
                    lv_info->n_elts[2][1], lv_info->n_elts[2][2]);
    }

#if defined(HAVE_MPI)

    if (mg->caller_n_ranks > 1) {
      cs_log_strpad(tmp_s[0], _("Number of active ranks:"), 34, 64);
      cs_log_printf(CS_LOG_PERFORMANCE,
                    "    %s %12llu %12llu %12llu\n",
                    tmp_s[0],
                    lv_info->n_ranks[3] / n_lv_builds,
                    lv_info->n_ranks[1], lv_info->n_ranks[2]);
      cs_log_strpad(tmp_s[0], _("Mean local rows:"), 34, 64);
      cs_log_strpad(tmp_s[1], _("Mean local columns + ghosts:"), 34, 64);
      cs_log_strpad(tmp_s[2], _("Mean local entries:"), 34, 64);
      cs_log_printf(CS_LOG_PERFORMANCE,
                    "    %s %12llu %12llu %12llu\n"
                    "    %s %12llu %12llu %12llu\n"
                    "    %s %12llu %12llu %12llu\n",
                    tmp_s[0],
                    lv_info->n_elts[0][3] / n_lv_builds,
                    lv_info->n_elts[0][1], lv_info->n_elts[0][2],
                    tmp_s[1],
                    lv_info->n_elts[1][3] / n_lv_builds,
                    lv_info->n_elts[1][1], lv_info->n_elts[1][2],
                    tmp_s[2],
                    lv_info->n_elts[2][3] / n_lv_builds,
                    lv_info->n_elts[2][1], lv_info->n_elts[2][2]);
      cs_log_strpad(tmp_s[0], _("Rows imbalance:"), 34, 64);
      cs_log_strpad(tmp_s[1], _("Columns + ghosts imbalance:"), 34, 64);
      cs_log_strpad(tmp_s[2], _("entries imbalance"), 34, 64);
      /* NOTE(review): the label above is inconsistent with its siblings
         ("Rows imbalance:"): lowercase initial and missing trailing colon.
         Also, "%-34s" below re-pads strings already padded to 34 by
         cs_log_strpad (harmless, but redundant). */
      cs_log_printf(CS_LOG_PERFORMANCE,
                    "    %-34s %12.3f %12.3f %12.3f\n"
                    "    %-34s %12.3f %12.3f %12.3f\n"
                    "    %-34s %12.3f %12.3f %12.3f\n",
                    tmp_s[0],
                    lv_info->imbalance[0][3] / n_lv_builds,
                    lv_info->imbalance[0][1], lv_info->imbalance[0][2],
                    tmp_s[1],
                    lv_info->imbalance[1][3] / n_lv_builds,
                    lv_info->imbalance[1][1], lv_info->imbalance[1][2],
                    tmp_s[2],
                    lv_info->imbalance[2][3] / n_lv_builds,
                    lv_info->imbalance[2][1], lv_info->imbalance[2][2]);
    }

#endif

    /* Iteration counts, per call kind (solve, descent, ascent) */

    if (lv_info->n_calls[1] > 0) {
      cs_log_strpad(tmp_s[0], _("Iterations for solving:"), 34, 64);
      cs_log_printf(CS_LOG_PERFORMANCE,
                    "    %s %12llu %12llu %12llu\n",
                    tmp_s[0],
                    lv_info->n_it_solve[3] / lv_info->n_calls[1],
                    lv_info->n_it_solve[1], lv_info->n_it_solve[2]);
    }

    if (lv_info->n_calls[2] > 0) {
      cs_log_strpad(tmp_s[1], _("Descent smoother iterations:"), 34, 64);
      cs_log_printf(CS_LOG_PERFORMANCE,
                    "    %s %12llu %12llu %12llu\n",
                    tmp_s[1],
                    lv_info->n_it_ds_smoothe[3] / lv_info->n_calls[2],
                    lv_info->n_it_ds_smoothe[1], lv_info->n_it_ds_smoothe[2]);
    }

    if (lv_info->n_calls[3] > 0) {
      cs_log_strpad(tmp_s[2], _("Ascent smoother iterations:"), 34, 64);
      cs_log_printf(CS_LOG_PERFORMANCE,
                    "    %s %12llu %12llu %12llu\n",
                    tmp_s[2],
                    lv_info->n_it_as_smoothe[3] / lv_info->n_calls[3],
                    lv_info->n_it_as_smoothe[1], lv_info->n_it_as_smoothe[2]);
    }
  }

  /* Per-level timers */

  cs_log_timer_array_header(CS_LOG_PERFORMANCE,
                            2,                  /* indent, */
                            "",                 /* header title */
                            true);              /* calls column */

  for (i = 0; i < mg->info.n_levels[2]; i++) {

    const cs_multigrid_level_info_t *lv_info = mg->lv_info + i;

    cs_log_printf(CS_LOG_PERFORMANCE,
                  _("  Grid level %d:\n"), i);

    cs_log_timer_array(CS_LOG_PERFORMANCE,
                       4,                  /* indent, */
                       7,                  /* n_lines */
                       lv_stage_name,
                       lv_info->n_calls,
                       lv_info->t_tot);

  }

  /* Recurse into nested multigrid solvers, if any */

  {
    const char *names[]
      = {N_("coarse level descent smoother"),
         N_("coarse level ascent smoother"),
         N_("bottom level solver")};

    for (i = 0; i < 3; i++) {
      if (mg->lv_mg[i] != NULL) {

        cs_log_printf(CS_LOG_PERFORMANCE,
                      _("\n"
                        "  Nested %s:\n"), _(names[i]));
        cs_multigrid_log(mg->lv_mg[i], CS_LOG_PERFORMANCE);
      }
    }
  }
}
776 
777 /*----------------------------------------------------------------------------
778  * Create empty structure used to maintain setup data
 * (between cs_sles_setup and cs_sles_free type calls).
780  *
781  * returns:
782  *   pointer to multigrid setup data structure
783  *----------------------------------------------------------------------------*/
784 
785 static cs_multigrid_setup_data_t *
_multigrid_setup_data_create(void)786 _multigrid_setup_data_create(void)
787 {
788   cs_multigrid_setup_data_t *mgd;
789 
790   BFT_MALLOC(mgd, 1, cs_multigrid_setup_data_t);
791 
792   mgd->n_levels = 0;
793   mgd->n_levels_alloc = 0;
794 
795   mgd->grid_hierarchy = NULL;
796   mgd->sles_hierarchy = NULL;
797 
798   mgd->exit_initial_residue = -1.;
799   mgd->exit_residue = -1.;
800   mgd->exit_level = -1.;
801   mgd->exit_cycle_id = -1.;
802 
803   mgd->rhs_vx_buf = NULL;
804   mgd->rhs_vx = NULL;
805 
806   mgd->pc_name = NULL;
807   mgd->pc_aux = NULL;
808   mgd->pc_verbosity = 0;
809 
810   return mgd;
811 }
812 
813 /*----------------------------------------------------------------------------
814  * Add grid to multigrid structure hierarchy.
815  *
816  * parameters:
817  *   mg   <-- multigrid structure
818  *   grid <-- grid to add
819  *----------------------------------------------------------------------------*/
820 
static void
_multigrid_add_level(cs_multigrid_t  *mg,
                     cs_grid_t       *grid)
{
  cs_multigrid_setup_data_t *mgd = mg->setup_data;

  unsigned ii;

  /* Reallocate arrays if necessary */

  if (mgd->n_levels == mgd->n_levels_alloc) {

    /* Max previous */
    unsigned int n_lv_max_prev = CS_MAX(mg->info.n_levels[2],
                                        mgd->n_levels);

    /* First allocation: start from the largest level count seen in
       previous setups (or 10 by default); afterwards, double capacity. */
    if (mgd->n_levels_alloc == 0) {
      mgd->n_levels_alloc = n_lv_max_prev;
      if (mgd->n_levels_alloc == 0)
        mgd->n_levels_alloc = 10;
    }
    else
      mgd->n_levels_alloc *= 2;

    /* sles_hierarchy holds 2 entries (smoother + solver slot) per level. */
    BFT_REALLOC(mgd->grid_hierarchy, mgd->n_levels_alloc, cs_grid_t *);
    BFT_REALLOC(mgd->sles_hierarchy, mgd->n_levels_alloc*2, cs_mg_sles_t);

    /* NOTE(review): this loop starts at mgd->n_levels, while valid
       sles_hierarchy entries occupy indices [0, mgd->n_levels*2).
       This only stays safe if per-level solvers are assigned after all
       levels have been added — confirm; n_levels*2 would be the tighter
       lower bound. */
    for (ii = mgd->n_levels; ii < mgd->n_levels_alloc*2; ii++) {
      mgd->sles_hierarchy[ii].context = NULL;
      mgd->sles_hierarchy[ii].setup_func = NULL;
      mgd->sles_hierarchy[ii].solve_func = NULL;
      mgd->sles_hierarchy[ii].destroy_func = NULL;
    }

    /* Extend and initialize per-level performance info if the new
       capacity exceeds what previous setups ever used. */
    if (n_lv_max_prev < mgd->n_levels_alloc) {
      BFT_REALLOC(mg->lv_info, mgd->n_levels_alloc, cs_multigrid_level_info_t);
      for (ii = n_lv_max_prev; ii < mgd->n_levels_alloc; ii++)
        _multigrid_level_info_init(mg->lv_info + ii);
    }

  }

  /* Add new grid to hierarchy */

  mgd->grid_hierarchy[mgd->n_levels] = grid;

  /* Keep postprocessing arrays (if active) sized to the current
     level allocation, NULL-ing newly-available slots. */

  if (mg->post_row_num != NULL) {
    int n_max_post_levels = (int)(mg->info.n_levels[2]) - 1;
    BFT_REALLOC(mg->post_row_num, mgd->n_levels_alloc, int *);
    for (ii = n_max_post_levels + 1; ii < mgd->n_levels_alloc; ii++)
      mg->post_row_num[ii] = NULL;
  }

  if (mg->post_row_rank != NULL) {
    int n_max_post_levels = (int)(mg->info.n_levels[2]) - 1;
    BFT_REALLOC(mg->post_row_rank, mgd->n_levels_alloc, int *);
    for (ii = n_max_post_levels + 1; ii < mgd->n_levels_alloc; ii++)
      mg->post_row_rank[ii] = NULL;
  }

  /* Update associated info */

  {
    int  n_ranks;
    cs_lnum_t  n_rows, n_rows_with_ghosts, n_entries;
    cs_gnum_t  n_g_rows;
    cs_multigrid_level_info_t  *lv_info = mg->lv_info + mgd->n_levels;

    cs_grid_get_info(grid,
                     NULL,
                     NULL,
                     NULL,
                     NULL,
                     &n_ranks,
                     &n_rows,
                     &n_rows_with_ghosts,
                     &n_entries,
                     &n_g_rows);

    mg->info.n_levels[0] = mgd->n_levels + 1;

    /* For each statistic below, index [0] is the current value,
       [1] the running minimum, [2] the running maximum, [3] the sum
       (for later averaging over calls). */

    lv_info->n_ranks[0] = n_ranks;
    if (lv_info->n_ranks[1] > (unsigned)n_ranks)
      lv_info->n_ranks[1] = n_ranks;
    else if (lv_info->n_ranks[2] < (unsigned)n_ranks)
      lv_info->n_ranks[2] = n_ranks;
    lv_info->n_ranks[3] += n_ranks;

    lv_info->n_g_rows[0] = n_g_rows;
    if (lv_info->n_g_rows[1] > n_g_rows)
      lv_info->n_g_rows[1] = n_g_rows;
    else if (lv_info->n_g_rows[2] < n_g_rows)
      lv_info->n_g_rows[2] = n_g_rows;
    lv_info->n_g_rows[3] += n_g_rows;

    /* Local element counts: rows, rows incl. ghosts, matrix entries. */
    lv_info->n_elts[0][0] = n_rows;
    lv_info->n_elts[1][0] = n_rows_with_ghosts;
    lv_info->n_elts[2][0] = n_entries;

    for (ii = 0; ii < 3; ii++) {
      if (lv_info->n_elts[ii][1] > lv_info->n_elts[ii][0])
        lv_info->n_elts[ii][1] = lv_info->n_elts[ii][0];
      else if (lv_info->n_elts[ii][2] < lv_info->n_elts[ii][0])
        lv_info->n_elts[ii][2] = lv_info->n_elts[ii][0];
      lv_info->n_elts[ii][3] += lv_info->n_elts[ii][0];
    }

#if defined(HAVE_MPI)

    /* Imbalance = max over ranks / mean over ranks - 1, per count type. */

    if (mg->caller_n_ranks > 1) {
      cs_gnum_t tot_sizes[3], max_sizes[3];
      cs_gnum_t loc_sizes[3] = {n_rows, n_rows_with_ghosts, n_entries};
      MPI_Allreduce(loc_sizes, tot_sizes, 3, CS_MPI_GNUM, MPI_SUM,
                    mg->caller_comm);
      MPI_Allreduce(loc_sizes, max_sizes, 3, CS_MPI_GNUM, MPI_MAX,
                    mg->caller_comm);
      for (ii = 0; ii < 3; ii++) {
        lv_info->imbalance[ii][0] = (  max_sizes[ii]
                                     / (tot_sizes[ii]*1.0/n_ranks)) - 1.0;
        if (lv_info->imbalance[ii][1] > lv_info->imbalance[ii][0])
          lv_info->imbalance[ii][1] = lv_info->imbalance[ii][0];
        else if (lv_info->imbalance[ii][2] < lv_info->imbalance[ii][0])
          lv_info->imbalance[ii][2] = lv_info->imbalance[ii][0];
        lv_info->imbalance[ii][3] += lv_info->imbalance[ii][0];
      }
    }

#endif /* defined(HAVE_MPI) */

    /* On the very first call, seed the minima with current values
       (the defaults would otherwise never be lowered correctly). */
    if (lv_info->n_calls[0] == 0) {
      lv_info->n_ranks[1] = n_ranks;
      lv_info->n_g_rows[1] = n_g_rows;
      for (ii = 0; ii < 3; ii++) {
        lv_info->n_elts[ii][1] = lv_info->n_elts[ii][0];
#if defined(HAVE_MPI)
        lv_info->imbalance[ii][1] = lv_info->imbalance[ii][0];
#endif
      }
    }

    lv_info->n_calls[0] += 1;
  }

  /* Ready for next level */

  mgd->n_levels += 1;
}
968 
969 /*----------------------------------------------------------------------------
970  * Add postprocessing info to multigrid hierarchy
971  *
972  * parameters:
973  *   mg            <-> multigrid structure
974  *   name          <-- postprocessing name
975  *   post_location <-- where to postprocess (cells or vertices)
 *   n_base_rows   <-- number of rows in base grid
977  *----------------------------------------------------------------------------*/
978 
static void
_multigrid_add_post(cs_multigrid_t  *mg,
                    const char      *name,
                    int              post_location,
                    cs_lnum_t        n_base_rows)
{
  cs_multigrid_setup_data_t *mgd = mg->setup_data;

  int ii;

  assert(mg != NULL);

  /* Postprocessing of coarse levels disabled */
  if (mg->post_row_max < 1)
    return;

  mg->post_location = post_location;
  mg->n_levels_post = mgd->n_levels - 1;  /* one output field per coarse level */

  /* Keep a copy of the base variable name for later output labels */
  BFT_REALLOC(mg->post_name, strlen(name) + 1, char);
  strcpy(mg->post_name, name);

  assert(mg->n_levels_post <= mg->n_levels_max);

  /* Reallocate arrays if necessary */

  if (mg->post_row_num == NULL) {
    BFT_MALLOC(mg->post_row_num, mg->n_levels_max, int *);
    for (ii = 0; ii < mg->n_levels_max; ii++)
      mg->post_row_num[ii] = NULL;
  }

  /* Rank projection is only meaningful when grids are merged
     across ranks (merge_stride > 1). */
  if (mg->post_row_rank == NULL && mg->merge_stride > 1) {
    BFT_MALLOC(mg->post_row_rank, mg->n_levels_max, int *);
    for (ii = 0; ii < mg->n_levels_max; ii++)
      mg->post_row_rank[ii] = NULL;
  }

  /* Project each coarse level's row numbering down to the base grid,
     so it can be visualized on the base mesh. */
  for (ii = 0; ii < mg->n_levels_post; ii++) {
    BFT_REALLOC(mg->post_row_num[ii], n_base_rows, int);
    cs_grid_project_row_num(mgd->grid_hierarchy[ii+1],
                            n_base_rows,
                            mg->post_row_max,
                            mg->post_row_num[ii]);
  }

  /* Same projection for owning-rank ids, when tracked. */
  if (mg->post_row_rank != NULL) {
    for (ii = 0; ii < mg->n_levels_post; ii++) {
      BFT_REALLOC(mg->post_row_rank[ii], n_base_rows, int);
      cs_grid_project_row_rank(mgd->grid_hierarchy[ii+1],
                               n_base_rows,
                               mg->post_row_rank[ii]);
    }
  }
}
1033 
1034 /*----------------------------------------------------------------------------
1035  * Post process variables associated with Multigrid hierarchy
1036  *
1037  * parameters:
1038  *   mgh <-- multigrid hierarchy
1039  *   ts  <-- time step status structure
1040  *----------------------------------------------------------------------------*/
1041 
static void
_cs_multigrid_post_function(void                  *mgh,
                            const cs_time_step_t  *ts)
{
  CS_UNUSED(ts);

  int ii;
  size_t name_len;
  char *var_name = NULL;
  cs_multigrid_t *mg = mgh;
  const char *base_name = NULL;

  /* Return if necessary structures inconsistent or have been destroyed */

  if (mg == NULL)
    return;

  if (mg->post_row_num == NULL || cs_post_mesh_exists(-1) != true)
    return;

  /* Vertex-located output must be scattered from the range-set
     (assembled) numbering back to mesh vertices. */
  int *s_num = NULL;
  const cs_range_set_t *rs = NULL;
  if (mg->post_location == CS_MESH_LOCATION_VERTICES) {
    BFT_MALLOC(s_num, cs_glob_mesh->n_vertices, int);
    rs = cs_glob_mesh->vtx_range_set;
  }

  /* Allocate name buffer */

  base_name = mg->post_name;
  /* "mg " + name + " " + 3-digit level + margin + '\0' */
  name_len = 3 + strlen(base_name) + 1 + 3 + 1 + 4 + 1;
  BFT_MALLOC(var_name, name_len, char);

  /* Loop on grid levels */

  for (ii = 0; ii < mg->n_levels_post; ii++) {

    sprintf(var_name, "mg %s %2d", base_name, (ii+1));

    /* Coarse row numbers projected to the base mesh */

    if (mg->post_location == CS_MESH_LOCATION_CELLS)
      cs_post_write_var(CS_POST_MESH_VOLUME,
                        CS_POST_WRITER_ALL_ASSOCIATED,
                        var_name,
                        1,
                        false,
                        true,
                        CS_POST_TYPE_int,
                        mg->post_row_num[ii],
                        NULL,
                        NULL,
                        cs_glob_time_step);

    else if (mg->post_location == CS_MESH_LOCATION_VERTICES) {
      cs_range_set_scatter(rs, CS_INT_TYPE, 1, mg->post_row_num[ii], s_num);
      cs_post_write_vertex_var(CS_POST_MESH_VOLUME,
                               CS_POST_WRITER_ALL_ASSOCIATED,
                               var_name,
                               1,
                               false,
                               true,
                               CS_POST_TYPE_int,
                               s_num,
                               cs_glob_time_step);
    }

    else
      bft_error(__FILE__, __LINE__, 0,
                "%s: Invalid location for post-processing.\n", __func__);

    /* Projected arrays are single-use: freed once written */
    BFT_FREE(mg->post_row_num[ii]);

    /* Optional owning-rank output (only when grids were merged) */

    if (mg->post_row_rank != NULL) {

      sprintf(var_name, "rk %s %2d",
              base_name, (ii+1));

      if (mg->post_location == CS_MESH_LOCATION_CELLS)
        cs_post_write_var(CS_POST_MESH_VOLUME,
                          CS_POST_WRITER_ALL_ASSOCIATED,
                          var_name,
                          1,
                          false,
                          true,
                          CS_POST_TYPE_int,
                          mg->post_row_rank[ii],
                          NULL,
                          NULL,
                          cs_glob_time_step);
      else if (mg->post_location == CS_MESH_LOCATION_VERTICES) {
        cs_range_set_scatter(rs, CS_INT_TYPE, 1, mg->post_row_rank[ii], s_num);
        cs_post_write_vertex_var(CS_POST_MESH_VOLUME,
                                 CS_POST_WRITER_ALL_ASSOCIATED,
                                 var_name,
                                 1,
                                 false,
                                 true,
                                 CS_POST_TYPE_int,
                                 s_num,
                                 cs_glob_time_step);
      }

      BFT_FREE(mg->post_row_rank[ii]);

    }

  }
  mg->n_levels_post = 0;

  BFT_FREE(s_num);
  BFT_FREE(var_name);
}
1153 
1154 /*----------------------------------------------------------------------------
1155  * Function returning the type name of polynomial preconditioner context.
1156  *
1157  * parameters:
1158  *   context   <-- pointer to preconditioner context
1159  *   logging   <-- if true, logging description; if false, canonical name
1160  *----------------------------------------------------------------------------*/
1161 
1162 static const char *
_multigrid_pc_get_type(const void * context,bool logging)1163 _multigrid_pc_get_type(const void  *context,
1164                        bool         logging)
1165 {
1166   CS_UNUSED(context);
1167 
1168   if (logging == false) {
1169     static const char t[] = "multigrid";
1170     return t;
1171   }
1172   else {
1173     static const char t[] = N_("Multigrid");
1174     return _(t);
1175   }
1176 }
1177 
1178 /*----------------------------------------------------------------------------
1179  * Function for setup of a multigrid preconditioner.
1180  *
1181  * parameters:
1182  *   context   <-> pointer to preconditioner context
1183  *   name      <-- pointer to name of associated linear system
1184  *   a         <-- matrix
1185  *   verbosity <-- associated verbosity
1186  *----------------------------------------------------------------------------*/
1187 
1188 static void
_multigrid_pc_setup(void * context,const char * name,const cs_matrix_t * a,int verbosity)1189 _multigrid_pc_setup(void               *context,
1190                     const char         *name,
1191                     const cs_matrix_t  *a,
1192                     int                 verbosity)
1193 {
1194   cs_multigrid_setup(context,  name, a, verbosity);
1195 
1196   cs_multigrid_t  *mg = context;
1197   cs_multigrid_setup_data_t *mgd = mg->setup_data;
1198 
1199   BFT_REALLOC(mgd->pc_name, strlen(name) + 1, char);
1200   strcpy(mgd->pc_name, name);
1201 }
1202 
1203 /*----------------------------------------------------------------------------
1204  * Function for setup of a multigrid preconditioner.
1205  *
1206  * parameters:
1207  *   context   <-> pointer to preconditioner context
1208  *   name      <-- pointer to name of associated linear system
1209  *   a         <-- matrix
1210  *   verbosity <-- associated verbosity
1211  *----------------------------------------------------------------------------*/
1212 
static void
_multigrid_pc_setup_k_sub(void               *context,
                          const char         *name,
                          const cs_matrix_t  *a,
                          int                 verbosity)
{
  cs_multigrid_t  *mg = context;
  cs_multigrid_t  *parent = mg->p_mg;
  cs_multigrid_setup_data_t *p_mgd = parent->setup_data;

  /* Count ranks active on the parent's coarsest grid: this determines
     how the coarse-level K-cycle hierarchy should be set up. */
  int n_ranks = 1;
#if defined(HAVE_MPI)
  {
    int p_lv = p_mgd->n_levels-1;
    MPI_Comm lv_comm = cs_grid_get_comm(p_mgd->grid_hierarchy[p_lv]);
    if (lv_comm != MPI_COMM_NULL)
      MPI_Comm_size(lv_comm, &n_ranks);
  }
#endif

  _setup_k_cycle_hpc_sub(context, name, a, n_ranks, verbosity);

  cs_multigrid_setup_data_t *mgd = mg->setup_data;

  /* Keep a copy of the system name for later solve calls */
  BFT_REALLOC(mgd->pc_name, strlen(name) + 1, char);
  strcpy(mgd->pc_name, name);
}
1240 
1241 /*----------------------------------------------------------------------------
1242  * Function for setup of a multigrid (local smoother)
1243  *
1244  * parameters:
1245  *   context   <-> pointer to preconditioner context
1246  *   name      <-- pointer to name of associated linear system
1247  *   a         <-- matrix
1248  *   verbosity <-- associated verbosity
1249  *----------------------------------------------------------------------------*/
1250 
1251 static void
_multigrid_setup_k_local_smoothe(void * context,const char * name,const cs_matrix_t * a,int verbosity)1252 _multigrid_setup_k_local_smoothe(void               *context,
1253                                  const char         *name,
1254                                  const cs_matrix_t  *a,
1255                                  int                 verbosity)
1256 {
1257   _setup_k_cycle_hpc_sub(context, name, a, 1, verbosity);
1258 
1259   cs_multigrid_t  *mg = context;
1260   cs_multigrid_setup_data_t *mgd = mg->setup_data;
1261 
1262   BFT_REALLOC(mgd->pc_name, strlen(name) + 1, char);
1263   strcpy(mgd->pc_name, name);
1264 }
1265 
1266 /*----------------------------------------------------------------------------
 * Function for setting the required tolerance for multigrid
 * as a preconditioner.
1269  *
1270  * The preconditioner is considered to have converged when
1271  * residue/r_norm <= precision, residue being the L2 norm of a.vx-rhs.
1272  *
1273  * parameters:
1274  *   context       <-> pointer to multigrid context
1275  *   precision     <-- preconditioner precision
1276  *   r_norm        <-- residue normalization
1277  *----------------------------------------------------------------------------*/
1278 
1279 static void
_multigrid_pc_tolerance_t(void * context,double precision,double r_norm)1280 _multigrid_pc_tolerance_t(void    *context,
1281                           double   precision,
1282                           double   r_norm)
1283 {
1284   cs_multigrid_t  *mg = context;
1285 
1286   mg->pc_precision = precision;
1287   mg->pc_r_norm = r_norm;
1288 }
1289 
1290 /*----------------------------------------------------------------------------
1291  * Function for application of a Multigrid preconditioner.
1292  *
1293  * In cases where it is desired that the preconditioner modify a vector
1294  * "in place", x_in should be set to NULL, and x_out contain the vector to
1295  * be modified (\f$x_{out} \leftarrow M^{-1}x_{out})\f$).
1296  *
1297  * parameters:
1298  *   context       <-> pointer to preconditioner context
1299  *   x_in          <-- input vector
1300  *   x_out         <-> input/output vector
1301  *
1302  * returns:
1303  *   preconditioner application status
1304  *----------------------------------------------------------------------------*/
1305 
static cs_sles_pc_state_t
_multigrid_pc_apply(void                *context,
                    const cs_real_t     *x_in,
                    cs_real_t           *x_out)
{
  int     n_iter;
  double  residue;

  cs_multigrid_t  *mg = context;
  cs_multigrid_setup_data_t *mgd = mg->setup_data;

  /* Matrix of the finest grid in the hierarchy */
  const cs_matrix_t  *a = cs_grid_get_matrix(mgd->grid_hierarchy[0]);

  const cs_real_t *rhs = x_in;

  /* NOTE(review): db_size[1] appears to be the full diagonal block
     stride (rows scaled by block size) — confirm against
     cs_matrix_get_diag_block_size documentation. */
  const cs_lnum_t *db_size = cs_matrix_get_diag_block_size(a);
  const cs_lnum_t n_rows = cs_matrix_get_n_rows(a) * db_size[1];

  /* If preconditioner is "in-place", use additional buffer */

  if (x_in == NULL) {
    /* Lazily allocate the auxiliary buffer (ghost columns included),
       then copy x_out into it so it can serve as the right-hand side. */
    if (mgd->pc_aux == NULL) {
      const cs_lnum_t n_cols = cs_matrix_get_n_columns(a) * db_size[1];
      BFT_MALLOC(mgd->pc_aux, n_cols, cs_real_t);
    }
    cs_real_t *restrict _rhs = mgd->pc_aux;
#   pragma omp parallel for if(n_rows > CS_THR_MIN)
    for (cs_lnum_t ii = 0; ii < n_rows; ii++)
      _rhs[ii] = x_out[ii];
    rhs = _rhs;
  }

  /* Zero initial guess for the preconditioning solve */
# pragma omp parallel for if(n_rows > CS_THR_MIN)
  for (cs_lnum_t ii = 0; ii < n_rows; ii++)
    x_out[ii] = 0.;

  cs_sles_convergence_state_t  cvg = cs_multigrid_solve(context,
                                                        mgd->pc_name,
                                                        a,
                                                        mgd->pc_verbosity,
                                                        mg->pc_precision,
                                                        mg->pc_r_norm,
                                                        &n_iter,
                                                        &residue,
                                                        rhs,
                                                        x_out,
                                                        0,      /* aux size */
                                                        NULL);  /* aux vectors */

  /* Map solver convergence state to preconditioner state;
     anything not diverged/breakdown/converged counts as max-iteration. */

  cs_sles_pc_state_t state;

  switch(cvg) {
  case CS_SLES_DIVERGED:
    state = CS_SLES_PC_DIVERGED;
    break;
  case CS_SLES_BREAKDOWN:
    state = CS_SLES_PC_BREAKDOWN;
    break;
  case CS_SLES_CONVERGED:
    state = CS_SLES_PC_CONVERGED;
    break;
  default:
    state = CS_SLES_PC_MAX_ITERATION;
  }

  return state;
}
1373 
1374 /*----------------------------------------------------------------------------
1375  * Create a Preconditioner structure using multigrid.
1376  *
1377  * parameters:
1378  *   mg_type  <-- type of multigrid algorithm to use
1379  *
1380  * returns:
1381  *   pointer to newly created preconditioner object.
1382  *----------------------------------------------------------------------------*/
1383 
1384 static cs_multigrid_t *
_multigrid_pc_create(cs_multigrid_type_t mg_type)1385 _multigrid_pc_create(cs_multigrid_type_t  mg_type)
1386 {
1387   cs_multigrid_t *mg = cs_multigrid_create(mg_type);
1388 
1389   switch(mg_type) {
1390   case CS_MULTIGRID_V_CYCLE:
1391     cs_multigrid_set_solver_options
1392       (mg,
1393        CS_SLES_P_SYM_GAUSS_SEIDEL, /* descent smoothe */
1394        CS_SLES_P_SYM_GAUSS_SEIDEL, /* ascent smoothe */
1395        CS_SLES_PCG,                /* coarse smoothe */
1396        1,                          /* n_max_cycles */
1397        1,                          /* n_max_iter_descent, */
1398        1,                          /* n_max_iter_ascent */
1399        500,                        /* n_max_iter_coarse */
1400        0, 0, -1,                   /* precond poly_degree */
1401        -1, -1, 1);                 /* precision_multiplier */
1402     break;
1403 
1404   case CS_MULTIGRID_K_CYCLE:
1405     cs_multigrid_set_solver_options
1406       (mg,
1407        CS_SLES_TS_F_GAUSS_SEIDEL,
1408        CS_SLES_TS_B_GAUSS_SEIDEL,
1409        CS_SLES_PCG,                /* coarse smoothe */
1410        1,    /* n max cycles */
1411        1,    /* n max iter for descent */
1412        1,    /* n max iter for ascent */
1413        500,  /* n max iter for coarse solve */
1414        0, 0, 0,     /* precond degree */
1415        -1, -1, 10); /* precision multiplier */
1416     break;
1417 
1418   case CS_MULTIGRID_K_CYCLE_HPC:
1419     cs_multigrid_set_solver_options
1420       (mg,
1421        CS_SLES_TS_F_GAUSS_SEIDEL,
1422        CS_SLES_TS_B_GAUSS_SEIDEL,
1423        CS_SLES_FCG,                /* coarse smoothe */
1424        1,   /* n max cycles */
1425        1,   /* n max iter for descent */
1426        1,   /* n max iter for ascent */
1427        500, /* n max iter for coarse solve */
1428        0, 0, 0,    /* precond degree */
1429        -1, -1, 1); /* precision multiplier */
1430     break;
1431 
1432   default:
1433     assert(0);
1434   }
1435 
1436   return mg;
1437 }
1438 
1439 /*----------------------------------------------------------------------------*/
1440 /*!
1441  * \brief Create a multigrid smoother using a local K cycle for coarse-level
1442  * solvers used by high-performance system variant of K-cycle.
1443  *
1444  * \param[in]  parent  parent-level multigrid, or NULL
1445 
1446  * \return  pointer to newly created mutigrid object.
1447  */
1448 /*----------------------------------------------------------------------------*/
1449 
1450 static cs_multigrid_t *
_multigrid_create_k_cycle_bottom_smoother(cs_multigrid_t * parent)1451 _multigrid_create_k_cycle_bottom_smoother(cs_multigrid_t  *parent)
1452 {
1453   cs_multigrid_t *mg = _multigrid_pc_create(CS_MULTIGRID_K_CYCLE);
1454 
1455   mg->subtype = CS_MULTIGRID_BOTTOM_SMOOTHE;
1456   mg->info.is_pc = true;
1457   mg->aggregation_limit = 8;
1458   mg->k_cycle_threshold = -1;
1459 
1460 #if defined(HAVE_MPI)
1461   mg->comm = MPI_COMM_NULL;
1462   if (parent != NULL)
1463     mg->caller_comm = parent->comm;
1464   else
1465     mg->caller_comm = cs_glob_mpi_comm;
1466 #endif
1467 
1468   return mg;
1469 }
1470 
1471 /*----------------------------------------------------------------------------*/
1472 /*!
1473  * \brief Destructor for preconditioner using persistent multigrid context.
1474  *
1475  * Using this destructor, the multigrid context is only freed, not destroyed.
1476  *
1477  * \param[in, out]  context  pointer to multigrid linear solver info
1478  *                           (actual type: cs_multigrid_t  **)
1479  */
1480 /*----------------------------------------------------------------------------*/
1481 
1482 static void
_pc_from_mg_destroy(void ** context)1483 _pc_from_mg_destroy(void  **context)
1484 {
1485   cs_multigrid_t *mg = (cs_multigrid_t *)(*context);
1486 
1487   if (mg != NULL)
1488     cs_multigrid_free(mg);
1489 }
1490 
1491 /*----------------------------------------------------------------------------*/
1492 /*!
 * \brief Create a multigrid preconditioner using a K cycle for coarse-level
1494  * solvers used by high-performance system variant of K-cycle.
1495  *
1496  * \param[in]  mg  associated coarse level multigrid structure
1497  *
1498  * \return  pointer to newly created preconditioner object.
1499  */
1500 /*----------------------------------------------------------------------------*/
1501 
1502 static cs_sles_pc_t *
_pc_create_from_mg_sub(cs_multigrid_t * mg)1503 _pc_create_from_mg_sub(cs_multigrid_t  *mg)
1504 {
1505   cs_sles_pc_t *pc = cs_sles_pc_define(mg,
1506                                        _multigrid_pc_get_type,
1507                                        _multigrid_pc_setup_k_sub,
1508                                        _multigrid_pc_tolerance_t,
1509                                        _multigrid_pc_apply,
1510                                        cs_multigrid_free,
1511                                        cs_multigrid_log,
1512                                        cs_multigrid_copy,
1513                                        _pc_from_mg_destroy);
1514 
1515   return pc;
1516 }
1517 
1518 /*----------------------------------------------------------------------------*/
1519 /*!
1520  * \brief Create recursive high-performance system variant of K-cycle
1521  *        for coarse-solver bottom-level.
1522  *
1523  * Returns NULL if the number of associated ranks is small enough that
1524  * no multigrid solver is required.
1525  *
1526  * \param[in]  parent  parent-level multigrid, or NULL
1527 
1528  * \return  pointer to newly created preconditioner object.
1529  */
1530 /*----------------------------------------------------------------------------*/
1531 
1532 static cs_multigrid_t *
_multigrid_create_k_cycle_bottom_coarsest(cs_multigrid_t * parent)1533 _multigrid_create_k_cycle_bottom_coarsest(cs_multigrid_t  *parent)
1534 {
1535   cs_multigrid_t *mg = NULL;
1536 
1537 #if defined(HAVE_MPI)
1538   if (parent != NULL) {
1539     int size = 1;
1540     if (parent->comm != MPI_COMM_NULL)
1541       MPI_Comm_size(parent->comm, &size);
1542 
1543     if (size >= _k_cycle_hpc_merge_stride) {
1544       if (size > _k_cycle_hpc_merge_stride)
1545         mg = _multigrid_pc_create(CS_MULTIGRID_K_CYCLE_HPC); /* recursive */
1546       else {
1547         if (size > _k_cycle_hpc_recurse_threshold) {
1548           mg = _multigrid_pc_create(CS_MULTIGRID_K_CYCLE);
1549           mg->aggregation_limit = 8;
1550           mg->k_cycle_threshold = -1;
1551         }
1552       }
1553       if (mg != NULL) {
1554         mg->caller_comm = parent->comm;
1555         mg->comm = cs_grid_get_comm_merge(parent->comm,
1556                                           _k_cycle_hpc_merge_stride);
1557         mg->subtype = CS_MULTIGRID_COARSE;
1558       }
1559     }
1560 
1561   }
1562 #endif /* defined(HAVE_MPI) */
1563 
1564   return mg;
1565 }
1566 
1567 /*----------------------------------------------------------------------------*/
1568 /*!
1569  * \brief Create a 2 level coarse-level solver for use
1570  *        by high-performance system variant of K-cycle.
1571  *
1572  * \param[in]  parent  parent-level multigrid, or NULL
1573 
1574  * \return  pointer to newly created preconditioner object.
1575  */
1576 /*----------------------------------------------------------------------------*/
1577 
static cs_multigrid_t *
_multigrid_create_k_cycle_bottom(cs_multigrid_t  *parent)
{
  cs_multigrid_t *mg = cs_multigrid_create(CS_MULTIGRID_V_CYCLE);

  mg->subtype = CS_MULTIGRID_BOTTOM;
  mg->p_mg = parent;

  /* Exactly 2 levels: the bottom solver pairs one fine level
     (smoothed locally) with one merged coarse level. */
  mg->aggregation_limit = 0;
  mg->coarsening_type = CS_GRID_COARSENING_DEFAULT; /* Not used here */
  mg->n_levels_max = 2;
  mg->n_g_rows_min = 1;

  mg->pc_precision = 0.0;
  mg->pc_r_norm = 0.0;

  mg->n_levels_post = 0;
  mg->setup_data = NULL;

  BFT_REALLOC(mg->lv_info, mg->n_levels_max, cs_multigrid_level_info_t);

  for (int ii = 0; ii < mg->n_levels_max; ii++)
    _multigrid_level_info_init(mg->lv_info + ii);

  mg->caller_n_ranks = cs_glob_n_ranks;

#if defined(HAVE_MPI)
  /* Always merge at the bottom level, with the configured stride. */
  mg->merge_mean_threshold = 0;
  mg->merge_glob_threshold = 1;
  mg->merge_stride = _k_cycle_hpc_merge_stride;
#endif

#if defined(HAVE_MPI)
  if (parent != NULL) {
    mg->comm = parent->comm;
    mg->caller_comm = parent->comm;
  }
#endif /* defined(HAVE_MPI) */

  /* local k-cycle for level 0 smoother,
     k-cycle preconditioning for level 1 solver (such as FCG) */

  mg->lv_mg[0] = _multigrid_create_k_cycle_bottom_smoother(mg);
  mg->lv_mg[2] = _multigrid_create_k_cycle_bottom_coarsest(mg);

  mg->lv_mg[0]->p_mg = parent;
  if (mg->lv_mg[2] != NULL)
    mg->lv_mg[2]->p_mg = mg;

  /* If no nested coarsest-level multigrid exists, fall back to a
     degree-1 polynomial preconditioner for the bottom solver. */
  int pc_degree_bottom = (mg->lv_mg[2] != NULL) ? -1 : 1;

  cs_multigrid_set_solver_options
    (mg,
     CS_SLES_N_SMOOTHER_TYPES, CS_SLES_N_SMOOTHER_TYPES, CS_SLES_FCG,
     1,   /* n max cycles */
     1,   /* n max iter for descent */
     1,   /* n max iter for ascent */
     500, /* n max iter coarse */
     -1, -1, pc_degree_bottom,   /* precond degree */
     1, 1, 1);  /* precision multiplier */

  return mg;
}
1641 
1642 /*----------------------------------------------------------------------------
1643  * Allocate working array for coarse right hand sides and corrections.
1644  *
1645  * parameters:
1646  *   mg      <-> pointer to multigrid solver info and context
1647  *   stride  <-- matrix fill stride
1648  *----------------------------------------------------------------------------*/
1649 
1650 static void
_multigrid_setup_sles_work_arrays(cs_multigrid_t * mg,cs_lnum_t stride)1651 _multigrid_setup_sles_work_arrays(cs_multigrid_t  *mg,
1652                                   cs_lnum_t        stride)
1653 {
1654   cs_multigrid_setup_data_t *mgd = mg->setup_data;
1655 
1656   unsigned n0 = 0, n1 = 2;
1657   if (   mg->type >= CS_MULTIGRID_K_CYCLE
1658       && mg->type <= CS_MULTIGRID_K_CYCLE_HPC) {
1659     n0 = 4;
1660     n1 = n0 + 6;
1661   }
1662 
1663   BFT_MALLOC(mgd->rhs_vx, mgd->n_levels*n1, cs_real_t *);
1664 
1665   for (unsigned i = 0; i < n1; i++)
1666     mgd->rhs_vx[i] = NULL;
1667 
1668   if (mgd->n_levels > 1) {
1669 
1670     size_t wr_size0 = 0, wr_size1 = 0;
1671     for (unsigned i = 0; i < mgd->n_levels; i++) {
1672       size_t block_size
1673         = cs_grid_get_n_cols_max(mgd->grid_hierarchy[i])*stride;
1674       block_size = CS_SIMD_SIZE(block_size);
1675       if (i == 0)
1676         wr_size0 += block_size;
1677       else
1678         wr_size1 += block_size;
1679     }
1680 
1681     BFT_MALLOC(mgd->rhs_vx_buf, wr_size0*n0 + wr_size1*n1, cs_real_t);
1682 
1683     size_t block_size_shift = 0;
1684 
1685     for (unsigned i = 0; i < mgd->n_levels; i++) {
1686       size_t block_size
1687         = cs_grid_get_n_cols_max(mgd->grid_hierarchy[i])*stride;
1688       cs_lnum_t n = (i == 0) ? n0 : n1;
1689       for (int j = 0; j < n; j++) {
1690         mgd->rhs_vx[i*n1 + j] = mgd->rhs_vx_buf+ block_size_shift;
1691         block_size_shift += block_size;
1692       }
1693     }
1694 
1695   }
1696 }
1697 
1698 /*----------------------------------------------------------------------------
1699  * Setup multigrid sparse linear equation solvers on existing hierarchy.
1700  *
1701  * parameters:
1702  *   mg        <-> pointer to multigrid solver info and context
1703  *   name      <-- linear system name
1704  *   verbosity <-- associated verbosity
1705  *----------------------------------------------------------------------------*/
1706 
static void
_multigrid_setup_sles_k_cycle_bottom(cs_multigrid_t  *mg,
                                     const char      *name,
                                     int              verbosity)
{
  cs_timer_t t0, t1;

  cs_multigrid_level_info_t *mg_lv_info;
  const cs_grid_t *g;
  const cs_matrix_t *m;

  /* Buffer for derived system names ("<name>:smoother:<lv>", ...) */
  size_t l = strlen(name) + 32;
  char *_name;
  BFT_MALLOC(_name, l, char);

  /* Initialization */

  t0 = cs_timer_time();

  cs_multigrid_setup_data_t *mgd = mg->setup_data;

  cs_lnum_t stride = 1; /* For diagonal blocks */

  /* Prepare solver context: this variant assumes exactly 2 levels
     (fine smoother + coarsest solver), as built for the
     CS_MULTIGRID_BOTTOM subtype. */

  assert(mgd->n_levels == 2);

  unsigned i = 0;

  assert(mg->subtype == CS_MULTIGRID_BOTTOM);
  assert(mg->type == CS_MULTIGRID_V_CYCLE);

  g = mgd->grid_hierarchy[i];
  m = cs_grid_get_matrix(g);

  mg_lv_info = mg->lv_info + i;

  t1 = cs_timer_time();
  cs_timer_counter_add_diff(&(mg_lv_info->t_tot[0]), &t0, &t1);

  /* Intermediate grids */

  t0 = t1;

  g = mgd->grid_hierarchy[0];
  m = cs_grid_get_matrix(g);

  mg_lv_info = mg->lv_info;

  cs_mg_sles_t  *mg_sles = &(mgd->sles_hierarchy[0]);

  /* Level 0: either a standard smoother, or a local multigrid
     (sub K-cycle) used as smoother when one was attached (lv_mg[0]). */

  if (mg->info.type[0] < CS_SLES_N_SMOOTHER_TYPES) {
    mg_sles->context
      = cs_multigrid_smoother_create(mg->info.type[0],
                                     mg->info.poly_degree[0],
                                     mg->info.n_max_iter[0]);
    mg_sles->setup_func = cs_multigrid_smoother_setup;
    mg_sles->solve_func = cs_multigrid_smoother_solve;
    mg_sles->destroy_func = cs_sles_it_destroy;
  }
  else if (i == 0 && mg->lv_mg[0] != NULL) {
    assert(mg_sles->context == NULL);
    mg_sles->context = mg->lv_mg[0];
    mg_sles->setup_func = _multigrid_setup_k_local_smoothe;
    mg_sles->solve_func = cs_multigrid_solve;
    mg_sles->destroy_func = NULL;  /* lv_mg[0] is owned by mg, not mg_sles */
  }

  snprintf(_name, l-1, "%s:smoother:%d", name, i);
  _name[l-1] = '\0';

  mg_sles->setup_func(mg_sles->context, _name, m, verbosity - 2);
#if defined(HAVE_MPI)
  /* Local smoothing only: disable MPI reductions for this solver */
  if (mg_sles->solve_func == cs_sles_it_solve) {
    cs_sles_it_t  *context = mg_sles->context;
    cs_sles_it_set_mpi_reduce_comm(context,
                                   MPI_COMM_NULL,
                                   MPI_COMM_NULL);
  }
#endif

  t1 = cs_timer_time();
  cs_timer_counter_add_diff(&(mg_lv_info->t_tot[0]), &t0, &t1);

  /* Coarsest grid */

  t0 = t1;

  g = mgd->grid_hierarchy[1];
  m = cs_grid_get_matrix(g);

  mg_lv_info = mg->lv_info + 1;

  /* sles_hierarchy has 2 slots per level; index 2 is the coarse solver,
     whose options are read from the info entries at index 2. */

  mg_sles = &(mgd->sles_hierarchy[2]);
  mg_sles->context
    = cs_sles_it_create(mg->info.type[2],
                        mg->info.poly_degree[2],
                        mg->info.n_max_iter[2],
                        false); /* stats not updated here */
  mg_sles->setup_func = cs_sles_it_setup;
  mg_sles->solve_func = cs_sles_it_solve;
  mg_sles->destroy_func = cs_sles_it_destroy;

  /* Optional multigrid preconditioning of the coarse solver;
     ownership of the preconditioner is transferred to the solver. */

  if (mg->lv_mg[2] != NULL) {
    cs_sles_pc_t *pc = _pc_create_from_mg_sub(mg->lv_mg[2]);
    cs_sles_it_transfer_pc(mg_sles->context, &pc);
  }

#if defined(HAVE_MPI)
  {
    cs_sles_it_t  *context = mg_sles->context;
    cs_sles_it_set_mpi_reduce_comm(context,
                                   cs_grid_get_comm(mgd->grid_hierarchy[1]),
                                   mg->comm);
  }
#endif

  snprintf(_name, l-1, "%s:coarse:%d", name, i);
  _name[l-1] = '\0';

  mg_sles->setup_func(mg_sles->context, _name, m, verbosity - 2);

  /* Diagonal block size is the same for all levels */

  const cs_lnum_t *db_size = cs_matrix_get_diag_block_size(m);
  stride = db_size[1];

  _multigrid_setup_sles_work_arrays(mg, stride);

  BFT_FREE(_name);

  /* Timing */

  t1 = cs_timer_time();
  cs_timer_counter_add_diff(&(mg_lv_info->t_tot[0]), &t0, &t1);
}
1843 
1844 /*----------------------------------------------------------------------------
1845  * Setup multigrid sparse linear equation solvers on existing hierarchy.
1846  *
1847  * parameters:
1848  *   mg        <-> pointer to multigrid solver info and context
1849  *   name      <-- linear system name
1850  *   verbosity <-- associated verbosity
1851  *----------------------------------------------------------------------------*/
1852 
static void
_multigrid_setup_sles(cs_multigrid_t  *mg,
                      const char      *name,
                      int              verbosity)
{
  cs_timer_t t0, t1;

  cs_multigrid_level_info_t *mg_lv_info;
  const cs_grid_t *g;
  const cs_matrix_t *m;

  /* Buffer for derived solver names ("<name>:descent:<lv>", ...) */
  size_t l = strlen(name) + 32;
  char *_name;
  BFT_MALLOC(_name, l, char);

  /* Initialization */

  t0 = cs_timer_time();

  cs_multigrid_setup_data_t *mgd = mg->setup_data;

  cs_lnum_t stride = 1; /* For diagonal blocks */

  /* Prepare solver context */

  unsigned n_levels = mgd->n_levels;

  unsigned i = 0;

  g = mgd->grid_hierarchy[i];
  m = cs_grid_get_matrix(g);

  mg_lv_info = mg->lv_info + i;

  t1 = cs_timer_time();
  cs_timer_counter_add_diff(&(mg_lv_info->t_tot[0]), &t0, &t1);

  /* Intermediate grids: descent (j == 0) and ascent (j == 1) smoothers;
     a V-cycle uses a single smoother on the finest level. */

  for (i = 0; i < n_levels - 1; i++) {

    t0 = t1;

    g = mgd->grid_hierarchy[i];
    m = cs_grid_get_matrix(g);

    mg_lv_info = mg->lv_info + i;

    int n_ops = 2;
    if (i == 0) {
      if (mg->type == CS_MULTIGRID_V_CYCLE)
        n_ops = 1;
    }

    for (int j = 0; j < n_ops; j++) {
      if (   mg->info.type[j] != CS_SLES_N_IT_TYPES
          && mg->info.type[j] < CS_SLES_N_SMOOTHER_TYPES) {
        cs_mg_sles_t  *mg_sles = &(mgd->sles_hierarchy[i*2 + j]);
        mg_sles->context
          = cs_multigrid_smoother_create(mg->info.type[j],
                                         mg->info.poly_degree[j],
                                         mg->info.n_max_iter[j]);
        mg_sles->setup_func = cs_multigrid_smoother_setup;
        mg_sles->solve_func = cs_multigrid_smoother_solve;
        mg_sles->destroy_func = cs_sles_it_destroy;

        /* Share context between descent and ascent smoothers if both
           are of the cs_sles_it type */
        if (j == 1) {
          if (   mg->info.type[0] != CS_SLES_N_IT_TYPES
              && mg->info.type[0] < CS_SLES_N_SMOOTHER_TYPES)
            cs_sles_it_set_shareable(mgd->sles_hierarchy[i*2 + 1].context,
                                     mgd->sles_hierarchy[i*2].context);
        }
      }
    }

    /* On the finest level, a local K-cycle multigrid may replace the
       standard smoother (CS_MULTIGRID_BOTTOM subtype only). */

    if (i == 0 && mg->lv_mg[0] != NULL) {
      cs_mg_sles_t  *mg_sles = &(mgd->sles_hierarchy[0]);
      assert(mg->subtype == CS_MULTIGRID_BOTTOM);
      assert(mg->type == CS_MULTIGRID_V_CYCLE);
      assert(n_ops == 1);
      assert(mg_sles->context == NULL);
      mg_sles->context = mg->lv_mg[0];
      mg_sles->setup_func = _multigrid_setup_k_local_smoothe;
      mg_sles->solve_func = cs_multigrid_solve;
      mg_sles->destroy_func = NULL;  /* lv_mg[0] is owned by mg */
    }

    /* Set up each operator, naming it after the system and level */

    for (int j = 0; j < n_ops; j++) {
      if (j == 0)
        snprintf(_name, l-1, "%s:descent:%d", name, i);
      else
        snprintf(_name, l-1, "%s:ascent:%d", name, i);
      _name[l-1] = '\0';
      cs_mg_sles_t  *mg_sles = &(mgd->sles_hierarchy[i*2 + j]);
      mg_sles->setup_func(mg_sles->context, _name, m, verbosity - 2);
#if defined(HAVE_MPI)
      /* Restrict reductions to this level's (possibly merged) grid
         communicator */
      if (mg_sles->solve_func == cs_sles_it_solve) {
        cs_sles_it_t  *context = mg_sles->context;
        MPI_Comm lv_comm = cs_grid_get_comm(mgd->grid_hierarchy[i]);
        cs_sles_it_set_mpi_reduce_comm(context,
                                       lv_comm,
                                       mg->comm);
      }
#endif
    }

    t1 = cs_timer_time();
    cs_timer_counter_add_diff(&(mg_lv_info->t_tot[0]), &t0, &t1);

  }

  /* Coarsest grid */

  if (n_levels > 1) {

    t0 = t1;

    i = n_levels - 1;

    g = mgd->grid_hierarchy[i];
    m = cs_grid_get_matrix(g);

    mg_lv_info = mg->lv_info + i;

    /* The coarse solver's options are read from info entries at
       index 2 (descent/ascent/coarse convention). */

    cs_mg_sles_t  *mg_sles = &(mgd->sles_hierarchy[i*2]);
    mg_sles->context
      = cs_sles_it_create(mg->info.type[2],
                          mg->info.poly_degree[2],
                          mg->info.n_max_iter[2],
                          false); /* stats not updated here */
    mg_sles->setup_func = cs_sles_it_setup;
    mg_sles->solve_func = cs_sles_it_solve;
    mg_sles->destroy_func = cs_sles_it_destroy;

    /* Optional multigrid preconditioning of the coarse solver;
       ownership of the preconditioner is transferred to the solver. */

    if (mg->lv_mg[2] != NULL) {
      cs_sles_pc_t *pc = _pc_create_from_mg_sub(mg->lv_mg[2]);
      cs_sles_it_transfer_pc(mg_sles->context, &pc);
    }

#if defined(HAVE_MPI)
    {
      cs_sles_it_t  *context = mg_sles->context;
      cs_sles_it_set_mpi_reduce_comm(context,
                                     cs_grid_get_comm(mgd->grid_hierarchy[i]),
                                     mg->comm);
    }
#endif

    snprintf(_name, l-1, "%s:coarse:%d", name, i);
    _name[l-1] = '\0';

    mg_sles->setup_func(mg_sles->context, _name, m, verbosity - 2);

    /* Diagonal block size is the same for all levels */

    const cs_lnum_t *db_size = cs_matrix_get_diag_block_size(m);
    stride = db_size[1];

  }

  _multigrid_setup_sles_work_arrays(mg, stride);

  BFT_FREE(_name);

  /* Timing (charged to the coarsest level's counters) */

  t1 = cs_timer_time();
  cs_timer_counter_add_diff(&(mg_lv_info->t_tot[0]), &t0, &t1);
}
2024 
2025 /*----------------------------------------------------------------------------*/
2026 /*!
2027  * \brief Setup multigrid sparse linear equation solver.
2028  *
2029  * \param[in, out]  context    pointer to multigrid solver info and context
2030  *                             (actual type: cs_multigrid_t  *)
2031  * \param[in]       name       pointer to name of linear system
2032  * \param[in]       mesh       associated mesh (for visualization), or NULL
2033  * \param[in, out]  f          associated fine grid
2034  * \param[in]       verbosity  associated verbosity
2035  */
2036 /*----------------------------------------------------------------------------*/
2037 
static void
_setup_hierarchy(void             *context,
                 const char       *name,
                 const cs_mesh_t  *mesh,
                 cs_grid_t        *f,
                 int               verbosity)

{
  cs_multigrid_t  *mg = context;

  cs_timer_t t0, t1, t2;

  int n_coarse_ranks = -2, n_coarse_ranks_prev = -2; /* for comparison only */
  cs_lnum_t n_rows = 0, n_cols_ext = 0, n_entries = 0;
  cs_gnum_t n_g_rows = 0, n_g_rows_prev = 0;

  cs_multigrid_level_info_t *mg_lv_info = NULL;

  cs_grid_t *g = f;

  t0 = cs_timer_time();

  /* Initialization */

  mg->setup_data = _multigrid_setup_data_create();

  _multigrid_add_level(mg, f); /* Assign to hierarchy */

  /* Add info */

  cs_grid_get_info(f,
                   NULL,
                   NULL,
                   NULL,
                   NULL,
                   NULL,
                   &n_rows,
                   &n_cols_ext,
                   &n_entries,
                   &n_g_rows);

  mg_lv_info = mg->lv_info;

  t1 = cs_timer_time();
  cs_timer_counter_add_diff(&(mg_lv_info->t_tot[0]), &t0, &t1);

  /* Build coarser and coarser grids until the level limit or a
     coarseness criterion is reached */

  bool add_grid = true;

  while (add_grid) {

    n_g_rows_prev = n_g_rows;
    n_coarse_ranks_prev = n_coarse_ranks;

    /* Recursion test */

    if ((int)(mg->setup_data->n_levels) >= mg->n_levels_max)
      break;

    /* Build coarser grid from previous grid; the BOTTOM subtype
       coarsens directly to a single aggregate */

    if (verbosity > 2)
      bft_printf(_("\n   building level %2u grid\n"), mg->setup_data->n_levels);

    if (mg->subtype == CS_MULTIGRID_BOTTOM)
      g = cs_grid_coarsen_to_single(g, mg->merge_stride, verbosity);

    else
      g = cs_grid_coarsen(g,
                          mg->coarsening_type,
                          mg->aggregation_limit,
                          verbosity,
                          mg->merge_stride,
                          mg->merge_mean_threshold,
                          mg->merge_glob_threshold,
                          mg->p0p1_relax);

    bool symmetric = true;
    int grid_lv;

    cs_grid_get_info(g,
                     &grid_lv,
                     &symmetric,
                     NULL,
                     NULL,
                     &n_coarse_ranks,
                     &n_rows,
                     &n_cols_ext,
                     &n_entries,
                     &n_g_rows);

#if defined(HAVE_MPI)
    /* When the coarse grid uses fewer ranks than the caller (rank
       merging), share the global row count with all caller ranks
       (ranks outside the coarse communicator may not hold it). */
    if ((n_coarse_ranks != mg->caller_n_ranks) && (mg->caller_n_ranks > 1)) {
      cs_gnum_t _n_g_rows = n_g_rows;
      MPI_Allreduce(&_n_g_rows, &n_g_rows, 1, CS_MPI_GNUM, MPI_MAX, mg->caller_comm);
    }
#endif

    assert((unsigned)grid_lv == mg->setup_data->n_levels);

    /* If too few rows were grouped, we stop at this level */

    if (mg->setup_data->n_levels > 1) {
      if (   (n_g_rows < mg->n_g_rows_min)
          || (   n_g_rows > (0.8 * n_g_rows_prev)
              && n_coarse_ranks == n_coarse_ranks_prev)) {
        add_grid = false;
        cs_grid_destroy(&g); /* too coarse, not added */
      }
    }

    if (add_grid) {

      _multigrid_add_level(mg, g); /* Assign to hierarchy */

      /* Print coarse mesh stats */

      if (verbosity > 2) {

#if defined(HAVE_MPI)

        if (mg->caller_n_ranks > 1) {

          int lcount[2], gcount[2];
          int n_c_min, n_c_max, n_f_min, n_f_max;

          lcount[0] = n_rows; lcount[1] = n_entries;
          MPI_Allreduce(lcount, gcount, 2, MPI_INT, MPI_MAX,
                        mg->caller_comm);
          n_c_max = gcount[0]; n_f_max = gcount[1];

          lcount[0] = n_rows; lcount[1] = n_entries;
          MPI_Allreduce(lcount, gcount, 2, MPI_INT, MPI_MIN,
                        mg->caller_comm);
          n_c_min = gcount[0]; n_f_min = gcount[1];

          bft_printf
            (_("                                  total       min        max\n"
               "     number of rows:      %12llu %10d %10d\n"
               "     number of entries:                %10d %10d\n"),
             (unsigned long long)n_g_rows, n_c_min, n_c_max, n_f_min, n_f_max);
      }

#endif

        if (mg->caller_n_ranks == 1)
          bft_printf(_("     number of rows:      %10d\n"
                       "     number of entries:   %10d\n"),
                     (int)n_rows, (int)n_entries);

      }

      mg_lv_info = mg->lv_info + grid_lv;
      mg_lv_info->n_ranks[0] = n_coarse_ranks;
      mg_lv_info->n_elts[0][0] = n_rows;
      mg_lv_info->n_elts[1][0] = n_cols_ext;
      mg_lv_info->n_elts[2][0] = n_entries;

    } /* end adding grid */

    /* Timing is charged to the most recently assigned level's info */

    t2 = cs_timer_time();
    cs_timer_counter_add_diff(&(mg_lv_info->t_tot[0]), &t1, &t2);
    t1 = t2;

  }

  /* Print final info */

  if (verbosity > 1)
    bft_printf
      (_("   number of grid levels:           %u\n"
         "   number of rows in coarsest grid: %llu\n\n"),
       mg->setup_data->n_levels, (unsigned long long)n_g_rows);

  /* Prepare preprocessing info if necessary */

  if (mg->post_row_max > 0) {
    if (mg->info.n_calls[0] == 0) {
      /* Determine the mesh location matching the system's rows
         (cells or vertices); CS_MESH_LOCATION_NONE if neither */
      int l_id = 0;
      const cs_matrix_t *a = cs_grid_get_matrix(f);
      const cs_lnum_t n_rows_a = cs_matrix_get_n_rows(a);
      if (n_rows_a == mesh->n_cells)
        l_id = CS_MESH_LOCATION_CELLS;
      else if (n_rows_a <= mesh->n_vertices) {
        l_id = CS_MESH_LOCATION_VERTICES;
        if (mesh->vtx_range_set != NULL) {
          if (mesh->vtx_range_set->n_elts[0] != n_rows_a)
            l_id = CS_MESH_LOCATION_NONE;
        }
      }
#if defined(HAVE_MPI)
      /* Enable postprocessing only if all ranks agree on the location:
         the max/min reduction pair detects any disagreement */
      if (mg->caller_n_ranks > 1) {
        int _l_id = l_id;
        MPI_Allreduce(&_l_id, &l_id, 1, MPI_INT, MPI_MAX, mg->caller_comm);
        if (l_id != _l_id)
          _l_id = CS_MESH_LOCATION_NONE;
        MPI_Allreduce(&_l_id, &l_id, 1, MPI_INT, MPI_MIN, mg->caller_comm);
      }
#endif
      if (l_id != CS_MESH_LOCATION_NONE) {
        cs_post_add_time_dep_output(_cs_multigrid_post_function, (void *)mg);
        _multigrid_add_post(mg, name, l_id, n_rows_a);
      }
    }
  }

  /* Update info */

#if defined(HAVE_MPI)

  /* In parallel, get global (average) values from local values */

  if (mg->caller_n_ranks > 1) {

    int i, j;
    cs_gnum_t *_n_elts_l = NULL, *_n_elts_s = NULL, *_n_elts_m = NULL;
    int grid_lv = mg->setup_data->n_levels;

    BFT_MALLOC(_n_elts_l, 3*grid_lv, cs_gnum_t);
    BFT_MALLOC(_n_elts_s, 3*grid_lv, cs_gnum_t);
    BFT_MALLOC(_n_elts_m, 3*grid_lv, cs_gnum_t);

    for (i = 0; i < grid_lv; i++) {
      cs_multigrid_level_info_t *mg_inf = mg->lv_info + i;
      for (j = 0; j < 3; j++)
        _n_elts_l[i*3 + j] = mg_inf->n_elts[j][0];
    }

    MPI_Allreduce(_n_elts_l, _n_elts_s, 3*grid_lv, CS_MPI_GNUM, MPI_SUM,
                  mg->caller_comm);
    MPI_Allreduce(_n_elts_l, _n_elts_m, 3*grid_lv, CS_MPI_GNUM, MPI_MAX,
                  mg->caller_comm);

    for (i = 0; i < grid_lv; i++) {
      cs_multigrid_level_info_t *mg_inf = mg->lv_info + i;
      cs_gnum_t n_g_ranks = mg_inf->n_ranks[0];
      for (j = 0; j < 3; j++) {
        /* Rounded mean count per rank, and max/mean imbalance factor */
        cs_gnum_t tmp_max = n_g_ranks * _n_elts_m[i*3+j];
        mg_inf->n_elts[j][0] = (_n_elts_s[i*3+j] + n_g_ranks/2) / n_g_ranks;
        mg_inf->imbalance[j][0] = (float)(tmp_max*1.0/_n_elts_s[i*3+j]);
      }
    }

    BFT_FREE(_n_elts_m);
    BFT_FREE(_n_elts_s);
    BFT_FREE(_n_elts_l);

  }

#endif

  mg->info.n_levels_tot += mg->setup_data->n_levels;

  mg->info.n_levels[0] = mg->setup_data->n_levels;

  /* Track minimum [1] and maximum [2] level counts across calls */

  if (mg->info.n_calls[0] > 0) {
    if (mg->info.n_levels[0] < mg->info.n_levels[1])
      mg->info.n_levels[1] = mg->info.n_levels[0];
    if (mg->info.n_levels[0] > mg->info.n_levels[2])
      mg->info.n_levels[2] = mg->info.n_levels[0];
  }
  else {
    mg->info.n_levels[1] = mg->info.n_levels[0];
    mg->info.n_levels[2] = mg->info.n_levels[0];
  }

  mg->info.n_calls[0] += 1;

  /* Cleanup temporary interpolation arrays */

  for (unsigned i = 0; i < mg->setup_data->n_levels; i++)
    cs_grid_free_quantities(mg->setup_data->grid_hierarchy[i]);

  /* Setup solvers */

  if (mg->subtype == CS_MULTIGRID_BOTTOM)
    _multigrid_setup_sles_k_cycle_bottom(mg, name, verbosity);
  else
    _multigrid_setup_sles(mg, name, verbosity);

  /* Update timers */

  t2 = cs_timer_time();
  cs_timer_counter_add_diff(&(mg->info.t_tot[0]), &t0, &t2);
}
2322 
2323 /*----------------------------------------------------------------------------*/
2324 /*!
2325  * \brief Setup coarse multigrid for k cycle HPC variant.
2326  *
2327  * \param[in, out]  mg         pointer to multigrid solver info and context
2328  * \param[in]       name       pointer to name of linear system
2329  * \param[in]       a          associated matrix
2330  * \param[in]       n_ranks    associated number of MPI ranks
2331  * \param[in]       verbosity  associated verbosity
2332  */
2333 /*----------------------------------------------------------------------------*/
2334 
2335 static void
_setup_k_cycle_hpc_sub(cs_multigrid_t * mg,const char * name,const cs_matrix_t * a,int n_ranks,int verbosity)2336 _setup_k_cycle_hpc_sub(cs_multigrid_t     *mg,
2337                        const char         *name,
2338                        const cs_matrix_t  *a,
2339                        int                 n_ranks,
2340                        int                 verbosity)
2341 {
2342   /* Destroy previous hierarchy if necessary */
2343 
2344   if (mg->setup_data != NULL)
2345     cs_multigrid_free(mg);
2346 
2347   /* Initialization */
2348 
2349   cs_timer_t t0 = cs_timer_time();
2350 
2351   if (verbosity > 1)
2352     bft_printf(_("\n Construction of grid hierarchy for \"%s\"\n"),
2353                name);
2354 
2355   /* Build coarse grids hierarchy */
2356   /*------------------------------*/
2357 
2358   cs_grid_t *f = cs_grid_create_from_parent(a, n_ranks);
2359 
2360   cs_multigrid_level_info_t *mg_lv_info = mg->lv_info;
2361 
2362   cs_timer_t t1 = cs_timer_time();
2363   cs_timer_counter_add_diff(&(mg_lv_info->t_tot[0]), &t0, &t1);
2364 
2365   _setup_hierarchy(mg, name, NULL, f, verbosity); /* Assign to and build
2366                                                      hierarchy */
2367 
2368   /* Update timers */
2369 
2370   t1 = cs_timer_time();
2371   cs_timer_counter_add_diff(&(mg->info.t_tot[0]), &t0, &t1);
2372 }
2373 
2374 /*----------------------------------------------------------------------------
2375  * Compute dot product, summing result over all ranks.
2376  *
2377  * parameters:
2378  *   mg <-- pointer to solver context info
2379  *   n  <-- local number of elements
2380  *   x  <-- vector in s = x.x
2381  *
2382  * returns:
2383  *   result of s = x.x
2384  *----------------------------------------------------------------------------*/
2385 
2386 inline static double
_dot_xx(const cs_multigrid_t * mg,cs_lnum_t n,const cs_real_t * x)2387 _dot_xx(const cs_multigrid_t  *mg,
2388         cs_lnum_t              n,
2389         const cs_real_t       *x)
2390 {
2391   double s = cs_dot_xx(n, x);
2392 
2393 #if defined(HAVE_MPI)
2394 
2395   if (mg->comm != MPI_COMM_NULL) {
2396     double _sum;
2397     MPI_Allreduce(&s, &_sum, 1, MPI_DOUBLE, MPI_SUM, mg->comm);
2398     s = _sum;
2399   }
2400 
2401 #endif /* defined(HAVE_MPI) */
2402 
2403   return s;
2404 }
2405 
2406 /*----------------------------------------------------------------------------
2407  * Compute 2 dot products x.x and y.y, summing result over all ranks.
2408  *
2409  * parameters:
2410  *   mg <-- pointer to solver context info
2411  *   n  <-- number of associated values
2412  *   x  <-- vector in s1 = x.x
2413  *   y  <-- vector in s2 = y.y
2414  *   s1 --> result of s1 = x.x
2415  *   s2 --> result of s2 = y.y
2416  *----------------------------------------------------------------------------*/
2417 
2418 inline static void
_dot_xx_yy(const cs_multigrid_t * mg,cs_lnum_t n,const cs_real_t * x,const cs_real_t * y,double * s1,double * s2)2419 _dot_xx_yy(const cs_multigrid_t  *mg,
2420            cs_lnum_t              n,
2421            const cs_real_t       *x,
2422            const cs_real_t       *y,
2423            double                *s1,
2424            double                *s2)
2425 {
2426   double s[2];
2427 
2428   s[0] = cs_dot_xx(n, x);
2429   s[1] = cs_dot_xx(n, y);
2430 
2431 #if defined(HAVE_MPI)
2432 
2433   if (mg->comm != MPI_COMM_NULL) {
2434     double _sum[2];
2435     MPI_Allreduce(s, _sum, 2, MPI_DOUBLE, MPI_SUM, mg->comm);
2436     s[0] = _sum[0];
2437     s[1] = _sum[1];
2438   }
2439 
2440 #endif /* defined(HAVE_MPI) */
2441 
2442   *s1 = s[0];
2443   *s2 = s[1];
2444 }
2445 
2446 /*----------------------------------------------------------------------------
 * Compute 2 dot products x.y and y.z, summing result over all ranks.
2448  *
2449  * parameters:
2450  *   mg <-- pointer to solver context info
2451  *   n  <-- number of associated values
2452  *   x  <-- vector in s1 = x.y
2453  *   y  <-- vector in s1 = x.y and s2 = y.z
2454  *   z  <-- vector in s2 = y.z
2455  *   s1 --> result of s1 = x.y
2456  *   s2 --> result of s2 = y.z
2457  *----------------------------------------------------------------------------*/
2458 
2459 inline static void
_dot_xy_yz(const cs_multigrid_t * mg,cs_lnum_t n,const cs_real_t * x,const cs_real_t * y,const cs_real_t * z,double * s1,double * s2)2460 _dot_xy_yz(const cs_multigrid_t  *mg,
2461            cs_lnum_t              n,
2462            const cs_real_t       *x,
2463            const cs_real_t       *y,
2464            const cs_real_t       *z,
2465            double                *s1,
2466            double                *s2)
2467 {
2468   double s[2];
2469 
2470   cs_dot_xy_yz(n, x, y, z, s, s+1);
2471 
2472 #if defined(HAVE_MPI)
2473 
2474   if (mg->comm != MPI_COMM_NULL) {
2475     double _sum[2];
2476     MPI_Allreduce(s, _sum, 2, MPI_DOUBLE, MPI_SUM, mg->comm);
2477     s[0] = _sum[0];
2478     s[1] = _sum[1];
2479   }
2480 
2481 #endif /* defined(HAVE_MPI) */
2482 
2483   *s1 = s[0];
2484   *s2 = s[1];
2485 }
2486 
2487 /*----------------------------------------------------------------------------
 * Compute 3 dot products x.u, x.v, and x.w, summing result over all ranks.
 *
 * parameters:
 *   mg <-- pointer to solver context info
 *   n  <-- number of associated values
 *   x  <-- vector in s1 = x.u, s2 = x.v, s3 = x.w
 *   u  <-- vector in s1 = x.u
 *   v  <-- vector in s2 = x.v
 *   w  <-- vector in s3 = x.w
 *   s1 --> result of s1 = x.u
 *   s2 --> result of s2 = x.v
 *   s3 --> result of s3 = x.w
2499  *----------------------------------------------------------------------------*/
2500 
2501 inline static void
_dot_xu_xv_xw(const cs_multigrid_t * mg,cs_lnum_t n,const cs_real_t * x,const cs_real_t * u,const cs_real_t * v,const cs_real_t * w,double * s1,double * s2,double * s3)2502 _dot_xu_xv_xw(const cs_multigrid_t  *mg,
2503               cs_lnum_t              n,
2504               const cs_real_t       *x,
2505               const cs_real_t       *u,
2506               const cs_real_t       *v,
2507               const cs_real_t       *w,
2508               double                *s1,
2509               double                *s2,
2510               double                *s3)
2511 {
2512   double s[3];
2513 
2514   /* Use two separate call as cs_blas.c does not yet hav matching call */
2515   cs_dot_xy_yz(n, u, x, v, s, s+1);
2516   s[2] = cs_dot(n, x, w);
2517 
2518 #if defined(HAVE_MPI)
2519 
2520   if (mg->comm != MPI_COMM_NULL) {
2521     double _sum[3];
2522     MPI_Allreduce(s, _sum, 3, MPI_DOUBLE, MPI_SUM, mg->comm);
2523     s[0] = _sum[0];
2524     s[1] = _sum[1];
2525     s[2] = _sum[2];
2526   }
2527 
2528 #endif /* defined(HAVE_MPI) */
2529 
2530   *s1 = s[0];
2531   *s2 = s[1];
2532   *s3 = s[2];
2533 }
2534 
2535 /*----------------------------------------------------------------------------
2536  * Test if convergence is attained.
2537  *
2538  * parameters:
2539  *   mg              <-- associated multigrid structure
2540  *   var_name        <-- variable name
2541  *   n_f_rows       <-- number of rows on fine mesh
2542  *   n_max_cycles    <-- maximum number of cycles
2543  *   cycle_id        <-- number of current cycle
2544  *
2545  *   verbosity       <-- verbosity level
2546  *   n_iters         <-- number of iterations
2547  *   precision       <-- precision limit
2548  *   r_norm          <-- residue normalization
2549  *   initial_residue <-- initial residue
2550  *   residue         <-> residue
2551  *   rhs             <-- right-hand side
2552  *
2553  * returns:
2554  *   convergence status
2555  *----------------------------------------------------------------------------*/
2556 
static cs_sles_convergence_state_t
_convergence_test(cs_multigrid_t        *mg,
                  const char            *var_name,
                  cs_lnum_t              n_f_rows,
                  int                    n_max_cycles,
                  int                    cycle_id,
                  int                    verbosity,
                  int                    n_iters,
                  double                 precision,
                  double                 r_norm,
                  double                 initial_residue,
                  double                *residue,
                  const cs_real_t        rhs[])
{
  /* Log output formats: table header, table footer, one table row,
     and a compact single-line summary (used at lower verbosity). */

  const char cycle_h_fmt[]
    = N_("  ---------------------------------------------------\n"
         "    n.     | Cumulative iterations | Norm. residual\n"
         "    cycles | on fine mesh          | on fine mesh\n"
         "  ---------------------------------------------------\n");
  const char cycle_t_fmt[]
    = N_("  ---------------------------------------------------\n");
  const char cycle_cv_fmt[]
    = N_("     %4d  |               %6d  |  %12.4e\n");

  const char cycle_fmt[]
    = N_("   N. cycles: %4d; Fine mesh cumulative iter: %5d; "
         "Norm. residual %12.4e\n");

  /* Compute residue: Euclidean norm of rhs (the caller passes the
     current fine-level residual vector through this argument). */

  *residue = sqrt(_dot_xx(mg, n_f_rows, rhs));

  /* initial_residue is passed by value, so this update is local to
     this call; it only affects the divergence test below. */

  if (cycle_id == 1)
    initial_residue = *residue;

  /* Plot convergence if requested */

  if (mg->cycle_plot != NULL) {
    double vals = *residue;
    double wall_time = cs_timer_wtime();
    mg->plot_time_stamp += 1;
    cs_time_plot_vals_write(mg->cycle_plot,
                            mg->plot_time_stamp,
                            wall_time,
                            1,
                            &vals);
  }

  /* Converged relative to the normalized precision target */

  if (*residue < precision*r_norm) {

    if (verbosity == 2)
      bft_printf(_(cycle_fmt), cycle_id, n_iters, *residue/r_norm);
    else if (verbosity > 2) {
      bft_printf(_(cycle_h_fmt));
      bft_printf(_(cycle_cv_fmt),
                 cycle_id, n_iters, *residue/r_norm);
      bft_printf(_(cycle_t_fmt));
    }
    return CS_SLES_CONVERGED;
  }

  /* Cycle budget exhausted: warn (unless this multigrid instance is
     used as a preconditioner or a sub-solver, where the warning is
     only issued at higher verbosity). */

  else if (cycle_id > n_max_cycles) {

    if (  (verbosity > -1 && !(mg->info.is_pc || mg->subtype != CS_MULTIGRID_MAIN))
        || verbosity > 0) {
      if (verbosity == 1)
        bft_printf(_(cycle_fmt), cycle_id, n_iters, *residue/r_norm);
      else if (verbosity > 1) {
        bft_printf(_(cycle_fmt),
                   cycle_id, n_iters, *residue/r_norm);
        bft_printf(_(cycle_t_fmt));
      }
      bft_printf(_(" @@ Warning: algebraic multigrid for [%s]\n"
                   "    ********\n"
                   "    Maximum number of cycles (%d) reached.\n"),
                 var_name, n_max_cycles);
    }
    return CS_SLES_MAX_ITERATION;
  }

  else {

    /* Divergence heuristic: residual grew by 4 orders of magnitude
       relative to the initial residual and is large in absolute terms. */

    if (*residue > initial_residue * 10000.0 && *residue > 100.) {
      if (verbosity > 2)
        bft_printf(_(cycle_fmt), cycle_id, n_iters, *residue/r_norm);
      return CS_SLES_DIVERGED;
    }
    else if (verbosity > 2) {
      if (cycle_id == 1)
        bft_printf(_(cycle_h_fmt));
      bft_printf(_(cycle_cv_fmt), cycle_id, n_iters, *residue/r_norm);
    }

    /* NaN/Inf detection requires C99 classification macros */

#if (__STDC_VERSION__ >= 199901L)
    if (isnan(*residue) || isinf(*residue))
      return CS_SLES_DIVERGED;
#endif
  }

  return CS_SLES_ITERATING;
}
2658 
/*----------------------------------------------------------------------------
 * Log residue A.vx - Rhs
 *
 * parameters:
 *   mg              <-- pointer to multigrid context info
 *   cycle_id        <-- cycle id
 *   var_name        <-- variable name
 *   a               <-- matrix
 *   rhs             <-- right hand side
 *   vx              <-> system solution
 *----------------------------------------------------------------------------*/
2673 
2674 static void
_log_residual(const cs_multigrid_t * mg,int cycle_id,const char * var_name,const cs_matrix_t * a,const cs_real_t * rhs,cs_real_t * restrict vx)2675 _log_residual(const cs_multigrid_t   *mg,
2676               int                     cycle_id,
2677               const char             *var_name,
2678               const cs_matrix_t      *a,
2679               const cs_real_t        *rhs,
2680               cs_real_t              *restrict vx)
2681 {
2682   const cs_lnum_t *diag_block_size = cs_matrix_get_diag_block_size(a);
2683   const cs_lnum_t n_rows = cs_matrix_get_n_rows(a) * diag_block_size[0];
2684   const cs_lnum_t n_cols = cs_matrix_get_n_columns(a) * diag_block_size[0];
2685 
2686   cs_real_t  *r;
2687   BFT_MALLOC(r, n_cols, cs_real_t);
2688 
2689   cs_matrix_vector_multiply(a, vx, r);
2690 
2691   for (cs_lnum_t i = 0; i < n_rows; i++)
2692     r[i] -= rhs[i];
2693 
2694   double s = cs_dot_xx(n_rows, r);
2695 
2696   BFT_FREE(r);
2697 
2698 #if defined(HAVE_MPI)
2699 
2700   if (mg->comm != MPI_COMM_NULL) {
2701     double _sum;
2702     MPI_Allreduce(&s, &_sum, 1, MPI_DOUBLE, MPI_SUM, mg->comm);
2703     s = _sum;
2704   }
2705 
2706 #endif /* defined(HAVE_MPI) */
2707 
2708   cs_log_printf(CS_LOG_DEFAULT, "  mg cycle %d: %s residual: %.3g\n",
2709                 cycle_id, var_name, s);
2710 }
2711 
2712 /*----------------------------------------------------------------------------
2713  * Update level information iteration counts
2714  *
2715  * parameters:
2716  *   lv_info_it <-> logged number of iterations (last, min, max, total)
2717  *   n_iter     <-- current number of iterations
2718  *----------------------------------------------------------------------------*/
2719 
static inline void
_lv_info_update_stage_iter(unsigned long long  lv_info_it[],
                           unsigned            n_iter)
{
  const unsigned long long it = n_iter;

  /* Slot 0: iteration count of the most recent call */
  lv_info_it[0] = it;

  /* Slots 1 and 2: running minimum / maximum
     (at most one of the two is updated per call) */
  if (it < lv_info_it[1])
    lv_info_it[1] = it;
  else if (it > lv_info_it[2])
    lv_info_it[2] = it;

  /* A zero minimum marks "not yet initialized": seed it now */
  if (lv_info_it[1] == 0)
    lv_info_it[1] = it;

  /* Slot 3: cumulative total */
  lv_info_it[3] += it;
}
2733 
2734 /*----------------------------------------------------------------------------
2735  * Compute buffer size required for level names
2736  *
2737  * parameters:
2738  *   name     <-- linear system name
2739  *   n_levels <-- number multigrid levels
2740  *
2741  * returns:
2742  *   buffer size needed for level names
2743  *----------------------------------------------------------------------------*/
2744 
static size_t
_level_names_size(const char  *name,
                  int          n_levels)
{
  /* Number of digits needed to format the largest level id */

  int w = 1;
  {
    int m = n_levels/10;
    while (m > 0) {
      w += 1;
      m /= 10;
    }
  }

  /* First part: array of pointers (two names per level),
     padded for alignment */

  size_t retval = CS_SIMD_SIZE(n_levels*sizeof(char *)*2);

  /* Second part: string storage, 2 names per level, each of the form
     "<name>:descent:<id>" (":descent:" is the longest stage tag) */

  size_t buf_size = 0;
  if (n_levels > 1)
    buf_size =   (strlen(name) + strlen(":descent:") + w + 1)
               * (n_levels)*2;

  return retval + CS_SIMD_SIZE(buf_size);
}
2770 
2771 /*----------------------------------------------------------------------------
2772  * Initialize level names
2773  *
2774  * parameters:
2775  *   name     <-- linear system name
2776  *   n_levels <-- number multigrid levels
2777  *   buffer   <-- buffer
2778  *----------------------------------------------------------------------------*/
2779 
static void
_level_names_init(const char  *name,
                  int          n_levels,
                  void        *buffer)
{
  /* Format name width: number of digits needed for the largest level id
     (must match the computation in _level_names_size) */

  int w = 1;
  for (int i = n_levels/10; i > 0; i /=10)
    w += 1;

  /* First part: pointers
     (buffer is assumed to have been sized by _level_names_size with the
     same name and n_levels — TODO confirm at call sites) */

  size_t ptr_size = n_levels*sizeof(char *)*2;
  ptr_size = CS_SIMD_SIZE(ptr_size);

  char *_buffer = buffer;
  char **_lv_names = buffer;
  const char **lv_names = (const char **)_lv_names;
  /* Fixed stride per name: base name + longest stage tag + id + NUL */
  const size_t name_len = strlen(name) + strlen(":descent:") + w + 1;

  /* Second part: buffers
     (each level except the coarsest gets a descent and an ascent name,
     laid out contiguously after the pointer array) */

  for (int i = 0; i < n_levels -1; i++) {
    lv_names[i*2] = _buffer + ptr_size + i*2*name_len;
    lv_names[i*2+1] = lv_names[i*2] + name_len;
    sprintf(_lv_names[i*2], "%s:descent:%0*d", name, w, i);
    sprintf(_lv_names[i*2+1], "%s:ascent:%0*d", name, w, i);
  }

  /* Coarsest level: a single "coarse" name, no ascent counterpart */

  if (n_levels > 1) {
    int i = n_levels - 1;
    lv_names[i*2] = _buffer + ptr_size + i*2*name_len;
    lv_names[i*2+1] = NULL;
    sprintf(_lv_names[i*2], "%s:coarse:%0*d", name, w, i);
  }
}
2817 
2818 /*----------------------------------------------------------------------------
2819  * Sparse linear system resolution using multigrid.
2820  *
2821  * parameters:
2822  *   mg              <-- multigrid system
2823  *   lv_names        <-- names of linear systems
2824  *                       (indexed as mg->setup_data->sles_hierarchy)
2825  *   verbosity       <-- verbosity level
 *   cycle_id        <-- id of current cycle
2827  *   n_equiv_iter    <-> equivalent number of iterations
2828  *   precision       <-- solver precision
2829  *   r_norm          <-- residue normalization
2830  *   initial_residue <-> initial residue
2831  *   residue         <-> residue
2832  *   rhs             <-- right hand side
2833  *   vx              --> system solution
2834  *   aux_size        <-- number of elements in aux_vectors (in bytes)
2835  *   aux_vectors     --- optional working area (allocation otherwise)
2836  *
2837  * returns:
2838  *   convergence status
2839  *----------------------------------------------------------------------------*/
2840 
2841 static cs_sles_convergence_state_t
_multigrid_v_cycle(cs_multigrid_t * mg,const char ** lv_names,int verbosity,int cycle_id,int * n_equiv_iter,double precision,double r_norm,double * initial_residue,double * residue,const cs_real_t * rhs,cs_real_t * vx,size_t aux_size,void * aux_vectors)2842 _multigrid_v_cycle(cs_multigrid_t       *mg,
2843                    const char          **lv_names,
2844                    int                   verbosity,
2845                    int                   cycle_id,
2846                    int                  *n_equiv_iter,
2847                    double                precision,
2848                    double                r_norm,
2849                    double               *initial_residue,
2850                    double               *residue,
2851                    const cs_real_t      *rhs,
2852                    cs_real_t            *vx,
2853                    size_t                aux_size,
2854                    void                 *aux_vectors)
2855 {
2856   int level, coarsest_level;
2857   cs_lnum_t ii;
2858   cs_timer_t t0, t1;
2859 
2860   cs_lnum_t db_size[4] = {1, 1, 1, 1};
2861   cs_lnum_t eb_size[4] = {1, 1, 1, 1};
2862   cs_sles_convergence_state_t cvg = CS_SLES_ITERATING, c_cvg = CS_SLES_ITERATING;
2863   int n_iter = 0;
2864   double _residue = -1.;
2865   double _initial_residue = 0.;
2866 
2867   size_t _aux_r_size = aux_size / sizeof(cs_real_t);
2868   cs_lnum_t n_rows = 0, n_cols_ext = 0;
2869   cs_lnum_t _n_rows = 0;
2870   cs_gnum_t n_g_rows = 0;
2871   cs_real_t r_norm_l = r_norm;
2872 
2873   double denom_n_g_rows_0 = 1.0;
2874 
2875   cs_multigrid_setup_data_t *mgd = mg->setup_data;
2876   cs_multigrid_level_info_t  *lv_info = NULL;
2877 
2878   cs_real_t *_aux_vectors = aux_vectors;
2879   cs_real_t *restrict wr = NULL;
2880   cs_real_t *restrict vx_lv = NULL;
2881 
2882   const cs_real_t *restrict rhs_lv = NULL;
2883   const cs_matrix_t  *_matrix = NULL;
2884   const cs_grid_t *f = NULL, *c= NULL;
2885 
2886   bool end_cycle = false;
2887 
2888   /* Initialization */
2889 
2890   coarsest_level = mgd->n_levels - 1;
2891 
2892   f = mgd->grid_hierarchy[0];
2893 
2894   cs_grid_get_info(f,
2895                    NULL,
2896                    NULL,
2897                    db_size,
2898                    eb_size,
2899                    NULL,
2900                    &n_rows,
2901                    &n_cols_ext,
2902                    NULL,
2903                    &n_g_rows);
2904 
2905   denom_n_g_rows_0 = 1.0 / n_g_rows;
2906 
2907   /* Allocate wr or use working area
2908      (note the finest grid could have less elements than a coarser
2909      grid to wich rank merging has been applied, hence the test below) */
2910 
2911   size_t wr_size = n_cols_ext*db_size[1];
2912   for (level = 1; level < (int)(mgd->n_levels); level++) {
2913     cs_lnum_t n_cols_max
2914       = cs_grid_get_n_cols_max(mgd->grid_hierarchy[level]);
2915     wr_size = CS_MAX(wr_size, (size_t)(n_cols_max*db_size[1]));
2916     wr_size = CS_SIMD_SIZE(wr_size);
2917   }
2918 
2919   if (_aux_r_size >= wr_size) {
2920     wr = aux_vectors;
2921     _aux_vectors = wr + wr_size;
2922     _aux_r_size -= wr_size;
2923   }
2924   else
2925     BFT_MALLOC(wr, wr_size, cs_real_t);
2926 
2927   /* map arrays for rhs and vx;
2928      for the finest level, simply point to input and output arrays */
2929 
2930   mgd->rhs_vx[0] = NULL; /* Use _rhs_level when necessary to avoid
2931                             const warning */
2932   mgd->rhs_vx[1] = vx;
2933 
2934   /* Descent */
2935   /*---------*/
2936 
2937   for (level = 0; level < coarsest_level; level++) {
2938 
2939     lv_info = mg->lv_info + level;
2940     t0 = cs_timer_time();
2941 
2942     rhs_lv = (level == 0) ? rhs : mgd->rhs_vx[level*2];
2943     vx_lv = mgd->rhs_vx[level*2 + 1];
2944 
2945     c = mgd->grid_hierarchy[level+1];
2946 
2947     /* Smoother pass */
2948 
2949     _matrix = cs_grid_get_matrix(f);
2950 
2951     cs_mg_sles_t  *mg_sles = &(mgd->sles_hierarchy[level*2]);
2952 
2953     c_cvg = mg_sles->solve_func(mg_sles->context,
2954                                 lv_names[level*2],
2955                                 _matrix,
2956                                 verbosity - 4, /* verbosity */
2957                                 precision*mg->info.precision_mult[0],
2958                                 r_norm_l,
2959                                 &n_iter,
2960                                 &_residue,
2961                                 rhs_lv,
2962                                 vx_lv,
2963                                 _aux_r_size*sizeof(cs_real_t),
2964                                 _aux_vectors);
2965 
2966     if (mg->plot_time_stamp > -1)
2967       mg->plot_time_stamp += n_iter+1;
2968 
2969     if (mg_sles->solve_func == cs_sles_it_solve)
2970       _initial_residue
2971         = cs_sles_it_get_last_initial_residue(mg_sles->context);
2972     else
2973       _initial_residue = HUGE_VAL;
2974 
2975     if (level == 0 && cycle_id == 1)
2976       *initial_residue = _initial_residue;
2977 
2978     if (verbosity > 1)
2979       _log_residual(mg, cycle_id, lv_names[level*2],
2980                     _matrix, rhs_lv, vx_lv);
2981 
2982     if (c_cvg < CS_SLES_BREAKDOWN) {
2983       end_cycle = true;
2984       break;
2985     }
2986 
2987     /* Restrict residue
2988        TODO: get residue from cs_sles_solve(). This optimisation would
2989        require adding an argument and exercising caution to ensure the
2990        correct sign and meaning of the residue
2991        (regarding timing, this stage is part of the descent smoother) */
2992 
2993     cs_matrix_vector_multiply(_matrix, vx_lv, wr);
2994 
2995     _n_rows = n_rows*db_size[1];
2996 #   pragma omp parallel for if(_n_rows > CS_THR_MIN)
2997     for (ii = 0; ii < _n_rows; ii++)
2998       wr[ii] = rhs_lv[ii] - wr[ii];
2999 
3000     /* Convergence test in beginning of cycle (fine mesh) */
3001 
3002     if (level == 0) {
3003 
3004       cvg = _convergence_test(mg,
3005                               lv_names[0],
3006                               n_rows*db_size[1],
3007                               mg->info.n_max_cycles,
3008                               cycle_id,
3009                               verbosity,
3010                               lv_info->n_it_ds_smoothe[3],
3011                               precision,
3012                               r_norm,
3013                               *initial_residue,
3014                               residue,
3015                               wr);
3016 
3017       /* If converged or cycle limit reached, break from descent loop */
3018 
3019       if (cvg != 0) {
3020         c_cvg = cvg;
3021         end_cycle = true;
3022         t1 = cs_timer_time();
3023         cs_timer_counter_add_diff(&(lv_info->t_tot[2]), &t0, &t1);
3024         lv_info->n_calls[2] += 1;
3025         _lv_info_update_stage_iter(lv_info->n_it_ds_smoothe, n_iter);
3026         *n_equiv_iter += n_iter * n_g_rows * denom_n_g_rows_0;
3027         break;
3028       }
3029 
3030     }
3031 
3032     t1 = cs_timer_time();
3033     cs_timer_counter_add_diff(&(lv_info->t_tot[2]), &t0, &t1);
3034     lv_info->n_calls[2] += 1;
3035     _lv_info_update_stage_iter(lv_info->n_it_ds_smoothe, n_iter);
3036     *n_equiv_iter += n_iter * n_g_rows * denom_n_g_rows_0;
3037 
3038     /* Prepare for next level */
3039 
3040     cs_grid_restrict_row_var(f, c, wr, mgd->rhs_vx[(level+1)*2]);
3041 
3042     cs_grid_get_info(c,
3043                      NULL,
3044                      NULL,
3045                      NULL,
3046                      NULL,
3047                      NULL,
3048                      &n_rows,
3049                      &n_cols_ext,
3050                      NULL,
3051                      &n_g_rows);
3052 
3053     f = c;
3054 
3055     /* Initialize correction */
3056 
3057     cs_real_t *restrict vx_lv1 = mgd->rhs_vx[(level+1)*2 + 1];
3058 
3059     _n_rows = n_rows*db_size[1];
3060 #   pragma omp parallel for if(n_rows > CS_THR_MIN)
3061     for (ii = 0; ii < _n_rows; ii++)
3062       vx_lv1[ii] = 0.0;
3063 
3064     t0 = cs_timer_time();
3065     cs_timer_counter_add_diff(&(lv_info->t_tot[4]), &t1, &t0);
3066     lv_info->n_calls[4] += 1;
3067 
3068   } /* End of loop on levels (descent) */
3069 
3070   if (end_cycle == false) {
3071 
3072     /* Resolve coarsest level to convergence */
3073     /*---------------------------------------*/
3074 
3075     assert(level == coarsest_level);
3076     assert(c == mgd->grid_hierarchy[coarsest_level]);
3077 
3078     /* coarsest level == 0 should never happen, but we play it safe */
3079     rhs_lv = (level == 0) ?  rhs : mgd->rhs_vx[coarsest_level*2];
3080     vx_lv = mgd->rhs_vx[level*2 + 1];
3081 
3082     _matrix = cs_grid_get_matrix(c);
3083 
3084     cs_mg_sles_t  *mg_sles = &(mgd->sles_hierarchy[level*2]);
3085 
3086     _initial_residue = _residue;
3087 
3088     lv_info = mg->lv_info + level;
3089 
3090     t0 = cs_timer_time();
3091 
3092     c_cvg = mg_sles->solve_func(mg_sles->context,
3093                                 lv_names[level*2],
3094                                 _matrix,
3095                                 verbosity - 3,
3096                                 precision*mg->info.precision_mult[2],
3097                                 r_norm_l,
3098                                 &n_iter,
3099                                 &_residue,
3100                                 rhs_lv,
3101                                 vx_lv,
3102                                 _aux_r_size*sizeof(cs_real_t),
3103                                 _aux_vectors);
3104 
3105     t1 = cs_timer_time();
3106     cs_timer_counter_add_diff(&(lv_info->t_tot[1]), &t0, &t1);
3107     lv_info->n_calls[1] += 1;
3108     _lv_info_update_stage_iter(lv_info->n_it_solve, n_iter);
3109 
3110     if (mg_sles->solve_func == cs_sles_it_solve)
3111       _initial_residue
3112         = cs_sles_it_get_last_initial_residue(mg_sles->context);
3113     else
3114       _initial_residue = HUGE_VAL;
3115 
3116     *n_equiv_iter += n_iter * n_g_rows * denom_n_g_rows_0;
3117 
3118     if (verbosity > 1)
3119       _log_residual(mg, cycle_id, lv_names[level*2],
3120                     _matrix, rhs_lv, vx_lv);
3121 
3122     if (c_cvg < CS_SLES_BREAKDOWN)
3123       end_cycle = true;
3124 
3125   }
3126 
3127   if (end_cycle == false) {
3128 
3129     /* Ascent */
3130     /*--------*/
3131 
3132     for (level = coarsest_level - 1; level > -1; level--) {
3133 
3134       vx_lv = mgd->rhs_vx[level*2 + 1];
3135 
3136       lv_info = mg->lv_info + level;
3137 
3138       c = mgd->grid_hierarchy[level+1];
3139       f = mgd->grid_hierarchy[level];
3140 
3141       cs_grid_get_info(f,
3142                        NULL,
3143                        NULL,
3144                        NULL,
3145                        NULL,
3146                        NULL,
3147                        &n_rows,
3148                        &n_cols_ext,
3149                        NULL,
3150                        &n_g_rows);
3151 
3152       /* Prolong correction */
3153 
3154       t0 = cs_timer_time();
3155 
3156       cs_real_t *restrict vx_lv1 = mgd->rhs_vx[(level+1)*2 + 1];
3157       cs_grid_prolong_row_var(c, f, vx_lv1, wr);
3158 
3159       _n_rows = n_rows*db_size[1];
3160 #     pragma omp parallel for if(n_rows > CS_THR_MIN)
3161       for (ii = 0; ii < _n_rows; ii++)
3162         vx_lv[ii] += wr[ii];
3163 
3164       t1 = cs_timer_time();
3165       cs_timer_counter_add_diff(&(lv_info->t_tot[5]), &t0, &t1);
3166       lv_info->n_calls[5] += 1;
3167 
3168       /* Smoother pass if level > 0
3169          (smoother not called for finest mesh, as it will be called in
3170          descent phase of the next cycle, before the convergence test). */
3171 
3172       if (level > 0) {
3173 
3174         rhs_lv = mgd->rhs_vx[level*2];
3175 
3176         _matrix = cs_grid_get_matrix(f);
3177 
3178         cs_mg_sles_t  *mg_sles = &(mgd->sles_hierarchy[level*2 + 1]);
3179 
3180         c_cvg = mg_sles->solve_func(mg_sles->context,
3181                                     lv_names[level*2+1],
3182                                     _matrix,
3183                                     verbosity - 4, /* verbosity */
3184                                     precision*mg->info.precision_mult[1],
3185                                     r_norm_l,
3186                                     &n_iter,
3187                                     &_residue,
3188                                     rhs_lv,
3189                                     vx_lv,
3190                                     _aux_r_size*sizeof(cs_real_t),
3191                                     _aux_vectors);
3192 
3193         t0 = cs_timer_time();
3194         cs_timer_counter_add_diff(&(lv_info->t_tot[3]), &t1, &t0);
3195         lv_info->n_calls[3] += 1;
3196         _lv_info_update_stage_iter(lv_info->n_it_as_smoothe, n_iter);
3197 
3198         if (mg_sles->solve_func == cs_sles_it_solve)
3199           _initial_residue
3200             = cs_sles_it_get_last_initial_residue(mg_sles->context);
3201         else
3202           _initial_residue = HUGE_VAL;
3203 
3204         *n_equiv_iter += n_iter * n_g_rows * denom_n_g_rows_0;
3205 
3206         if (verbosity > 1)
3207           _log_residual(mg, cycle_id, lv_names[level*2+1],
3208                         _matrix, rhs_lv, vx_lv);
3209 
3210         if (c_cvg < CS_SLES_BREAKDOWN)
3211           break;
3212       }
3213 
3214     } /* End loop on levels (ascent) */
3215 
3216   } /* End of tests on end_cycle */
3217 
3218   mgd->exit_level = level;
3219   mgd->exit_residue = _residue;
3220   if (level == 0)
3221     mgd->exit_initial_residue = *initial_residue;
3222   else
3223     mgd->exit_initial_residue = _initial_residue;
3224   mgd->exit_cycle_id = cycle_id;
3225 
3226   /* Free memory */
3227 
3228   if (wr != aux_vectors)
3229     BFT_FREE(wr);
3230 
3231   return cvg;
3232 }
3233 
3234 /*----------------------------------------------------------------------------
3235  * Apply a multigrid K-cycle on the current level.
3236  *
3237  * This version of the multigrid cycle is a K-cycle, described by Y. Notay in
3238  * "An aggregation-based algebraic multigrid method", Electronic Transactions on
3239  * Numerical Analysis, 2010, vol 37, pages 123-146.
3240  *
3241  * Used as a preconditioner, it is not constant (sometimes there is only one
3242  * visit of the coarse level, sometimes 2) and therefore, it has to be used with
3243  * IPCG (or FCG) instead of CG. Descent and ascent smoothers still have to be
3244  * symmetric for the IPCG to converge.
3245  *
3246  * parameters:
3247  *   mg              <-- multigrid system
3248  *   level           <-- level on which we apply the cycle
3249  *   lv_names        <-- names of linear systems
3250  *                       (indexed as mg->setup_data->sles_hierarchy)
3251  *   verbosity       <-- verbosity level
 *   cycle_id        <-- id of current cycle
3253  *   n_equiv_iter    <-> equivalent number of iterations
3254  *   precision       <-- solver precision
3255  *   r_norm          <-- residue normalization
3256  *   initial_residue <-> initial residue
3257  *   residue         <-> residue
3258  *   rhs             <-- right hand side
3259  *   vx              --> system solution
3260  *   aux_size        <-- number of elements in aux_vectors (in bytes)
3261  *   aux_vectors     --- optional working area (allocation otherwise)
3262  *
3263  * returns:
3264  *   convergence status
3265  *----------------------------------------------------------------------------*/
3266 
3267 static cs_sles_convergence_state_t
_multigrid_k_cycle(cs_multigrid_t * mg,int level,const char ** lv_names,int verbosity,int cycle_id,int * n_equiv_iter,double precision,double r_norm,double * initial_residue,double * residue,const cs_real_t * rhs,cs_real_t * vx,size_t aux_size,void * aux_vectors)3268 _multigrid_k_cycle(cs_multigrid_t       *mg,
3269                    int                   level,
3270                    const char          **lv_names,
3271                    int                   verbosity,
3272                    int                   cycle_id,
3273                    int                  *n_equiv_iter,
3274                    double                precision,
3275                    double                r_norm,
3276                    double               *initial_residue,
3277                    double               *residue,
3278                    const cs_real_t      *rhs,
3279                    cs_real_t            *vx,
3280                    size_t                aux_size,
3281                    void                 *aux_vectors)
3282 {
3283   cs_timer_t t0, t1;
3284 
3285   bool end_cycle = false;
3286   cs_sles_convergence_state_t c_cvg = CS_SLES_ITERATING, cvg = CS_SLES_ITERATING;
3287   int n_iter = 0;
3288   double _residue = -1.;
3289   cs_real_t r_norm_l = r_norm;
3290 
3291   size_t _aux_r_size = aux_size / sizeof(cs_real_t);
3292   cs_lnum_t f_n_rows = 0, c_n_rows = 0;
3293   cs_lnum_t f_n_cols_ext = 0, c_n_cols_ext = 0;
3294   cs_gnum_t f_n_g_rows = 0, c_n_g_rows = 0;
3295 
3296   cs_multigrid_setup_data_t *mgd = mg->setup_data;
3297   int coarsest_level = mgd->n_levels-1;
3298 
3299   cs_lnum_t db_size[4] = {1, 1, 1, 1};
3300   cs_lnum_t eb_size[4] = {1, 1, 1, 1};
3301 
3302   /* Recursion threshold */
3303   const cs_real_t trsh = mg->k_cycle_threshold;
3304 
3305   /* RHS and unknowns arrays */
3306   cs_real_t *restrict vx_lv = NULL;
3307   const cs_real_t *restrict rhs_lv = NULL;
3308   const cs_matrix_t  *f_matrix = NULL, *c_matrix = NULL;
3309 
3310   /* Initialization of the computing tools */
3311 
3312   cs_grid_t *f = mgd->grid_hierarchy[level];
3313   cs_grid_t *c = mgd->grid_hierarchy[level+1];
3314 
3315   f_matrix = cs_grid_get_matrix(f);
3316   c_matrix = cs_grid_get_matrix(c);
3317 
3318   cs_grid_get_info(f, NULL, NULL, db_size, eb_size, NULL, &f_n_rows,
3319                    &f_n_cols_ext, NULL, &f_n_g_rows);
3320   cs_grid_get_info(c, NULL, NULL, db_size, eb_size, NULL, &c_n_rows,
3321                    &c_n_cols_ext, NULL, &c_n_g_rows);
3322 
3323   cs_lnum_t _f_n_rows = f_n_rows*db_size[1];
3324   cs_lnum_t _c_n_rows = c_n_rows*db_size[1];
3325 
3326   static double denom_n_g_rows_0 = 1;
3327   if (level == 0)
3328     denom_n_g_rows_0 = 1.0 / f_n_g_rows;
3329 
3330   rhs_lv = rhs;
3331   vx_lv = vx;
3332 
3333   /* Descent
3334      ------- */
3335 
3336   const int na = 10; /* number of array slots in mgd->rhs_vx */
3337 
3338   /* Arrays that are needed for both descent and ascent phases */
3339   cs_real_t *restrict rt_lv = mgd->rhs_vx[level*na];
3340   assert(rt_lv != NULL);
3341 
3342 # pragma omp parallel for if(_f_n_rows > CS_THR_MIN)
3343   for (cs_lnum_t ii = 0; ii < _f_n_rows; ii++)
3344     vx_lv[ii] = 0.0;
3345 
3346   cs_multigrid_level_info_t  *lv_info = mg->lv_info + level;
3347   t0 = cs_timer_time();
3348 
3349   /* Pre-smoothing */
3350 
3351   cs_mg_sles_t  *mg_sles = &(mgd->sles_hierarchy[level*2]);
3352 
3353   c_cvg = mg_sles->solve_func(mg_sles->context,
3354                               lv_names[level*2],
3355                               f_matrix,
3356                               0, /* verbosity */
3357                               precision*mg->info.precision_mult[0],
3358                               r_norm_l,
3359                               &n_iter,
3360                               &_residue,
3361                               rhs_lv,
3362                               vx_lv,
3363                               _aux_r_size*sizeof(cs_real_t),
3364                               aux_vectors);
3365 
3366   if (initial_residue != NULL && cycle_id == 1)
3367     *initial_residue = HUGE_VAL;
3368 
3369   t1 = cs_timer_time();
3370   cs_timer_counter_add_diff(&(lv_info->t_tot[2]), &t0, &t1);
3371   lv_info->n_calls[2] += 1;
3372   _lv_info_update_stage_iter(lv_info->n_it_ds_smoothe, n_iter);
3373   *n_equiv_iter += n_iter * f_n_g_rows * denom_n_g_rows_0;
3374 
3375   if (verbosity > 0)
3376     _log_residual(mg, cycle_id, lv_names[level*2],
3377                   f_matrix, rhs_lv, vx_lv);
3378 
3379   /* Compute new residual */
3380   cs_matrix_vector_multiply(f_matrix, vx_lv, rt_lv);
3381 
3382 # pragma omp parallel for if(_f_n_rows > CS_THR_MIN)
3383   for (cs_lnum_t ii = 0; ii < _f_n_rows; ii++)
3384     rt_lv[ii] = rhs_lv[ii] - rt_lv[ii];
3385 
3386   /* Convergence test in beginning of cycle (fine mesh) */
3387 
3388   if ((level == 0 && mg->info.n_max_cycles > 1) || c_cvg < CS_SLES_BREAKDOWN) {
3389 
3390     if (c_cvg >= CS_SLES_BREAKDOWN)
3391       cvg = _convergence_test(mg,
3392                               lv_names[0],
3393                               f_n_rows*db_size[1],
3394                               mg->info.n_max_cycles,
3395                               cycle_id,
3396                               verbosity,
3397                               lv_info->n_it_ds_smoothe[3],
3398                               precision,
3399                               r_norm,
3400                               *initial_residue,
3401                               residue,
3402                               rt_lv);
3403     else
3404       cvg = c_cvg;
3405 
3406     /* If converged or cycle limit reached, break from descent loop */
3407 
3408     if (cvg != CS_SLES_ITERATING)
3409       end_cycle = true;
3410   }
3411 
3412   t0 = cs_timer_time();
3413   cs_timer_counter_add_diff(&(lv_info->t_tot[6]), &t1, &t0);
3414   lv_info->n_calls[6] += 1; /* Single update for this cycle */
3415 
3416   if (end_cycle) {
3417     return cvg;
3418   }
3419 
3420   /* Arrays needed for the descent phase
3421      (mapped to preallocated mgd->rhs_vx to avoid extra allocations) */
3422 
3423   cs_real_t *restrict vx_lv1 = mgd->rhs_vx[(level+1)*na + 4];
3424   cs_real_t *restrict rhs_lv1 = mgd->rhs_vx[(level+1)*na + 5];
3425 
3426 # pragma omp parallel for if(_c_n_rows > CS_THR_MIN)
3427   for (cs_lnum_t i = 0; i < _c_n_rows; i++)
3428     vx_lv1[i] = 0.0;
3429 
3430   /* Restriction of rt_lv into the next level rhs */
3431   cs_grid_restrict_row_var(f, c, rt_lv, rhs_lv1);
3432 
3433   t1 = cs_timer_time();
3434   cs_timer_counter_add_diff(&(lv_info->t_tot[4]), &t0, &t1);
3435   lv_info->n_calls[4] += 1;
3436 
3437   /* Approximate solution to the coarser problem
3438      ------------------------------------------- */
3439 
3440   /* If the next level is the coarsest one, then we solve it directly */
3441 
3442   if (level == coarsest_level-1) {
3443 
3444     lv_info = mg->lv_info + coarsest_level;
3445 
3446     mg_sles = &(mgd->sles_hierarchy[coarsest_level*2]);
3447 
3448     cvg = mg_sles->solve_func(mg_sles->context,
3449                               lv_names[coarsest_level*2],
3450                               c_matrix,
3451                               verbosity - 2,
3452                               precision*mg->info.precision_mult[2],
3453                               r_norm_l,
3454                               &n_iter,
3455                               residue,
3456                               rhs_lv1,
3457                               vx_lv1,
3458                               _aux_r_size*sizeof(cs_real_t),
3459                               aux_vectors);
3460 
3461     t0 = cs_timer_time();
3462     cs_timer_counter_add_diff(&(lv_info->t_tot[1]), &t1, &t0);
3463     lv_info->n_calls[1] += 1;
3464     _lv_info_update_stage_iter(lv_info->n_it_solve, n_iter);
3465 
3466     if (verbosity > 0)
3467       _log_residual(mg, cycle_id, lv_names[coarsest_level*2],
3468                     c_matrix, rhs_lv1, vx_lv1);
3469 
3470     *n_equiv_iter += n_iter * c_n_g_rows * denom_n_g_rows_0;
3471 
3472   }
3473 
3474   /* Otherwise, we apply the K-cycle recursively */
3475 
3476   else {
3477 
3478     c_cvg = _multigrid_k_cycle(mg,
3479                                level + 1,
3480                                lv_names,
3481                                verbosity,
3482                                cycle_id,
3483                                &n_iter,
3484                                precision,
3485                                r_norm,
3486                                NULL,
3487                                residue,
3488                                rhs_lv1,
3489                                vx_lv1,
3490                                aux_size,
3491                                aux_vectors);
3492 
3493     t1 = cs_timer_time();
3494 
3495     /* Arrays needed for the first Krylov iteration inside the cycle */
3496     cs_real_t *restrict v_lv1 = mgd->rhs_vx[(level+1)*na + 6];
3497     cs_real_t *restrict rt_lv1 = mgd->rhs_vx[(level+1)*na + 7];
3498 
3499     cs_matrix_vector_multiply(c_matrix, vx_lv1, v_lv1);
3500 
3501     /* Coefficients for the Krylov iteration */
3502 
3503     cs_real_t rho1 = 0, alpha1 = 0;
3504     _dot_xy_yz(mg, _c_n_rows, v_lv1, vx_lv1, rhs_lv1, &rho1, &alpha1);
3505 
3506     cs_real_t ar1 = alpha1 / rho1;
3507 
3508     /* New residual */
3509     for (cs_lnum_t i = 0; i < _c_n_rows; i++)
3510       rt_lv1[i] = rhs_lv1[i] - ar1 * v_lv1[i];
3511 
3512     cs_real_t  rt_lv1_norm = 1.0, r_lv1_norm = 0.0;
3513 
3514     if (trsh > 0)
3515       _dot_xx_yy(mg, _c_n_rows, rt_lv1, rhs_lv1, &rt_lv1_norm, &r_lv1_norm);
3516 
3517     /* Free (unmap) arrays that were needed only for the descent phase */
3518     rhs_lv1 = NULL;
3519 
3520     /* Test for the second coarse resolution */
3521     if (rt_lv1_norm < trsh * trsh * r_lv1_norm) {
3522 #     pragma omp parallel for if(_c_n_rows > CS_THR_MIN)
3523       for (cs_lnum_t i = 0; i < _c_n_rows; i++)
3524         vx_lv1[i] = ar1 * vx_lv1[i];
3525     }
3526     else {
3527 
3528       /* Arrays for the (optional) second Krylov iteration */
3529       cs_real_t *restrict vx2_lv1 = mgd->rhs_vx[(level+1)*na + 8];
3530       cs_real_t *restrict w_lv1 = mgd->rhs_vx[(level+1)*na + 9];
3531 
3532 #     pragma omp parallel for if(_c_n_rows > CS_THR_MIN)
3533       for (cs_lnum_t i = 0; i < _c_n_rows; i++) {
3534         vx2_lv1[i] = 0.0;
3535         w_lv1[i] = 0.0;
3536       }
3537 
3538       t0 = cs_timer_time();
3539       cs_timer_counter_add_diff(&(lv_info->t_tot[6]), &t1, &t0);
3540 
3541       c_cvg = _multigrid_k_cycle(mg,
3542                                  level + 1,
3543                                  lv_names,
3544                                  verbosity,
3545                                  cycle_id,
3546                                  &n_iter,
3547                                  precision,
3548                                  r_norm,
3549                                  initial_residue,
3550                                  residue,
3551                                  rt_lv1,
3552                                  vx2_lv1,
3553                                  aux_size,
3554                                  aux_vectors);
3555 
3556       t1 = cs_timer_time();
3557 
3558       cs_matrix_vector_multiply(c_matrix, vx2_lv1, w_lv1);
3559 
3560       /* Krylov iteration */
3561 
3562       cs_real_t gamma = 0, beta = 0, alpha2 = 0;
3563 
3564       _dot_xu_xv_xw(mg, _c_n_rows,
3565                     vx2_lv1, v_lv1, w_lv1, rt_lv1,
3566                     &gamma, &beta, &alpha2);
3567 
3568       cs_real_t rho2 = beta - (gamma * gamma) / rho1;
3569       cs_real_t ar2 = alpha2 / rho2;
3570       cs_real_t ar1_ar2 = ar1 - (gamma / rho1) * ar2;
3571 
3572 #     pragma omp parallel for if(_c_n_rows > CS_THR_MIN)
3573       for (cs_lnum_t i = 0; i < _c_n_rows; i++)
3574         vx_lv1[i] = ar1_ar2 * vx_lv1[i] + ar2 * vx2_lv1[i];
3575 
3576       vx2_lv1 = NULL;
3577       w_lv1 = NULL;
3578     }
3579 
3580     t0 = cs_timer_time();
3581     cs_timer_counter_add_diff(&(lv_info->t_tot[6]), &t1, &t0);
3582 
3583     v_lv1 = NULL;
3584     rt_lv1 = NULL;
3585   }
3586 
3587   /* Ascent
3588      ------ */
3589 
3590   lv_info = mg->lv_info + level;
3591   t0 = cs_timer_time();
3592 
3593   /* Arrays that are needed for the ascent phase */
3594   cs_real_t *restrict rb_lv = mgd->rhs_vx[level*na + 1];
3595   cs_real_t *restrict z1_lv = mgd->rhs_vx[level*na + 2];
3596   cs_real_t *restrict z2_lv = mgd->rhs_vx[level*na + 3];
3597 
3598 # pragma omp parallel for if(_f_n_rows > CS_THR_MIN)
3599   for (cs_lnum_t ii = 0; ii < _f_n_rows; ii++)
3600     z2_lv[ii] = 0.0;
3601 
3602   /* Prolongation */
3603   cs_grid_prolong_row_var(c, f, vx_lv1, z1_lv);
3604 
3605   t1 = cs_timer_time();
3606   cs_timer_counter_add_diff(&(lv_info->t_tot[5]), &t0, &t1);
3607   lv_info->n_calls[5] += 1;
3608 
3609   /* New residual */
3610   cs_matrix_vector_multiply(f_matrix, z1_lv, rb_lv);
3611 # pragma omp parallel for if(_f_n_rows > CS_THR_MIN)
3612   for (cs_lnum_t ii = 0; ii < _f_n_rows; ii++)
3613     rb_lv[ii] = rt_lv[ii] - rb_lv[ii];
3614 
3615   t0 = cs_timer_time();
3616   cs_timer_counter_add_diff(&(lv_info->t_tot[5]), &t1, &t0);
3617   t1 = t0;
3618 
3619   /* Post-smoothing */
3620 
3621   mg_sles = &(mgd->sles_hierarchy[level*2+1]);
3622 
3623   cvg = mg_sles->solve_func(mg_sles->context,
3624                             lv_names[level*2+1],
3625                             f_matrix,
3626                             0, /* verbosity */
3627                             precision*mg->info.precision_mult[1],
3628                             r_norm_l,
3629                             &n_iter,
3630                             &_residue,
3631                             rb_lv,
3632                             z2_lv,
3633                             _aux_r_size*sizeof(cs_real_t),
3634                             aux_vectors);
3635 
3636   t0 = cs_timer_time();
3637   cs_timer_counter_add_diff(&(lv_info->t_tot[3]), &t1, &t0);
3638   lv_info->n_calls[3] += 1;
3639   _lv_info_update_stage_iter(lv_info->n_it_as_smoothe, n_iter);
3640   *n_equiv_iter += n_iter * f_n_g_rows * denom_n_g_rows_0;
3641 
3642   if (verbosity > 0)
3643     _log_residual(mg, cycle_id, lv_names[level*2 + 1],
3644                   c_matrix, rb_lv, z2_lv);
3645 
3646 # pragma omp parallel for if(_f_n_rows > CS_THR_MIN)
3647   for (cs_lnum_t ii = 0; ii < f_n_rows; ii++)
3648     vx_lv[ii] += z1_lv[ii] + z2_lv[ii];
3649 
3650   t1 = cs_timer_time();
3651   cs_timer_counter_add_diff(&(lv_info->t_tot[6]), &t0, &t1);
3652 
3653   /* Free/unmap arrays of the ascent phase */
3654   rb_lv = NULL;
3655   z1_lv = NULL;
3656   z2_lv = NULL;
3657 
3658   /* Free/unmap arrays that were needed for both phases */
3659   rt_lv = NULL;
3660   vx_lv1 = NULL;
3661 
3662   return cvg;
3663 }
3664 
3665 /*! (DOXYGEN_SHOULD_SKIP_THIS) \endcond */
3666 
3667 /*============================================================================
3668  * Public function definitions
3669  *============================================================================*/
3670 
3671 /*----------------------------------------------------------------------------*/
3672 /*!
3673  * \brief Initialize multigrid solver API.
3674  */
3675 /*----------------------------------------------------------------------------*/
3676 
void
cs_multigrid_initialize(void)
{
  /* Intentionally empty: no global multigrid state requires setup at
     present; the function is kept so the public API stays symmetric
     with cs_multigrid_finalize(). */
}
3681 
3682 /*----------------------------------------------------------------------------*/
3683 /*!
3684  * \brief Finalize multigrid solver API.
3685  */
3686 /*----------------------------------------------------------------------------*/
3687 
void
cs_multigrid_finalize(void)
{
  /* Release global state held by the grid hierarchy machinery;
     individual solver contexts are destroyed separately through
     cs_multigrid_destroy(). */
  cs_grid_finalize();
}
3693 
3694 /*----------------------------------------------------------------------------*/
3695 /*!
3696  * \brief Define and associate a multigrid sparse linear system solver
3697  *        for a given field or equation name.
3698  *
3699  * If this system did not previously exist, it is added to the list of
3700  * "known" systems. Otherwise, its definition is replaced by the one
3701  * defined here.
3702  *
3703  * This is a utility function: if finer control is needed, see
3704  * \ref cs_sles_define and \ref cs_multigrid_create.
3705  *
3706  * Note that this function returns a pointer directly to the multigrid solver
3707  * management structure. This may be used to set further options, for
3708  * example calling \ref cs_multigrid_set_coarsening_options and
3709  * \ref cs_multigrid_set_solver_options.
3710  * If needed, \ref cs_sles_find may be used to obtain a pointer to the
3711  * matching \ref cs_sles_t container.
3712  *
3713  * \param[in]  f_id     associated field id, or < 0
3714  * \param[in]  name     associated name if f_id < 0, or NULL
3715  * \param[in]  mg_type  type of multigrid algorithm to use
3716  *
3717  * \return  pointer to new multigrid info and context
3718  */
3719 /*----------------------------------------------------------------------------*/
3720 
3721 cs_multigrid_t *
cs_multigrid_define(int f_id,const char * name,cs_multigrid_type_t mg_type)3722 cs_multigrid_define(int                   f_id,
3723                     const char           *name,
3724                     cs_multigrid_type_t   mg_type)
3725 {
3726   cs_multigrid_t *
3727     mg = cs_multigrid_create(mg_type);
3728 
3729   cs_sles_t *sc = cs_sles_define(f_id,
3730                                  name,
3731                                  mg,
3732                                  "cs_multigrid_t",
3733                                  cs_multigrid_setup,
3734                                  cs_multigrid_solve,
3735                                  cs_multigrid_free,
3736                                  cs_multigrid_log,
3737                                  cs_multigrid_copy,
3738                                  cs_multigrid_destroy);
3739 
3740   cs_sles_set_error_handler(sc,
3741                             cs_multigrid_error_post_and_abort);
3742 
3743   return mg;
3744 }
3745 
3746 /*----------------------------------------------------------------------------*/
3747 /*!
3748  * \brief Create multigrid linear system solver info and context.
3749  *
3750  * The multigrid variant is an ACM (Additive Corrective Multigrid) method.
3751  *
3752  * \param[in]  mg_type  type of multigrid algorithm to use
3753  *
3754  * \return  pointer to new multigrid info and context
3755  */
3756 /*----------------------------------------------------------------------------*/
3757 
3758 cs_multigrid_t *
cs_multigrid_create(cs_multigrid_type_t mg_type)3759 cs_multigrid_create(cs_multigrid_type_t  mg_type)
3760 {
3761   int ii;
3762   cs_multigrid_t *mg;
3763 
3764   /* Increment number of setups */
3765 
3766   BFT_MALLOC(mg, 1, cs_multigrid_t);
3767 
3768   mg->type = mg_type;
3769   mg->subtype = CS_MULTIGRID_MAIN;
3770 
3771 #if defined(HAVE_MPI)
3772   mg->comm = cs_glob_mpi_comm;
3773   mg->caller_comm = cs_glob_mpi_comm;
3774   mg->caller_n_ranks = cs_glob_n_ranks;
3775   if (mg->caller_n_ranks < 2) {
3776     mg->comm = MPI_COMM_NULL;
3777     mg->caller_comm = cs_glob_mpi_comm;
3778   }
3779   mg->merge_mean_threshold = 300;
3780   mg->merge_glob_threshold = 500;
3781   mg->merge_stride = 1;
3782 #endif
3783 
3784   mg->aggregation_limit = 3;
3785   mg->coarsening_type = CS_GRID_COARSENING_DEFAULT;
3786   mg->n_levels_max = 25;
3787   mg->n_g_rows_min = 30;
3788 
3789   mg->post_row_max = 0;
3790 
3791   mg->p0p1_relax = 0.;
3792   mg->k_cycle_threshold = 0;
3793 
3794   _multigrid_info_init(&(mg->info));
3795   for (int i = 0; i < 3; i++)
3796     mg->lv_mg[i] = NULL;
3797   mg->p_mg = NULL;
3798 
3799   if (mg->type == CS_MULTIGRID_V_CYCLE) {
3800     mg->p0p1_relax = 0.95;
3801   }
3802   else if (mg->type == CS_MULTIGRID_K_CYCLE) {
3803     mg->coarsening_type = CS_GRID_COARSENING_SPD_PW;
3804     mg->aggregation_limit = 4;
3805     mg->n_levels_max = 10;
3806     mg->n_g_rows_min = 256;
3807     mg->k_cycle_threshold = 0.25;
3808   }
3809   else if (mg->type == CS_MULTIGRID_K_CYCLE_HPC) {
3810     mg->coarsening_type = CS_GRID_COARSENING_SPD_PW;
3811     mg->aggregation_limit = 8;
3812     mg->n_levels_max = 4;
3813     mg->k_cycle_threshold = -1;
3814     mg->lv_mg[2] = _multigrid_create_k_cycle_bottom(mg);
3815   }
3816 
3817   mg->pc_precision = 0.0;
3818   mg->pc_r_norm = 0.0;
3819 
3820   mg->n_levels_post = 0;
3821 
3822   mg->setup_data = NULL;
3823 
3824   BFT_MALLOC(mg->lv_info, mg->n_levels_max, cs_multigrid_level_info_t);
3825 
3826   for (ii = 0; ii < mg->n_levels_max; ii++)
3827     _multigrid_level_info_init(mg->lv_info + ii);
3828 
3829   mg->post_row_num = NULL;
3830   mg->post_row_rank = NULL;
3831   mg->post_name = NULL;
3832 
3833   mg->cycle_plot = NULL;
3834   mg->plot_time_stamp = -1;
3835 
3836   if (mg_type == CS_MULTIGRID_V_CYCLE)
3837     cs_multigrid_set_solver_options
3838       (mg,
3839        CS_SLES_PCG, CS_SLES_PCG, CS_SLES_PCG,
3840        100, /* n max cycles */
3841        2,   /* n max iter for descent */
3842        10,  /* n max iter for ascent */
3843        500,
3844        0, 0, 0,  /* precond degree */
3845        1, 1, 1); /* precision multiplier */
3846 
3847   else if (   mg->type >= CS_MULTIGRID_K_CYCLE
3848            && mg->type <= CS_MULTIGRID_K_CYCLE_HPC)
3849     cs_multigrid_set_solver_options
3850       (mg,
3851        CS_SLES_P_SYM_GAUSS_SEIDEL,
3852        CS_SLES_P_SYM_GAUSS_SEIDEL,
3853        CS_SLES_PCG,
3854        100,  /* n max cycles */
3855        1,    /* n max iter for descent */
3856        1,    /* n max iter for ascent */
3857        50,   /* n max iter for coarse solve */
3858        -1, -1, 0,   /* precond degree */
3859        -1, -1, 1);  /* precision multiplier */
3860 
3861   return mg;
3862 }
3863 
3864 /*----------------------------------------------------------------------------*/
3865 /*!
3866  * \brief Destroy multigrid linear system solver info and context.
3867  *
3868  * \param[in, out]  context  pointer to multigrid linear solver info
3869  *                           (actual type: cs_multigrid_t  **)
3870  */
3871 /*----------------------------------------------------------------------------*/
3872 
3873 void
cs_multigrid_destroy(void ** context)3874 cs_multigrid_destroy(void  **context)
3875 {
3876   cs_multigrid_t *mg = (cs_multigrid_t *)(*context);
3877 
3878   if (mg == NULL)
3879     return;
3880 
3881   BFT_FREE(mg->lv_info);
3882 
3883   if (mg->post_row_num != NULL) {
3884     int n_max_post_levels = (int)(mg->info.n_levels[2]) - 1;
3885     for (int i = 0; i < n_max_post_levels; i++)
3886       if (mg->post_row_num[i] != NULL)
3887         BFT_FREE(mg->post_row_num[i]);
3888     BFT_FREE(mg->post_row_num);
3889   }
3890 
3891   if (mg->post_row_rank != NULL) {
3892     int n_max_post_levels = (int)(mg->info.n_levels[2]) - 1;
3893     for (int i = 0; i < n_max_post_levels; i++)
3894       if (mg->post_row_rank[i] != NULL)
3895         BFT_FREE(mg->post_row_rank[i]);
3896     BFT_FREE(mg->post_row_rank);
3897   }
3898 
3899   BFT_FREE(mg->post_name);
3900 
3901   if (mg->cycle_plot != NULL)
3902     cs_time_plot_finalize(&(mg->cycle_plot));
3903 
3904   for (int i = 0; i < 3; i++) {
3905     if (mg->lv_mg[i] != NULL)
3906       cs_multigrid_destroy((void **)(&(mg->lv_mg[i])));
3907   }
3908 
3909   BFT_FREE(mg);
3910   *context = (void *)mg;
3911 }
3912 
3913 /*----------------------------------------------------------------------------*/
3914 /*!
3915  * \brief Create multigrid sparse linear system solver info and context
3916  *        based on existing info and context.
3917  *
3918  * \param[in]  context  pointer to reference info and context
3919  *                      (actual type: cs_multigrid_t  *)
3920  *
3921  * \return  pointer to newly created solver info object
3922  *          (actual type: cs_multigrid_t  *)
3923  */
3924 /*----------------------------------------------------------------------------*/
3925 
3926 void *
cs_multigrid_copy(const void * context)3927 cs_multigrid_copy(const void  *context)
3928 {
3929   cs_multigrid_t *d = NULL;
3930 
3931   if (context != NULL) {
3932     const cs_multigrid_t *c = context;
3933     d = cs_multigrid_create(c->type);
3934     /* Beginning of cs_multigrid_info_t contains settings, the rest logging */
3935     memcpy(&(d->info), &(c->info),
3936            offsetof(cs_multigrid_info_t, n_calls));
3937     /* Same here: settings at beginningof structure */
3938     memcpy(d, c, offsetof(cs_multigrid_t, n_levels_post));
3939   }
3940 
3941   return d;
3942 }
3943 
3944 /*----------------------------------------------------------------------------*/
3945 /*!
3946  * \brief Log multigrid solver info.
3947  *
3948  * \param[in]  context   pointer to iterative solver info and context
3949  *                       (actual type: cs_multigrid_t  *)
3950  * \param[in]  log_type  log type
3951  */
3952 /*----------------------------------------------------------------------------*/
3953 
3954 void
cs_multigrid_log(const void * context,cs_log_t log_type)3955 cs_multigrid_log(const void  *context,
3956                  cs_log_t     log_type)
3957 {
3958   const cs_multigrid_t  *mg = context;
3959 
3960   if (log_type == CS_LOG_SETUP)
3961     _multigrid_setup_log(mg);
3962 
3963   else if (log_type == CS_LOG_PERFORMANCE)
3964     _multigrid_performance_log(mg);
3965 }
3966 
3967 /*----------------------------------------------------------------------------*/
3968 /*!
3969  * \brief Set multigrid coarsening parameters.
3970  *
3971  * \param[in, out]  mg                 pointer to multigrid info and context
3972  * \param[in]       aggregation_limit  maximum allowed fine rows
3973  *                                     per coarse row
 * \param[in]       coarsening_type    coarsening type;
 *                                     see \ref cs_grid_coarsening_t
3976  * \param[in]       n_max_levels       maximum number of grid levels
3977  * \param[in]       min_g_rows         global number of rows on coarse grids
3978  *                                     under which no coarsening occurs
 * \param[in]       p0p1_relax         p0/p1 relaxation parameter
3980  * \param[in]       postprocess        if > 0, postprocess coarsening
3981  *                                     (uses coarse row numbers
3982  *                                      modulo this value)
3983  */
3984 /*----------------------------------------------------------------------------*/
3985 
3986 void
cs_multigrid_set_coarsening_options(cs_multigrid_t * mg,int aggregation_limit,int coarsening_type,int n_max_levels,cs_gnum_t min_g_rows,double p0p1_relax,int postprocess)3987 cs_multigrid_set_coarsening_options(cs_multigrid_t  *mg,
3988                                     int              aggregation_limit,
3989                                     int              coarsening_type,
3990                                     int              n_max_levels,
3991                                     cs_gnum_t        min_g_rows,
3992                                     double           p0p1_relax,
3993                                     int              postprocess)
3994 {
3995   if (mg == NULL)
3996     return;
3997 
3998   mg->aggregation_limit = aggregation_limit;
3999   mg->coarsening_type = coarsening_type;
4000   mg->n_levels_max = n_max_levels;
4001   mg->n_g_rows_min = min_g_rows;
4002 
4003   mg->post_row_max = postprocess;
4004 
4005   mg->p0p1_relax = p0p1_relax;
4006 }
4007 
4008 /*----------------------------------------------------------------------------*/
4009 /*!
4010  * \brief Set multigrid parameters for associated iterative solvers.
4011  *
4012  * \param[in, out]  mg                      pointer to multigrid info
4013  *                                          and context
4014  * \param[in]       descent_smoother_type   type of smoother for descent
4015  * \param[in]       ascent_smoother_type    type of smoother for ascent
4016  * \param[in]       coarse_solver_type      type of solver for coarsest grid
4017  * \param[in]       n_max_cycles            maximum number of cycles
4018  * \param[in]       n_max_iter_descent      maximum iterations
4019  *                                          per descent smoothing
4020  * \param[in]       n_max_iter_ascent       maximum iterations
4021  *                                          per ascent smoothing
4022  * \param[in]       n_max_iter_coarse       maximum iterations
4023  *                                          per coarsest solution
4024  * \param[in]       poly_degree_descent     preconditioning polynomial degree
4025  *                                          for descent phases (0: diagonal)
4026  * \param[in]       poly_degree_ascent      preconditioning polynomial degree
4027  *                                          for ascent phases (0: diagonal)
4028  * \param[in]       poly_degree_coarse      preconditioning polynomial degree
4029  *                                          for coarse solver (0: diagonal)
4030  * \param[in]       precision_mult_descent  precision multiplier
4031  *                                          for descent smoothers (levels >= 1)
4032  * \param[in]       precision_mult_ascent   precision multiplier
4033  *                                          for ascent smoothers
4034  * \param[in]       precision_mult_coarse   precision multiplier
4035  *                                          for coarsest grid
4036  */
4037 /*----------------------------------------------------------------------------*/
4038 
4039 void
cs_multigrid_set_solver_options(cs_multigrid_t * mg,cs_sles_it_type_t descent_smoother_type,cs_sles_it_type_t ascent_smoother_type,cs_sles_it_type_t coarse_solver_type,int n_max_cycles,int n_max_iter_descent,int n_max_iter_ascent,int n_max_iter_coarse,int poly_degree_descent,int poly_degree_ascent,int poly_degree_coarse,double precision_mult_descent,double precision_mult_ascent,double precision_mult_coarse)4040 cs_multigrid_set_solver_options(cs_multigrid_t     *mg,
4041                                 cs_sles_it_type_t   descent_smoother_type,
4042                                 cs_sles_it_type_t   ascent_smoother_type,
4043                                 cs_sles_it_type_t   coarse_solver_type,
4044                                 int                 n_max_cycles,
4045                                 int                 n_max_iter_descent,
4046                                 int                 n_max_iter_ascent,
4047                                 int                 n_max_iter_coarse,
4048                                 int                 poly_degree_descent,
4049                                 int                 poly_degree_ascent,
4050                                 int                 poly_degree_coarse,
4051                                 double              precision_mult_descent,
4052                                 double              precision_mult_ascent,
4053                                 double              precision_mult_coarse)
4054 {
4055   if (mg == NULL)
4056     return;
4057 
4058   cs_multigrid_info_t  *info = &(mg->info);
4059 
4060   info->type[0] = descent_smoother_type;
4061   info->type[1] = ascent_smoother_type;
4062   info->type[2] = coarse_solver_type;
4063 
4064   info->n_max_cycles = n_max_cycles;
4065 
4066   info->n_max_iter[0] = n_max_iter_descent;
4067   info->n_max_iter[1] = n_max_iter_ascent;
4068   info->n_max_iter[2] = n_max_iter_coarse;
4069 
4070   info->poly_degree[0] = poly_degree_descent;
4071   info->poly_degree[1] = poly_degree_ascent;
4072   info->poly_degree[2] = poly_degree_coarse;
4073 
4074   info->precision_mult[0] = precision_mult_descent;
4075   info->precision_mult[1] = precision_mult_ascent;
4076   info->precision_mult[2] = precision_mult_coarse;
4077 
4078   for (int i = 0; i < 3; i++) {
4079     switch(info->type[i]) {
4080     case CS_SLES_JACOBI:
4081     case CS_SLES_P_GAUSS_SEIDEL:
4082     case CS_SLES_P_SYM_GAUSS_SEIDEL:
4083       info->poly_degree[i] = -1;
4084       break;
4085     default:
4086       break;
4087     }
4088   }
4089 }
4090 
4091 /*----------------------------------------------------------------------------*/
4092 /*!
4093  * \brief Return solver type used on fine mesh.
4094  *
4095  * \param[in]  mg  pointer to multigrid info and context
4096  *
4097  * \return   type of smoother for descent (used for fine mesh)
4098  */
4099 /*----------------------------------------------------------------------------*/
4100 
4101 cs_sles_it_type_t
cs_multigrid_get_fine_solver_type(const cs_multigrid_t * mg)4102 cs_multigrid_get_fine_solver_type(const cs_multigrid_t  *mg)
4103 {
4104   if (mg == NULL)
4105     return CS_SLES_N_IT_TYPES;
4106 
4107   const cs_multigrid_info_t  *info = &(mg->info);
4108 
4109   return info->type[0];
4110 }
4111 
4112 /*----------------------------------------------------------------------------*/
4113 /*!
4114  * \brief Setup multigrid sparse linear equation solver.
4115  *
4116  * \param[in, out]  context    pointer to multigrid solver info and context
4117  *                             (actual type: cs_multigrid_t  *)
4118  * \param[in]       name       pointer to name of linear system
4119  * \param[in]       a          associated matrix
4120  * \param[in]       verbosity  associated verbosity
4121  */
4122 /*----------------------------------------------------------------------------*/
4123 
void
cs_multigrid_setup(void               *context,
                   const char         *name,
                   const cs_matrix_t  *a,
                   int                 verbosity)
{
  /* Delegate to the convection/diffusion variant, with no separate
     convection or diffusion matrix contributions. */
  cs_multigrid_setup_conv_diff(context,
                               name,
                               a,
                               NULL,  /* a_conv */
                               NULL,  /* a_diff */
                               verbosity);
}
4137 
4138 /*----------------------------------------------------------------------------*/
4139 /*!
4140  * \brief Setup multigrid sparse linear equation solver.
4141  *
4142  * \param[in, out]  context    pointer to multigrid solver info and context
4143  *                             (actual type: cs_multigrid_t  *)
4144  * \param[in]       name       pointer to name of linear system
4145  * \param[in]       a          associated matrix
4146  * \param[in]       a_conv     associated matrix (convection)
4147  * \param[in]       a_diff     associated matrix (diffusion)
4148  * \param[in]       verbosity  associated verbosity
4149  */
4150 /*----------------------------------------------------------------------------*/
4151 
4152 void
cs_multigrid_setup_conv_diff(void * context,const char * name,const cs_matrix_t * a,const cs_matrix_t * a_conv,const cs_matrix_t * a_diff,int verbosity)4153 cs_multigrid_setup_conv_diff(void               *context,
4154                              const char         *name,
4155                              const cs_matrix_t  *a,
4156                              const cs_matrix_t  *a_conv,
4157                              const cs_matrix_t  *a_diff,
4158                              int                 verbosity)
4159 
4160 {
4161   cs_multigrid_t  *mg = context;
4162 
4163   const cs_mesh_t  *mesh = cs_glob_mesh;
4164   const cs_mesh_quantities_t  *mq = cs_glob_mesh_quantities;
4165 
4166   /* Destroy previous hierarchy if necessary */
4167 
4168   if (mg->setup_data != NULL)
4169     cs_multigrid_free(mg);
4170 
4171   /* Initialization */
4172 
4173   cs_timer_t t0 = cs_timer_time();
4174 
4175   if (verbosity > 1) {
4176     if (!(mg->info.is_pc)) {
4177       bft_printf(_("\n Setup of solver for linear system \"%s\"\n"),
4178                  name);
4179       cs_matrix_log_info(a, verbosity);
4180     }
4181     bft_printf(_("\n Construction of grid hierarchy for \"%s\"\n"),
4182                name);
4183   }
4184 
4185   /* Build coarse grids hierarchy */
4186   /*------------------------------*/
4187 
4188   const cs_lnum_t *diag_block_size = cs_matrix_get_diag_block_size(a);
4189   const cs_lnum_t *extra_diag_block_size
4190     = cs_matrix_get_extra_diag_block_size(a);
4191 
4192   cs_grid_t *f
4193     = cs_grid_create_from_shared(mesh->n_i_faces,
4194                                  diag_block_size,
4195                                  extra_diag_block_size,
4196                                  (const cs_lnum_2_t *)(mesh->i_face_cells),
4197                                  mq->cell_cen,
4198                                  mq->cell_vol,
4199                                  mq->i_face_normal,
4200                                  a,
4201                                  a_conv,
4202                                  a_diff);
4203 
4204   cs_multigrid_level_info_t *mg_lv_info = mg->lv_info;
4205 
4206   cs_timer_t t1 = cs_timer_time();
4207   cs_timer_counter_add_diff(&(mg_lv_info->t_tot[0]), &t0, &t1);
4208 
4209   _setup_hierarchy(mg, name, mesh, f, verbosity); /* Assign to and build
4210                                                      hierarchy */
4211 
4212   /* Update timers */
4213 
4214   t1 = cs_timer_time();
4215   cs_timer_counter_add_diff(&(mg->info.t_tot[0]), &t0, &t1);
4216 }
4217 
4218 /*----------------------------------------------------------------------------*/
4219 /*!
4220  * \brief Call multigrid sparse linear equation solver.
4221  *
4222  * \param[in, out]  context        pointer to multigrid solver info and context
4223  *                                 (actual type: cs_multigrid_t  *)
4224  * \param[in]       name           pointer to name of linear system
4225  * \param[in]       a              matrix
4226  * \param[in]       verbosity      associated verbosity
4227  * \param[in]       precision      solver precision
4228  * \param[in]       r_norm         residue normalization
4229  * \param[out]      n_iter         number of "equivalent" iterations
4230  * \param[out]      residue        residue
4231  * \param[in]       rhs            right hand side
4232  * \param[in, out]  vx             system solution
4233  * \param[in]       aux_size       size of aux_vectors (in bytes)
4234  * \param           aux_vectors    optional working area
4235  *                                 (internal allocation if NULL)
4236  *
4237  * \return  convergence state
4238  */
4239 /*----------------------------------------------------------------------------*/
4240 
cs_sles_convergence_state_t
cs_multigrid_solve(void                *context,
                   const char          *name,
                   const cs_matrix_t   *a,
                   int                  verbosity,
                   double               precision,
                   double               r_norm,
                   int                 *n_iter,
                   double              *residue,
                   const cs_real_t     *rhs,
                   cs_real_t           *vx,
                   size_t               aux_size,
                   void                *aux_vectors)
{
  cs_timer_t t0, t1;
  t0 = cs_timer_time();

  cs_sles_convergence_state_t cvg = CS_SLES_ITERATING;

  cs_multigrid_t *mg = context;
  cs_multigrid_info_t *mg_info = &(mg->info);

  const cs_lnum_t *db_size = cs_matrix_get_diag_block_size(a);

  /* Only square diagonal blocks are handled by this solver */
  assert(db_size[0] == db_size[1]);

  cs_lnum_t n_rows = cs_matrix_get_n_rows(a);

  /* Initialize number of equivalent iterations and residue,
     check for immediate return,
     solve sparse linear system using multigrid algorithm. */

  *n_iter = 0;
  unsigned n_cycles = 0;

  if (mg->setup_data == NULL) {
    /* Stop solve timer to switch to setup timer */
    t1 = cs_timer_time();
    cs_timer_counter_add_diff(&(mg->info.t_tot[1]), &t0, &t1);

    /* Setup grid hierarchy */
    cs_multigrid_setup(context, name, a, verbosity);

    /* Restart solve timer */
    t0 = cs_timer_time();
  }

  if (mg_info->is_pc == false && verbosity > 1)
    cs_log_printf(CS_LOG_DEFAULT,
                  _(" RHS norm:          %11.4e\n\n"), r_norm);

  /* Buffer size sufficient to avoid local reallocation for most solvers:
     per-level name strings followed by 6 work arrays of n_rows*db_size[1]
     values */
  size_t  lv_names_size = _level_names_size(name, mg->setup_data->n_levels);
  size_t  _aux_size =   lv_names_size
                      + n_rows * 6 * db_size[1] * sizeof(cs_real_t);
  unsigned char *_aux_buf = aux_vectors;

  /* Use the caller-supplied work area if large enough; otherwise
     allocate a local buffer (freed before returning) */
  if (_aux_size > aux_size)
    BFT_MALLOC(_aux_buf, _aux_size, unsigned char);
  else
    _aux_size = aux_size;

  /* Per-level system names occupy the head of the buffer and are
     used for logging inside the cycle functions */
  _level_names_init(name, mg->setup_data->n_levels, _aux_buf);
  const char **lv_names = (const char **)_aux_buf;

  if (verbosity == 2) /* More detailed headers later if > 2 */
    bft_printf(_("Multigrid [%s]:\n"), name);

  /* Initial residue should be improved, but this is consistent
     with the legacy (by increment) case */

  double initial_residue = -1;

  *residue = initial_residue; /* not known yet, so be safe */

  /* Cycle to solution */

  if (mg->type == CS_MULTIGRID_V_CYCLE) {
    /* V-cycle: convergence (including max cycle count) is checked
       inside _multigrid_v_cycle via the returned state */
    for (n_cycles = 0; cvg == CS_SLES_ITERATING; n_cycles++) {
      int cycle_id = n_cycles+1;
      cvg = _multigrid_v_cycle(mg,
                               lv_names,
                               verbosity,
                               cycle_id,
                               n_iter,
                               precision,
                               r_norm,
                               &initial_residue,
                               residue,
                               rhs,
                               vx,
                               _aux_size - lv_names_size,
                               _aux_buf + lv_names_size);
    }
  }
  else if (   mg->type >= CS_MULTIGRID_K_CYCLE
           && mg->type <= CS_MULTIGRID_K_CYCLE_HPC) {
    /* K-cycle: the maximum cycle count is enforced here,
       in addition to the convergence state */
    for (n_cycles = 0;
         n_cycles < (unsigned)mg->info.n_max_cycles && cvg == CS_SLES_ITERATING;
         n_cycles++) {
      int cycle_id = n_cycles+1;
      cvg = _multigrid_k_cycle(mg,
                               0,
                               lv_names,
                               verbosity,
                               cycle_id,
                               n_iter,
                               precision,
                               r_norm,
                               &initial_residue,
                               residue,
                               rhs,
                               vx,
                               _aux_size - lv_names_size,
                               _aux_buf + lv_names_size);
    }
  }

  /* Release the work buffer only if it was locally allocated */
  if (_aux_buf != aux_vectors)
    BFT_FREE(_aux_buf);

  if (verbosity > 0)
    cs_log_printf(CS_LOG_DEFAULT, "\n");

  /* Update statistics */

  t1 = cs_timer_time();

  /* Update stats on number of iterations (last, min, max, total);
     n_cycles[0] = min, n_cycles[1] = max, n_cycles[2] = total */

  mg_info->n_cycles[2] += n_cycles;

  if (mg_info->n_calls[1] > 0) {
    if (mg_info->n_cycles[0] > n_cycles)
      mg_info->n_cycles[0] = n_cycles;
    if (mg_info->n_cycles[1] < n_cycles)
      mg_info->n_cycles[1] = n_cycles;
  }
  else {
    /* First resolution: initialize both min and max */
    mg_info->n_cycles[0] = n_cycles;
    mg_info->n_cycles[1] = n_cycles;
  }

  /* Update number of resolutions and timing data */

  mg_info->n_calls[1] += 1;
  cs_timer_counter_add_diff(&(mg->info.t_tot[1]), &t0, &t1);

  return cvg;
}
4391 
4392 /*----------------------------------------------------------------------------*/
4393 /*!
4394  * \brief Free multigrid sparse linear equation solver setup context.
4395  *
 * This function frees resolution-related data, including the current
4397  * grid hierarchy, but does not free the whole context,
4398  * as info used for logging (especially performance data) is maintained.
4399  *
4400  * \param[in, out]  context  pointer to multigrid solver info and context
4401  *                           (actual type: cs_multigrid_t  *)
4402  */
4403 /*----------------------------------------------------------------------------*/
4404 
4405 void
cs_multigrid_free(void * context)4406 cs_multigrid_free(void  *context)
4407 {
4408   cs_multigrid_t *mg = context;
4409 
4410   cs_timer_t t0, t1;
4411 
4412   /* Initialization */
4413 
4414   t0 = cs_timer_time();
4415 
4416   /* Free subgrid info first if needed */
4417 
4418   for (int i = 0; i < 3; i++) {
4419     if (mg->lv_mg[i] != NULL)
4420       cs_multigrid_free(mg->lv_mg[i]);
4421   }
4422 
4423   if (mg->setup_data != NULL) {
4424 
4425     cs_multigrid_setup_data_t *mgd = mg->setup_data;
4426 
4427     /* Free coarse solution data */
4428 
4429     BFT_FREE(mgd->rhs_vx);
4430     BFT_FREE(mgd->rhs_vx_buf);
4431 
4432     /* Destroy solver hierarchy */
4433 
4434     for (int i = mgd->n_levels - 1; i > -1; i--) {
4435       for (int j = 0; j < 2; j++) {
4436         cs_mg_sles_t  *mg_sles = &(mgd->sles_hierarchy[i*2+j]);
4437         if (mg_sles->context != NULL && mg_sles->destroy_func)
4438           mg_sles->destroy_func(&(mg_sles->context));
4439       }
4440     }
4441     BFT_FREE(mgd->sles_hierarchy);
4442 
4443     /* Destroy grid hierarchy */
4444 
4445     for (int i = mgd->n_levels - 1; i > -1; i--)
4446       cs_grid_destroy(mgd->grid_hierarchy + i);
4447     BFT_FREE(mgd->grid_hierarchy);
4448 
4449     /* Destroy peconditioning-only arrays */
4450 
4451     BFT_FREE(mgd->pc_name);
4452     BFT_FREE(mgd->pc_aux);
4453 
4454     BFT_FREE(mg->setup_data);
4455   }
4456 
4457   /* Update timers */
4458 
4459   t1 = cs_timer_time();
4460   cs_timer_counter_add_diff(&(mg->info.t_tot[0]), &t0, &t1);
4461 }
4462 
4463 /*----------------------------------------------------------------------------*/
4464 /*!
4465  * \brief Create a multigrid preconditioner.
4466  *
4467  * \param[in]  mg_type  type of multigrid algorithm to use
4468  *
4469  * \return  pointer to newly created preconditioner object.
4470  */
4471 /*----------------------------------------------------------------------------*/
4472 
4473 cs_sles_pc_t *
cs_multigrid_pc_create(cs_multigrid_type_t mg_type)4474 cs_multigrid_pc_create(cs_multigrid_type_t  mg_type)
4475 {
4476   cs_multigrid_t *mg = _multigrid_pc_create(mg_type);
4477 
4478   mg->info.is_pc = true;
4479 
4480   cs_sles_pc_t *pc = cs_sles_pc_define(mg,
4481                                        _multigrid_pc_get_type,
4482                                        _multigrid_pc_setup,
4483                                        _multigrid_pc_tolerance_t,
4484                                        _multigrid_pc_apply,
4485                                        cs_multigrid_free,
4486                                        cs_multigrid_log,
4487                                        cs_multigrid_copy,
4488                                        cs_multigrid_destroy);
4489 
4490   return pc;
4491 }
4492 
4493 /*----------------------------------------------------------------------------*/
4494 /*!
4495  * \brief Error handler for multigrid sparse linear equation solver.
4496  *
4497  * In case of divergence or breakdown, this error handler outputs
4498  * postprocessing data to assist debugging, then aborts the run.
4499  * It does nothing in case the maximum iteration count is reached.
 *
4501  * \param[in, out]  sles           pointer to solver object
4502  * \param[in]       state          convergence state
4503  * \param[in]       a              matrix
4504  * \param[in]       rhs            right hand side
4505  * \param[in, out]  vx             system solution
4506  *
4507  * \return  false (do not attempt new solve)
4508  */
4509 /*----------------------------------------------------------------------------*/
4510 
bool
cs_multigrid_error_post_and_abort(cs_sles_t                    *sles,
                                  cs_sles_convergence_state_t   state,
                                  const cs_matrix_t            *a,
                                  const cs_real_t               rhs[],
                                  cs_real_t                     vx[])
{
  /* Reaching the maximum iteration count is not an error condition
     here; simply report that no new solve should be attempted */

  if (state >= CS_SLES_MAX_ITERATION)
    return false;

  const cs_multigrid_t  *mg = cs_sles_get_context(sles);
  const char *name = cs_sles_get_name(sles);

  /* No setup data means no hierarchy to post-process */

  cs_multigrid_setup_data_t *mgd = mg->setup_data;
  if (mgd == NULL)
    return false;

  /* Grid level at which divergence/breakdown was detected */

  int level = mgd->exit_level;

  int mesh_id = cs_post_init_error_writer_cells();
  int location_id = CS_MESH_LOCATION_CELLS;

  /* NOTE(review): rs stays NULL throughout, so the
     cs_range_set_scatter calls below are presumably no-ops;
     confirm against the cs_range_set API */
  const cs_range_set_t  *rs = NULL;

  if (mesh_id != 0) {

    char var_name[32];

    int lv_id = 0;
    cs_real_t *var = NULL, *da = NULL;

    cs_lnum_t db_size[4] = {1, 1, 1, 1};
    cs_lnum_t eb_size[4] = {1, 1, 1, 1};

    /* Finest grid; coarse-level fields are projected back to it
       for visualization */

    const cs_grid_t *g = mgd->grid_hierarchy[0];
    const cs_lnum_t n_base_rows = cs_grid_get_n_rows(g);
    const cs_matrix_t  *_matrix = NULL;

    /* Work arrays sized for the finest grid (including ghost values) */

    BFT_MALLOC(var, cs_grid_get_n_cols_ext(g), cs_real_t);
    BFT_MALLOC(da, cs_grid_get_n_cols_ext(g), cs_real_t);

    /* Output info on main level */

    cs_sles_post_error_output_def(name,
                                  mesh_id,
                                  a,
                                  rhs,
                                  vx);

    /* Output diagonal and diagonal dominance for all coarse levels */

    for (lv_id = 1; lv_id < (int)(mgd->n_levels); lv_id++) {

      g = mgd->grid_hierarchy[lv_id];

      cs_grid_get_info(g,
                       NULL,
                       NULL,
                       db_size,
                       eb_size,
                       NULL,
                       NULL,
                       NULL,
                       NULL,
                       NULL);

      _matrix = cs_grid_get_matrix(g);

      /* Project the coarse diagonal onto the base grid and output it */

      cs_matrix_copy_diagonal(_matrix, da);
      cs_grid_project_var(g, n_base_rows, da, var);
      cs_range_set_scatter(rs, CS_REAL_TYPE, db_size[1], var, var);
      sprintf(var_name, "Diag_%04d", lv_id);
      cs_sles_post_output_var(var_name,
                              mesh_id,
                              location_id,
                              CS_POST_WRITER_ERRORS,
                              db_size[1],
                              var);

      /* Same for the diagonal dominance indicator */

      cs_grid_project_diag_dom(g, n_base_rows, var);
      cs_range_set_scatter(rs, CS_REAL_TYPE, db_size[1], var, var);
      sprintf(var_name, "Diag_Dom_%04d", lv_id);
      cs_sles_post_output_var(var_name,
                              mesh_id,
                              location_id,
                              CS_POST_WRITER_ERRORS,
                              db_size[1],
                              var);
    }

    /* Output info on current level if > 0 */

    if (level > 0) {

      cs_lnum_t ii;
      cs_lnum_t n_cells = 0;
      cs_lnum_t n_cols_ext = 0;

      cs_real_t *c_res = NULL;

      g = mgd->grid_hierarchy[level];

      cs_grid_get_info(g,
                       NULL,
                       NULL,
                       db_size,
                       eb_size,
                       NULL,
                       &n_cells,
                       &n_cols_ext,
                       NULL,
                       NULL);

      /* rhs_vx holds per-level {rhs, x} pairs: index level*2 is the
         right-hand side, level*2+1 the current solution estimate */

      cs_grid_project_var(g, n_base_rows, mgd->rhs_vx[level*2], var);
      cs_range_set_scatter(rs, CS_REAL_TYPE, db_size[1], var, var);
      sprintf(var_name, "RHS_%04d", level);
      cs_sles_post_output_var(var_name,
                              mesh_id,
                              location_id,
                              CS_POST_WRITER_ERRORS,
                              db_size[1],
                              var);

      cs_grid_project_var(g, n_base_rows, mgd->rhs_vx[level*2+1], var);
      cs_range_set_scatter(rs, CS_REAL_TYPE, db_size[1], var, var);
      sprintf(var_name, "X_%04d", level);
      cs_sles_post_output_var(var_name,
                              mesh_id,
                              location_id,
                              CS_POST_WRITER_ERRORS,
                              db_size[1],
                              var);

      /* Compute residual */

      BFT_MALLOC(c_res, n_cols_ext*db_size[1], cs_real_t);

      _matrix = cs_grid_get_matrix(g);

      cs_matrix_vector_multiply(_matrix,
                                mgd->rhs_vx[level*2+1],
                                c_res);

      /* |A.x - rhs| component-wise on the level's own cells */

      const cs_real_t *c_rhs_lv = mgd->rhs_vx[level*2];
      for (ii = 0; ii < n_cells; ii++) {
        for (cs_lnum_t i = 0; i < db_size[0]; i++)
          c_res[ii*db_size[1] + i]
            = fabs(c_res[ii*db_size[1] + i] - c_rhs_lv[ii*db_size[1] + i]);
      }

      cs_grid_project_var(g, n_base_rows, c_res, var);
      cs_range_set_scatter(rs, CS_REAL_TYPE, db_size[1], var, var);

      BFT_FREE(c_res);

      sprintf(var_name, "Residual_%04d", level);
      cs_sles_post_output_var(var_name,
                              mesh_id,
                              location_id,
                              CS_POST_WRITER_ERRORS,
                              db_size[1],
                              var);
    }

    /* Flush and close error postprocessing output */

    cs_post_finalize();

    BFT_FREE(da);
    BFT_FREE(var);
  }

  /* Now abort */

  const char *error_type[] = {N_("divergence"), N_("breakdown")};
  int err_id = (state == CS_SLES_BREAKDOWN) ? 1 : 0;

  if (level == 0)
    bft_error(__FILE__, __LINE__, 0,
              _("algebraic multigrid [%s]: %s after %d cycles:\n"
                "  initial residual: %11.4e; current residual: %11.4e"),
              name, _(error_type[err_id]), mgd->exit_cycle_id,
              mgd->exit_initial_residue, mgd->exit_residue);
  else
    bft_error(__FILE__, __LINE__, 0,
              _("algebraic multigrid [%s]: %s after %d cycles\n"
                "  during resolution at level %d:\n"
                "  initial residual: %11.4e; current residual: %11.4e"),
              name, _(error_type[err_id]),
              mgd->exit_cycle_id, level,
              mgd->exit_initial_residue, mgd->exit_residue);

  /* Not reached if bft_error aborts, but keeps the contract explicit */
  return false;
}
4703 
4704 /*----------------------------------------------------------------------------*/
4705 /*!
4706  * \brief Set plotting options for multigrid.
4707  *
4708  * \param[in, out]  mg             pointer to multigrid info and context
4709  * \param[in]       base_name      base plot name to activate, NULL otherwise
4710  * \param[in]       use_iteration  if true, use iteration as time stamp
4711  *                                 otherwise, use wall clock time
4712  */
4713 /*----------------------------------------------------------------------------*/
4714 
4715 void
cs_multigrid_set_plot_options(cs_multigrid_t * mg,const char * base_name,bool use_iteration)4716 cs_multigrid_set_plot_options(cs_multigrid_t  *mg,
4717                               const char      *base_name,
4718                               bool             use_iteration)
4719 {
4720   if (mg != NULL) {
4721     if (cs_glob_rank_id < 1 && base_name != NULL) {
4722 
4723       /* Destroy previous plot if options reset */
4724       if (mg->cycle_plot != NULL)
4725         cs_time_plot_finalize(&(mg->cycle_plot));
4726 
4727       /* Create new plot */
4728       cs_file_mkdir_default("monitoring");
4729       const char *probe_names[] = {base_name};
4730       mg->cycle_plot = cs_time_plot_init_probe(base_name,
4731                                                "monitoring/residue_",
4732                                                CS_TIME_PLOT_CSV,
4733                                                use_iteration,
4734                                                -1.,      /* force flush */
4735                                                0,        /* no buffer */
4736                                                1,        /* n_probes */
4737                                                NULL,     /* probe_list */
4738                                                NULL,     /* probe_coords */
4739                                                probe_names);
4740 
4741       if (use_iteration)
4742         mg->plot_time_stamp = 0;
4743     }
4744   }
4745 }
4746 
4747 /*----------------------------------------------------------------------------*/
4748 /*!
4749  * \brief Query the global multigrid parameters for parallel grid merging.
4750  *
4751  * \param[in]   mg                   pointer to multigrid info and context
4752  * \param[out]  rank_stride          number of ranks over which merging
4753  *                                   takes place, or NULL
4754  * \param[out]  rows_mean_threshold  mean number of rows under which merging
4755  *                                   should be applied, or NULL
4756  * \param[out]  rows_glob_threshold  global number of rows under which
4757  *                                   merging should be applied, or NULL
4758  */
4759 /*----------------------------------------------------------------------------*/
4760 
4761 void
cs_multigrid_get_merge_options(const cs_multigrid_t * mg,int * rank_stride,int * rows_mean_threshold,cs_gnum_t * rows_glob_threshold)4762 cs_multigrid_get_merge_options(const cs_multigrid_t  *mg,
4763                                int                   *rank_stride,
4764                                int                   *rows_mean_threshold,
4765                                cs_gnum_t             *rows_glob_threshold)
4766 {
4767 #if defined(HAVE_MPI)
4768   if (rank_stride != NULL)
4769     *rank_stride = mg->merge_stride;
4770   if (rows_mean_threshold != NULL)
4771     *rows_mean_threshold = mg->merge_mean_threshold;
4772   if (rows_glob_threshold != NULL)
4773     *rows_glob_threshold = mg->merge_glob_threshold;
4774 #else
4775   if (rank_stride != NULL)
4776     *rank_stride = 0;
4777   if (rows_mean_threshold != NULL)
4778     *rows_mean_threshold = 0;
4779   if (rows_glob_threshold != NULL)
4780     *rows_glob_threshold = 0;
4781 #endif
4782 }
4783 
4784 /*----------------------------------------------------------------------------*/
4785 /*!
4786  * \brief Set global multigrid parameters for parallel grid merging behavior.
4787  *
4788  * \param[in, out]  mg                   pointer to multigrid info and context
4789  * \param[in]       rank_stride          number of ranks over which merging
4790  *                                       takes place
4791  * \param[in]       rows_mean_threshold  mean number of rows under which
4792  *                                       merging should be applied
4793  * \param[in]       rows_glob_threshold  global number of rows under which
4794  *                                       merging should be applied
4795  */
4796 /*----------------------------------------------------------------------------*/
4797 
4798 void
cs_multigrid_set_merge_options(cs_multigrid_t * mg,int rank_stride,int rows_mean_threshold,cs_gnum_t rows_glob_threshold)4799 cs_multigrid_set_merge_options(cs_multigrid_t  *mg,
4800                                int              rank_stride,
4801                                int              rows_mean_threshold,
4802                                cs_gnum_t        rows_glob_threshold)
4803 {
4804 #if defined(HAVE_MPI)
4805   mg->merge_stride = rank_stride;
4806   if (mg->merge_stride > cs_glob_n_ranks)
4807     mg->merge_stride = cs_glob_n_ranks;
4808   mg->merge_mean_threshold = rows_mean_threshold;
4809   mg->merge_glob_threshold = rows_glob_threshold;
4810 #endif
4811 }
4812 
4813 /*----------------------------------------------------------------------------*/
4814 
4815 END_C_DECLS
4816