1 /*============================================================================
2 * Multigrid solver.
3 *============================================================================*/
4
5 /*
6 This file is part of Code_Saturne, a general-purpose CFD tool.
7
8 Copyright (C) 1998-2021 EDF S.A.
9
10 This program is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free Software
12 Foundation; either version 2 of the License, or (at your option) any later
13 version.
14
15 This program is distributed in the hope that it will be useful, but WITHOUT
16 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
18 details.
19
20 You should have received a copy of the GNU General Public License along with
21 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
22 Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 */
24
25 /*----------------------------------------------------------------------------*/
26
27 #include "cs_defs.h"
28
29 /*----------------------------------------------------------------------------
30 * Standard C library headers
31 *----------------------------------------------------------------------------*/
32
33 #include <stdarg.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <assert.h>
38 #include <math.h>
39
40 #if defined(HAVE_MPI)
41 #include <mpi.h>
42 #endif
43
44 /*----------------------------------------------------------------------------
45 * Local headers
46 *----------------------------------------------------------------------------*/
47
48 #include "bft_mem.h"
49 #include "bft_error.h"
50 #include "bft_printf.h"
51
52 #include "cs_base.h"
53 #include "cs_blas.h"
54 #include "cs_file.h"
55 #include "cs_grid.h"
56 #include "cs_halo.h"
57 #include "cs_log.h"
58 #include "cs_matrix.h"
59 #include "cs_matrix_default.h"
60 #include "cs_matrix_util.h"
61 #include "cs_mesh.h"
62 #include "cs_mesh_quantities.h"
63 #include "cs_multigrid_smoother.h"
64 #include "cs_post.h"
65 #include "cs_sles.h"
66 #include "cs_sles_it.h"
67 #include "cs_sles_pc.h"
68 #include "cs_timer.h"
69 #include "cs_time_plot.h"
70 #include "cs_time_step.h"
71
72 /*----------------------------------------------------------------------------
73 * Header for the current file
74 *----------------------------------------------------------------------------*/
75
76 #include "cs_multigrid.h"
77
78 /*----------------------------------------------------------------------------*/
79
80 BEGIN_C_DECLS
81
82 /*! \cond DOXYGEN_SHOULD_SKIP_THIS */
83
84 /*=============================================================================
85 * Local Macro Definitions
86 *============================================================================*/
87
88 #define EPZERO 1.E-12
89 #define RINFIN 1.E+30
90
91 #if !defined(HUGE_VAL)
92 #define HUGE_VAL 1.E+12
93 #endif
94
95 /* Minimum size for OpenMP loops (needs benchmarking to adjust) */
96 #define CS_THR_MIN 128
97
98 /* SIMD unit size to ensure SIMD alignement (2 to 4 required on most
99 * current architectures, so 16 should be enough on most architectures
100 * through at least 2012) */
101
102 #define CS_SIMD_SIZE(s) (((s-1)/16+1)*16)
103
104 /*=============================================================================
105 * Local Type Definitions
106 *============================================================================*/
107
108 /*----------------------------------------------------------------------------
109 * Multigrid types
110 *----------------------------------------------------------------------------*/
111
/* Role played by a multigrid instance within a (possibly nested)
   solver hierarchy; the main instance may use further multigrid
   instances as coarse/bottom level solvers or smoothers. */

typedef enum {

  CS_MULTIGRID_MAIN,           /*!< Main multigrid type */
  CS_MULTIGRID_COARSE,         /*!< Coarse level solver */
  CS_MULTIGRID_BOTTOM,         /*!< Bottom level solver */
  CS_MULTIGRID_BOTTOM_SMOOTHE  /*!< Bottom level smoother */

} cs_multigrid_subtype_t;
120
121 /*----------------------------------------------------------------------------
122 * Local Structure Definitions
123 *----------------------------------------------------------------------------*/
124
125 /* Basic linear system solver or smoother context and functions */
126 /*--------------------------------------------------------------*/
127
/* Minimal solver/smoother wrapper: an opaque context plus the three
   function pointers needed to set up, apply and destroy it.  One such
   pair (descent/ascent) is stored per grid level (see
   cs_multigrid_setup_data_t.sles_hierarchy). */

typedef struct _cs_mg_sles_t {

  void                *context;       /* solver context
                                         (options, state, logging) */

  cs_sles_setup_t     *setup_func;    /* solver setup function */
  cs_sles_solve_t     *solve_func;    /* solve function */
  cs_sles_destroy_t   *destroy_func;  /* destruction function */

} cs_mg_sles_t;
138
139 /* Basic per linear system options and logging */
140 /*---------------------------------------------*/
141
/* Per-system options plus accumulated statistics for logging.
   Stage-indexed arrays use 0 = descent smoother, 1 = ascent smoother,
   2 = coarsest level solver. */

typedef struct _cs_multigrid_info_t {

  /* Settings */

  cs_sles_it_type_t    type[3];          /* Descent/ascent smoothers,
                                            Coarse solver */

  bool                 is_pc;            /* True if used as preconditioner */
  int                  n_max_cycles;     /* Maximum allowed cycles */

  int                  n_max_iter[3];    /* maximum iterations allowed
                                            (descent/ascent/coarse) */
  int                  poly_degree[3];   /* polynomial preconditioning degree
                                            (descent/ascent/coarse) */

  double               precision_mult[3]; /* solver precision multiplier
                                             (descent/ascent/coarse) */

  /* Logging */

  unsigned             n_calls[2];       /* Number of times grids built
                                            (0) or solved (1) */

  unsigned long long   n_levels_tot;     /* Total accumulated number of
                                            grid levels built */
  unsigned             n_levels[3];      /* Number of grid levels:
                                            [last, min, max] */

  unsigned             n_cycles[3];      /* Number of cycles for
                                            system resolution:
                                            [min, max, total] */

  cs_timer_counter_t   t_tot[2];         /* Total time used:
                                            [build, solve] */

} cs_multigrid_info_t;
178
179 /* Per level info and logging */
180 /*----------------------------*/
181
/* Per grid-level statistics.  4-slot arrays follow the convention
   [last, min, max, total]; means are obtained by dividing totals by
   the matching call count (see _multigrid_performance_log). */

typedef struct _cs_multigrid_level_info_t {

  unsigned long long   n_ranks[4];          /* Number of ranks for this level:
                                               [last, min, max, total] */
  unsigned long long   n_g_rows[4];         /* Global number of rows
                                               (last, min, max, total) */
  unsigned long long   n_elts[3][4];        /* Mean number of rows,
                                               rows + ghosts, and entries
                                               across ranks (last, min, max,
                                               total) */
  double               imbalance[3][4];     /* Imbalance for rows, rows
                                               + ghosts, and entries
                                               (last, min, max, total) */

  unsigned long long   n_it_solve[4];       /* Number of iterations for
                                               solving [last, min, max, total] */
  unsigned long long   n_it_ds_smoothe[4];  /* Number of iterations for
                                               descent smoothing:
                                               [last, min, max, total] */
  unsigned long long   n_it_as_smoothe[4];  /* Number of iterations for
                                               ascent smoothing:
                                               [last, min, max, total] */

  unsigned             n_calls[7];          /* Total number of calls:
                                               build, solve, descent smoothe,
                                               ascent smoothe, restrict from
                                               finer, prolong to finer,
                                               BLAS */

  cs_timer_counter_t   t_tot[7];            /* Total timers count:
                                               [build, solve, descent smoothe,
                                               ascent smoothe, restrict from
                                               finer, prolong to finer,
                                               BLAS] */

} cs_multigrid_level_info_t;
218
219 /* Grid hierarchy */
220 /*----------------*/
221
/* Transient state built by setup and consumed by solve; created by
   _multigrid_setup_data_create and kept until the associated system
   is freed. */

typedef struct _cs_multigrid_setup_data_t {

  /* Setup */

  unsigned        n_levels;           /* Current number of grid levels */
  unsigned        n_levels_alloc;     /* Allocated number of grid levels */

  cs_grid_t     **grid_hierarchy;     /* Array of grid pointers */
  cs_mg_sles_t   *sles_hierarchy;     /* Contexts for associated smoothers
                                         and solvers (i*2: descent, coarse;
                                         i*2+1: ascent) */

  /* Arrays used only for solving, but maintained until free,
     so as to be usable by convergence error handler. */

  double     exit_initial_residue;    /* Last level initial residue */
  double     exit_residue;            /* Last residue */
  int        exit_level;              /* Last level during solve */
  int        exit_cycle_id;           /* Last cycle id during solve */

  cs_real_t  *rhs_vx_buf;             /* Coarse grid "right hand sides"
                                         and corrections buffer */
  cs_real_t **rhs_vx;                 /* Coarse grid "right hand sides"
                                         and corrections (pointers into
                                         rhs_vx_buf) */

  /* Options used only when used as a preconditioner */

  char *pc_name;                      /* name of preconditioning system */

  int   pc_verbosity;                 /* preconditioner verbosity */

  cs_real_t  *pc_aux;                 /* preconditioner auxiliary array */


} cs_multigrid_setup_data_t;
257
258 /* Grid hierarchy */
259 /*----------------*/
260
/* Main multigrid solver structure: persistent settings, optional
   nested multigrid solvers, accumulated statistics, and (between
   setup and solve) the transient setup data. */

struct _cs_multigrid_t {

  /* Settings */

  cs_multigrid_type_t     type;      /* Multigrid type */
  cs_multigrid_subtype_t  subtype;   /* Multigrid subtype */

  int  aggregation_limit;   /* Maximum allowed fine rows per coarse cell */
  int  coarsening_type;     /* Coarsening traversal type */
  int  n_levels_max;        /* Maximum number of grid levels */
  cs_gnum_t  n_g_rows_min;  /* Global number of rows on coarse grids
                               under which no coarsening occurs */

  int  post_row_max;        /* If > 0, activates postprocessing of
                               coarsening, projecting coarse cell
                               numbers (modulo post_row_max)
                               on the base grid */

  double  p0p1_relax;       /* p0/p1 relaxation_parameter */
  double  k_cycle_threshold; /* threshold for k cycle */

  /* Setting for use as a preconditioner */

  double  pc_precision;     /* preconditioner precision */
  double  pc_r_norm;        /* preconditioner residue normalization */

  /* Data for postprocessing callback */

  int  post_location;       /* associated mesh location */
  int  n_levels_post;       /* Current number of postprocessed levels */

  int  **post_row_num;      /* If post_row_max > 0, array of
                               (n_levels - 1) arrays of projected
                               coarse cell numbers on the base grid */

  int  **post_row_rank;     /* If post_row_max > 0 and grid merging
                               is active, array of (n_levels - 1) arrays
                               of projected coarse cell ranks on the
                               base grid */
  char  *post_name;         /* Name for postprocessing */

  /* Options and maintained state (statistics) */

  cs_multigrid_level_info_t  *lv_info;  /* Info for each level */
  cs_multigrid_t             *lv_mg[3]; /* Optional recursive multigrid
                                           descent, ascent, or NULL */
  cs_multigrid_t             *p_mg;     /* Optional parent multigrid,
                                           or NULL */

  cs_multigrid_info_t  info;  /* Base multigrid info */

  /* Communicator used for reduction operations
     (if left at NULL, main communicator will be used) */

# if defined(HAVE_MPI)

  MPI_Comm  comm;
  MPI_Comm  caller_comm;

# endif

  /* Rank merging thresholds; logged in _multigrid_setup_log as
     "mean coarse rows threshold" and "total coarse rows threshold" */

  cs_gnum_t  merge_mean_threshold;
  cs_gnum_t  merge_glob_threshold;

  int  merge_stride;        /* merge rank stride */
  int  caller_n_ranks;      /* number of ranks in caller_comm */

  /* Data available between "setup" and "solve" states */

  cs_multigrid_setup_data_t  *setup_data;  /* setup data */

  cs_time_plot_t  *cycle_plot;       /* plotting of cycles */
  int              plot_time_stamp;  /* plotting time stamp;
                                        if < 0, use wall clock */
};
336
337 /*============================================================================
338 * Global variables
339 *============================================================================*/
340
/* Multigrid type names, indexed by cs_multigrid_type_t
   (used in setup and performance logs) */

const char *cs_multigrid_type_name[]
  = {N_("V-cycle"),
     N_("K-cycle"),
     N_("K-cycle, HPC variant"),
     N_("K-cycle, HPC coarse level")};

/* Tunable settings */

static int _k_cycle_hpc_merge_stride = 512;      /* presumably rank merge
                                                    stride for the HPC
                                                    K-cycle variant — confirm
                                                    in _setup_k_cycle_hpc_sub */
static int _k_cycle_hpc_recurse_threshold = 256; /* under this size, coarsest
                                                    level solver does not
                                                    use k-cycle preconditioning */
355
356 /*============================================================================
357 * Private function prototypes for recursive
358 *============================================================================*/
359
360 static void
361 _setup_k_cycle_hpc_sub(cs_multigrid_t *mg,
362 const char *name,
363 const cs_matrix_t *a,
364 int n_ranks,
365 int verbosity);
366
367 /*============================================================================
368 * Private function definitions
369 *============================================================================*/
370
371 /*----------------------------------------------------------------------------
372 * Initialize multigrid info structure.
373 *
374 * parameters:
375 * name <-- system name
376 * info <-- pointer to multigrid info structure
377 *----------------------------------------------------------------------------*/
378
379 static void
_multigrid_info_init(cs_multigrid_info_t * info)380 _multigrid_info_init(cs_multigrid_info_t *info)
381 {
382 int i;
383
384 /* Options */
385
386 info->type[0] = CS_SLES_PCG;
387 info->type[1] = CS_SLES_PCG;
388 info->type[2] = CS_SLES_PCG;
389
390 info->is_pc = false;
391 info->n_max_cycles = 100;
392
393 info->n_max_iter[0] = 2;
394 info->n_max_iter[1] = 10;
395 info->n_max_iter[2] = 10000;
396
397 info->poly_degree[0] = -1;
398 info->poly_degree[1] = -1;
399 info->poly_degree[2] = -1;
400
401 /* In theory, one should increase precision on coarsest mesh,
402 but in practice, it is more efficient to have a lower precision,
403 so we choose coarse_precision = global_precision; */
404
405 info->precision_mult[0] = -1.;
406 info->precision_mult[1] = -1.;
407 info->precision_mult[2] = 1.;
408
409 /* Counting and timing */
410
411 for (i = 0; i < 2; i++)
412 info->n_calls[i] = 0;
413
414 info->n_levels_tot = 0;
415
416 for (i = 0; i < 3; i++) {
417 info->n_levels[i] = 0;
418 info->n_cycles[i] = 0;
419 }
420
421 for (i = 0; i < 2; i++)
422 CS_TIMER_COUNTER_INIT(info->t_tot[i]);
423 }
424
425 /*----------------------------------------------------------------------------
426 * Initialize multigrid level info structure.
427 *
428 * parameters:
429 * name <-- system name
430 * info <-- pointer to multigrid info structure
431 *----------------------------------------------------------------------------*/
432
433 static void
_multigrid_level_info_init(cs_multigrid_level_info_t * info)434 _multigrid_level_info_init(cs_multigrid_level_info_t *info)
435 {
436 int i;
437
438 memset(info, 0, sizeof(cs_multigrid_level_info_t));
439
440 for (i = 0; i < 3; i++) {
441 info->imbalance[i][0] = HUGE_VALF;
442 info->imbalance[i][1] = 0.;
443 }
444
445 for (i = 0; i < 7; i++)
446 CS_TIMER_COUNTER_INIT(info->t_tot[i]);
447 }
448
449 /*----------------------------------------------------------------------------
450 * Output information regarding multigrid options.
451 *
452 * parameters:
453 * mg <-> pointer to multigrid structure
454 *----------------------------------------------------------------------------*/
455
/*----------------------------------------------------------------------------
 * Output information regarding multigrid options.
 *
 * Logs solver/preconditioner settings to the setup log, then recurses
 * into any nested multigrid instances (mg->lv_mg) used for a stage.
 *
 * parameters:
 *   mg <-> pointer to multigrid structure
 *----------------------------------------------------------------------------*/

static void
_multigrid_setup_log(const cs_multigrid_t *mg)
{
  /* Header depends on role: standalone solver vs preconditioner. */

  if (mg->info.is_pc == false)
    cs_log_printf(CS_LOG_SETUP,
                  _(" Solver type: multigrid\n"));
  else
    cs_log_printf(CS_LOG_SETUP,
                  _(" Multigrid preconditioner parameters:\n"));

  cs_log_printf(CS_LOG_SETUP,
                _(" Coarsening type: %s\n"
                  " Max fine rows per coarse row: %d\n"
                  " Maximum number of levels : %d\n"
                  " Minimum number of coarse rows: %llu\n"
                  " P0/P1 relaxation parameter: %g\n"
                  " Maximum number of cycles: %d\n"),
                _(cs_grid_coarsening_type_name[mg->coarsening_type]),
                mg->aggregation_limit,
                mg->n_levels_max, (unsigned long long)(mg->n_g_rows_min),
                mg->p0p1_relax, mg->info.n_max_cycles);

#if defined(HAVE_MPI)
  /* Rank merge parameters are only meaningful in parallel. */
  if (cs_glob_n_ranks > 1)
    cs_log_printf(CS_LOG_SETUP,
                  _("\n"
                    " Rank merge parameters:\n"
                    " merge rank stride: %d\n"
                    " mean coarse rows threshold: %d\n"
                    " total coarse rows threshold: %llu\n"),
                  mg->merge_stride,
                  (int)(mg->merge_mean_threshold),
                  (unsigned long long)(mg->merge_glob_threshold));
#endif

  cs_log_printf(CS_LOG_SETUP,
                _(" Cycle type: %s\n"),
                _(cs_multigrid_type_name[mg->type]));

  const char *stage_name[] = {"Descent smoother",
                              "Ascent smoother",
                              "Coarsest level solver"};

  /* Per-stage settings: 0 = descent, 1 = ascent, 2 = coarsest solver. */

  for (int i = 0; i < 3; i++) {

    /* Stage uses a plain iterative smoother/solver */

    if (   mg->info.type[i] != CS_SLES_N_IT_TYPES
        && mg->info.type[i] < CS_SLES_N_SMOOTHER_TYPES) {
      cs_log_printf(CS_LOG_SETUP,
                    _(" %s:\n"
                      " Type: %s\n"),
                    _(stage_name[i]),
                    _(cs_sles_it_type_name[mg->info.type[i]]));

      if (mg->info.poly_degree[i] > -1) {
        cs_log_printf(CS_LOG_SETUP,
                      _(" Preconditioning: "));
        if (mg->info.poly_degree[i] == 0)
          cs_log_printf(CS_LOG_SETUP, _("Jacobi\n"));
        else if (mg->info.poly_degree[i] < 0) {
          /* Negative degree with a nested multigrid means the nested
             instance acts as the preconditioner. */
          if (mg->lv_mg[i] != NULL) {
            cs_log_printf(CS_LOG_SETUP, "%s\n",
                          _(cs_multigrid_type_name[mg->lv_mg[i]->type]));
          }
          else
            cs_log_printf(CS_LOG_SETUP, _("None\n"));
        }
        else
          cs_log_printf(CS_LOG_SETUP, _("polynomial, degree %d\n"),
                        mg->info.poly_degree[i]);
      }
      cs_log_printf(CS_LOG_SETUP,
                    _(" Maximum number of iterations: %d\n"
                      " Precision multiplier: %g\n"),
                    mg->info.n_max_iter[i],
                    mg->info.precision_mult[i]);
    }

    /* Otherwise the stage is handled by a nested multigrid: recurse. */

    else if (mg->lv_mg[i] != NULL) {
      cs_log_printf(CS_LOG_SETUP, " %s:\n", _(stage_name[i]));
      _multigrid_setup_log(mg->lv_mg[i]);
    }

  }

  cs_log_printf(CS_LOG_SETUP,
                _(" Postprocess coarsening: %d\n"),
                mg->post_row_max);
}
543
544 /*----------------------------------------------------------------------------
545 * Output information regarding multigrid resolution.
546 *
547 * parameters:
548 * mg <-> pointer to multigrid structure
549 *----------------------------------------------------------------------------*/
550
/*----------------------------------------------------------------------------
 * Output information regarding multigrid resolution.
 *
 * Writes accumulated statistics (level/cycle counts, per-level sizes,
 * iteration counts and timers) to the performance log, then recurses
 * into nested multigrid solvers.
 *
 * parameters:
 *   mg <-> pointer to multigrid structure
 *----------------------------------------------------------------------------*/

static void
_multigrid_performance_log(const cs_multigrid_t *mg)
{
  unsigned i;

  /* Clamp denominators to 1 so means are well-defined even if the
     system was never built or solved. */

  unsigned long long n_builds_denom = CS_MAX(mg->info.n_calls[0], 1);
  unsigned long long n_solves_denom = CS_MAX(mg->info.n_calls[1], 1);
  int n_lv_min = mg->info.n_levels[1];
  int n_lv_max = mg->info.n_levels[2];
  int n_lv_mean = (int)(mg->info.n_levels_tot / n_builds_denom);
  int n_cy_mean = (int)(mg->info.n_cycles[2] / n_solves_denom);

  char tmp_s[7][64] = {"", "", "", "", "", "", ""};
  const char *stage_name[2] = {N_("Construction:"), N_("Resolution:")};
  const char *lv_stage_name[7] = {N_("build:"), N_("solve:"),
                                  N_("descent smoothe:"), N_("ascent smoothe:"),
                                  N_("restrict:"), N_("prolong:"),
                                  N_("BLAS")};

  cs_log_printf(CS_LOG_PERFORMANCE,
                _("\n"
                  " Multigrid:\n"
                  " %s\n"
                  " Coarsening: %s\n"),
                _(cs_multigrid_type_name[mg->type]),
                _(cs_grid_coarsening_type_name[mg->coarsening_type]));

  /* Smoother/solver names, only when stages use plain iterative types. */

  if (   mg->info.type[0] != CS_SLES_N_IT_TYPES
      && mg->info.type[0] < CS_SLES_N_SMOOTHER_TYPES) {

    const char *descent_smoother_name = cs_sles_it_type_name[mg->info.type[0]];
    const char *ascent_smoother_name = cs_sles_it_type_name[mg->info.type[1]];

    if (mg->info.type[0] == mg->info.type[1])
      cs_log_printf(CS_LOG_PERFORMANCE,
                    _(" Smoother: %s\n"),
                    _(descent_smoother_name));
    else
      cs_log_printf(CS_LOG_PERFORMANCE,
                    _(" Descent smoother: %s\n"
                      " Ascent smoother: %s\n"),
                    _(descent_smoother_name), _(ascent_smoother_name));

    cs_log_printf(CS_LOG_PERFORMANCE,
                  _(" Coarsest level solver: %s\n"),
                  _(cs_sles_it_type_name[mg->info.type[2]]));

  }

  /* Summary table header: mean / minimum / maximum columns. */

  sprintf(tmp_s[0], "%-36s", "");
  cs_log_strpadl(tmp_s[1], _(" mean"), 12, 64);
  cs_log_strpadl(tmp_s[2], _("minimum"), 12, 64);
  cs_log_strpadl(tmp_s[3], _("maximum"), 12, 64);

  cs_log_printf(CS_LOG_PERFORMANCE,
                "\n %s %s %s %s\n",
                tmp_s[0], tmp_s[1], tmp_s[2], tmp_s[3]);

  cs_log_strpad(tmp_s[0], _("Number of levels:"), 36, 64);
  cs_log_strpad(tmp_s[1], _("Number of cycles:"), 36, 64);

  cs_log_printf(CS_LOG_PERFORMANCE,
                " %s %12d %12d %12d\n",
                tmp_s[0], n_lv_mean, n_lv_min, n_lv_max);
  cs_log_printf(CS_LOG_PERFORMANCE,
                " %s %12d %12d %12d\n\n",
                tmp_s[1], n_cy_mean,
                (int)(mg->info.n_cycles[0]), (int)(mg->info.n_cycles[1]));

  /* Global build/solve timers. */

  cs_log_timer_array_header(CS_LOG_PERFORMANCE,
                            2,                  /* indent, */
                            "",                 /* header title */
                            true);              /* calls column */
  cs_log_timer_array(CS_LOG_PERFORMANCE,
                     2,                  /* indent, */
                     2,                  /* n_lines */
                     stage_name,
                     mg->info.n_calls,
                     mg->info.t_tot);

  sprintf(tmp_s[0], "%-36s", "");
  cs_log_strpadl(tmp_s[1], _(" mean"), 12, 64);
  cs_log_strpadl(tmp_s[2], _("minimum"), 12, 64);
  cs_log_strpadl(tmp_s[3], _("maximum"), 12, 64);

  cs_log_printf(CS_LOG_PERFORMANCE,
                "\n %s %s %s %s\n",
                tmp_s[0], tmp_s[1], tmp_s[2], tmp_s[3]);

  /* Per-level statistics; slots [1]/[2]/[3] of the 4-slot arrays hold
     min/max/total, so totals are divided by call counts for means. */

  for (i = 0; i < mg->info.n_levels[2]; i++) {

    const cs_multigrid_level_info_t *lv_info = mg->lv_info + i;
    unsigned long long n_lv_builds = lv_info->n_calls[0];

    if (n_lv_builds < 1)  /* level never built: nothing to report */
      continue;

    cs_log_strpad(tmp_s[0], _("Number of rows:"), 34, 64);
    cs_log_printf(CS_LOG_PERFORMANCE,
                  _(" Grid level %d:\n"
                    " %s %12llu %12llu %12llu\n"),
                  i, tmp_s[0],
                  lv_info->n_g_rows[3] / n_lv_builds,
                  lv_info->n_g_rows[1], lv_info->n_g_rows[2]);

    if (mg->caller_n_ranks == 1) {
      cs_log_strpad(tmp_s[1], _("Number of entries:"), 34, 64);
      cs_log_printf(CS_LOG_PERFORMANCE,
                    " %s %12llu %12llu %12llu\n",
                    tmp_s[1],
                    lv_info->n_elts[2][3] / n_lv_builds,
                    lv_info->n_elts[2][1], lv_info->n_elts[2][2]);
    }

#if defined(HAVE_MPI)

    if (mg->caller_n_ranks > 1) {
      cs_log_strpad(tmp_s[0], _("Number of active ranks:"), 34, 64);
      cs_log_printf(CS_LOG_PERFORMANCE,
                    " %s %12llu %12llu %12llu\n",
                    tmp_s[0],
                    lv_info->n_ranks[3] / n_lv_builds,
                    lv_info->n_ranks[1], lv_info->n_ranks[2]);
      cs_log_strpad(tmp_s[0], _("Mean local rows:"), 34, 64);
      cs_log_strpad(tmp_s[1], _("Mean local columns + ghosts:"), 34, 64);
      cs_log_strpad(tmp_s[2], _("Mean local entries:"), 34, 64);
      cs_log_printf(CS_LOG_PERFORMANCE,
                    " %s %12llu %12llu %12llu\n"
                    " %s %12llu %12llu %12llu\n"
                    " %s %12llu %12llu %12llu\n",
                    tmp_s[0],
                    lv_info->n_elts[0][3] / n_lv_builds,
                    lv_info->n_elts[0][1], lv_info->n_elts[0][2],
                    tmp_s[1],
                    lv_info->n_elts[1][3] / n_lv_builds,
                    lv_info->n_elts[1][1], lv_info->n_elts[1][2],
                    tmp_s[2],
                    lv_info->n_elts[2][3] / n_lv_builds,
                    lv_info->n_elts[2][1], lv_info->n_elts[2][2]);
      cs_log_strpad(tmp_s[0], _("Rows imbalance:"), 34, 64);
      cs_log_strpad(tmp_s[1], _("Columns + ghosts imbalance:"), 34, 64);
      /* NOTE(review): label below lacks the capitalization and trailing
         colon of its siblings ("Rows imbalance:", etc.). */
      cs_log_strpad(tmp_s[2], _("entries imbalance"), 34, 64);
      cs_log_printf(CS_LOG_PERFORMANCE,
                    " %-34s %12.3f %12.3f %12.3f\n"
                    " %-34s %12.3f %12.3f %12.3f\n"
                    " %-34s %12.3f %12.3f %12.3f\n",
                    tmp_s[0],
                    lv_info->imbalance[0][3] / n_lv_builds,
                    lv_info->imbalance[0][1], lv_info->imbalance[0][2],
                    tmp_s[1],
                    lv_info->imbalance[1][3] / n_lv_builds,
                    lv_info->imbalance[1][1], lv_info->imbalance[1][2],
                    tmp_s[2],
                    lv_info->imbalance[2][3] / n_lv_builds,
                    lv_info->imbalance[2][1], lv_info->imbalance[2][2]);
    }

#endif

    /* Iteration counts per role: n_calls[1] = solve,
       n_calls[2] = descent smoothe, n_calls[3] = ascent smoothe. */

    if (lv_info->n_calls[1] > 0) {
      cs_log_strpad(tmp_s[0], _("Iterations for solving:"), 34, 64);
      cs_log_printf(CS_LOG_PERFORMANCE,
                    " %s %12llu %12llu %12llu\n",
                    tmp_s[0],
                    lv_info->n_it_solve[3] / lv_info->n_calls[1],
                    lv_info->n_it_solve[1], lv_info->n_it_solve[2]);
    }

    if (lv_info->n_calls[2] > 0) {
      cs_log_strpad(tmp_s[1], _("Descent smoother iterations:"), 34, 64);
      cs_log_printf(CS_LOG_PERFORMANCE,
                    " %s %12llu %12llu %12llu\n",
                    tmp_s[1],
                    lv_info->n_it_ds_smoothe[3] / lv_info->n_calls[2],
                    lv_info->n_it_ds_smoothe[1], lv_info->n_it_ds_smoothe[2]);
    }

    if (lv_info->n_calls[3] > 0) {
      cs_log_strpad(tmp_s[2], _("Ascent smoother iterations:"), 34, 64);
      cs_log_printf(CS_LOG_PERFORMANCE,
                    " %s %12llu %12llu %12llu\n",
                    tmp_s[2],
                    lv_info->n_it_as_smoothe[3] / lv_info->n_calls[3],
                    lv_info->n_it_as_smoothe[1], lv_info->n_it_as_smoothe[2]);
    }
  }

  /* Per-level timers for all 7 tracked stages. */

  cs_log_timer_array_header(CS_LOG_PERFORMANCE,
                            2,                  /* indent, */
                            "",                 /* header title */
                            true);              /* calls column */

  for (i = 0; i < mg->info.n_levels[2]; i++) {

    const cs_multigrid_level_info_t *lv_info = mg->lv_info + i;

    cs_log_printf(CS_LOG_PERFORMANCE,
                  _(" Grid level %d:\n"), i);

    cs_log_timer_array(CS_LOG_PERFORMANCE,
                       4,                  /* indent, */
                       7,                  /* n_lines */
                       lv_stage_name,
                       lv_info->n_calls,
                       lv_info->t_tot);

  }

  /* Recurse into nested multigrid solvers, if any. */

  {
    const char *names[]
      = {N_("coarse level descent smoother"),
         N_("coarse level ascent smoother"),
         N_("bottom level solver")};

    for (i = 0; i < 3; i++) {
      if (mg->lv_mg[i] != NULL) {

        cs_log_printf(CS_LOG_PERFORMANCE,
                      _("\n"
                        " Nested %s:\n"), _(names[i]));
        cs_multigrid_log(mg->lv_mg[i], CS_LOG_PERFORMANCE);
      }
    }
  }
}
776
777 /*----------------------------------------------------------------------------
778 * Create empty structure used to maintain setup data
779 * (between cs_sles_setup and cs_sles_free type calls.
780 *
781 * returns:
782 * pointer to multigrid setup data structure
783 *----------------------------------------------------------------------------*/
784
785 static cs_multigrid_setup_data_t *
_multigrid_setup_data_create(void)786 _multigrid_setup_data_create(void)
787 {
788 cs_multigrid_setup_data_t *mgd;
789
790 BFT_MALLOC(mgd, 1, cs_multigrid_setup_data_t);
791
792 mgd->n_levels = 0;
793 mgd->n_levels_alloc = 0;
794
795 mgd->grid_hierarchy = NULL;
796 mgd->sles_hierarchy = NULL;
797
798 mgd->exit_initial_residue = -1.;
799 mgd->exit_residue = -1.;
800 mgd->exit_level = -1.;
801 mgd->exit_cycle_id = -1.;
802
803 mgd->rhs_vx_buf = NULL;
804 mgd->rhs_vx = NULL;
805
806 mgd->pc_name = NULL;
807 mgd->pc_aux = NULL;
808 mgd->pc_verbosity = 0;
809
810 return mgd;
811 }
812
813 /*----------------------------------------------------------------------------
814 * Add grid to multigrid structure hierarchy.
815 *
816 * parameters:
817 * mg <-- multigrid structure
818 * grid <-- grid to add
819 *----------------------------------------------------------------------------*/
820
821 static void
_multigrid_add_level(cs_multigrid_t * mg,cs_grid_t * grid)822 _multigrid_add_level(cs_multigrid_t *mg,
823 cs_grid_t *grid)
824 {
825 cs_multigrid_setup_data_t *mgd = mg->setup_data;
826
827 unsigned ii;
828
829 /* Reallocate arrays if necessary */
830
831 if (mgd->n_levels == mgd->n_levels_alloc) {
832
833 /* Max previous */
834 unsigned int n_lv_max_prev = CS_MAX(mg->info.n_levels[2],
835 mgd->n_levels);
836
837 if (mgd->n_levels_alloc == 0) {
838 mgd->n_levels_alloc = n_lv_max_prev;
839 if (mgd->n_levels_alloc == 0)
840 mgd->n_levels_alloc = 10;
841 }
842 else
843 mgd->n_levels_alloc *= 2;
844
845 BFT_REALLOC(mgd->grid_hierarchy, mgd->n_levels_alloc, cs_grid_t *);
846 BFT_REALLOC(mgd->sles_hierarchy, mgd->n_levels_alloc*2, cs_mg_sles_t);
847
848 for (ii = mgd->n_levels; ii < mgd->n_levels_alloc*2; ii++) {
849 mgd->sles_hierarchy[ii].context = NULL;
850 mgd->sles_hierarchy[ii].setup_func = NULL;
851 mgd->sles_hierarchy[ii].solve_func = NULL;
852 mgd->sles_hierarchy[ii].destroy_func = NULL;
853 }
854
855 if (n_lv_max_prev < mgd->n_levels_alloc) {
856 BFT_REALLOC(mg->lv_info, mgd->n_levels_alloc, cs_multigrid_level_info_t);
857 for (ii = n_lv_max_prev; ii < mgd->n_levels_alloc; ii++)
858 _multigrid_level_info_init(mg->lv_info + ii);
859 }
860
861 }
862
863 /* Add new grid to hierarchy */
864
865 mgd->grid_hierarchy[mgd->n_levels] = grid;
866
867 if (mg->post_row_num != NULL) {
868 int n_max_post_levels = (int)(mg->info.n_levels[2]) - 1;
869 BFT_REALLOC(mg->post_row_num, mgd->n_levels_alloc, int *);
870 for (ii = n_max_post_levels + 1; ii < mgd->n_levels_alloc; ii++)
871 mg->post_row_num[ii] = NULL;
872 }
873
874 if (mg->post_row_rank != NULL) {
875 int n_max_post_levels = (int)(mg->info.n_levels[2]) - 1;
876 BFT_REALLOC(mg->post_row_rank, mgd->n_levels_alloc, int *);
877 for (ii = n_max_post_levels + 1; ii < mgd->n_levels_alloc; ii++)
878 mg->post_row_rank[ii] = NULL;
879 }
880
881 /* Update associated info */
882
883 {
884 int n_ranks;
885 cs_lnum_t n_rows, n_rows_with_ghosts, n_entries;
886 cs_gnum_t n_g_rows;
887 cs_multigrid_level_info_t *lv_info = mg->lv_info + mgd->n_levels;
888
889 cs_grid_get_info(grid,
890 NULL,
891 NULL,
892 NULL,
893 NULL,
894 &n_ranks,
895 &n_rows,
896 &n_rows_with_ghosts,
897 &n_entries,
898 &n_g_rows);
899
900 mg->info.n_levels[0] = mgd->n_levels + 1;
901
902 lv_info->n_ranks[0] = n_ranks;
903 if (lv_info->n_ranks[1] > (unsigned)n_ranks)
904 lv_info->n_ranks[1] = n_ranks;
905 else if (lv_info->n_ranks[2] < (unsigned)n_ranks)
906 lv_info->n_ranks[2] = n_ranks;
907 lv_info->n_ranks[3] += n_ranks;
908
909 lv_info->n_g_rows[0] = n_g_rows;
910 if (lv_info->n_g_rows[1] > n_g_rows)
911 lv_info->n_g_rows[1] = n_g_rows;
912 else if (lv_info->n_g_rows[2] < n_g_rows)
913 lv_info->n_g_rows[2] = n_g_rows;
914 lv_info->n_g_rows[3] += n_g_rows;
915
916 lv_info->n_elts[0][0] = n_rows;
917 lv_info->n_elts[1][0] = n_rows_with_ghosts;
918 lv_info->n_elts[2][0] = n_entries;
919
920 for (ii = 0; ii < 3; ii++) {
921 if (lv_info->n_elts[ii][1] > lv_info->n_elts[ii][0])
922 lv_info->n_elts[ii][1] = lv_info->n_elts[ii][0];
923 else if (lv_info->n_elts[ii][2] < lv_info->n_elts[ii][0])
924 lv_info->n_elts[ii][2] = lv_info->n_elts[ii][0];
925 lv_info->n_elts[ii][3] += lv_info->n_elts[ii][0];
926 }
927
928 #if defined(HAVE_MPI)
929
930 if (mg->caller_n_ranks > 1) {
931 cs_gnum_t tot_sizes[3], max_sizes[3];
932 cs_gnum_t loc_sizes[3] = {n_rows, n_rows_with_ghosts, n_entries};
933 MPI_Allreduce(loc_sizes, tot_sizes, 3, CS_MPI_GNUM, MPI_SUM,
934 mg->caller_comm);
935 MPI_Allreduce(loc_sizes, max_sizes, 3, CS_MPI_GNUM, MPI_MAX,
936 mg->caller_comm);
937 for (ii = 0; ii < 3; ii++) {
938 lv_info->imbalance[ii][0] = ( max_sizes[ii]
939 / (tot_sizes[ii]*1.0/n_ranks)) - 1.0;
940 if (lv_info->imbalance[ii][1] > lv_info->imbalance[ii][0])
941 lv_info->imbalance[ii][1] = lv_info->imbalance[ii][0];
942 else if (lv_info->imbalance[ii][2] < lv_info->imbalance[ii][0])
943 lv_info->imbalance[ii][2] = lv_info->imbalance[ii][0];
944 lv_info->imbalance[ii][3] += lv_info->imbalance[ii][0];
945 }
946 }
947
948 #endif /* defined(HAVE_MPI) */
949
950 if (lv_info->n_calls[0] == 0) {
951 lv_info->n_ranks[1] = n_ranks;
952 lv_info->n_g_rows[1] = n_g_rows;
953 for (ii = 0; ii < 3; ii++) {
954 lv_info->n_elts[ii][1] = lv_info->n_elts[ii][0];
955 #if defined(HAVE_MPI)
956 lv_info->imbalance[ii][1] = lv_info->imbalance[ii][0];
957 #endif
958 }
959 }
960
961 lv_info->n_calls[0] += 1;
962 }
963
964 /* Ready for next level */
965
966 mgd->n_levels += 1;
967 }
968
969 /*----------------------------------------------------------------------------
970 * Add postprocessing info to multigrid hierarchy
971 *
972 * parameters:
973 * mg <-> multigrid structure
974 * name <-- postprocessing name
975 * post_location <-- where to postprocess (cells or vertices)
976 * n_base_cells <-- number of cells in base grid
977 *----------------------------------------------------------------------------*/
978
/*----------------------------------------------------------------------------
 * Add postprocessing info to multigrid hierarchy.
 *
 * Projects each coarse level's row numbers (and, when rank merging is
 * active, row ranks) onto the base grid for visualization.
 *
 * parameters:
 *   mg            <-> multigrid structure
 *   name          <-- postprocessing name
 *   post_location <-- where to postprocess (cells or vertices)
 *   n_base_rows   <-- number of rows in base grid
 *----------------------------------------------------------------------------*/

static void
_multigrid_add_post(cs_multigrid_t  *mg,
                    const char      *name,
                    int              post_location,
                    cs_lnum_t        n_base_rows)
{
  cs_multigrid_setup_data_t *mgd = mg->setup_data;

  int ii;

  assert(mg != NULL);

  /* Coarsening postprocessing is only active when post_row_max > 0. */

  if (mg->post_row_max < 1)
    return;

  mg->post_location = post_location;
  mg->n_levels_post = mgd->n_levels - 1;

  BFT_REALLOC(mg->post_name, strlen(name) + 1, char);
  strcpy(mg->post_name, name);

  assert(mg->n_levels_post <= mg->n_levels_max);

  /* Reallocate arrays if necessary */

  if (mg->post_row_num == NULL) {
    BFT_MALLOC(mg->post_row_num, mg->n_levels_max, int *);
    for (ii = 0; ii < mg->n_levels_max; ii++)
      mg->post_row_num[ii] = NULL;
  }

  /* Rank projection is only needed when grids may be merged across
     ranks (merge_stride > 1). */

  if (mg->post_row_rank == NULL && mg->merge_stride > 1) {
    BFT_MALLOC(mg->post_row_rank, mg->n_levels_max, int *);
    for (ii = 0; ii < mg->n_levels_max; ii++)
      mg->post_row_rank[ii] = NULL;
  }

  /* Project row numbers of each coarse level (grid_hierarchy[ii+1],
     skipping the base grid at index 0) modulo mg->post_row_max. */

  for (ii = 0; ii < mg->n_levels_post; ii++) {
    BFT_REALLOC(mg->post_row_num[ii], n_base_rows, int);
    cs_grid_project_row_num(mgd->grid_hierarchy[ii+1],
                            n_base_rows,
                            mg->post_row_max,
                            mg->post_row_num[ii]);
  }

  if (mg->post_row_rank != NULL) {
    for (ii = 0; ii < mg->n_levels_post; ii++) {
      BFT_REALLOC(mg->post_row_rank[ii], n_base_rows, int);
      cs_grid_project_row_rank(mgd->grid_hierarchy[ii+1],
                               n_base_rows,
                               mg->post_row_rank[ii]);
    }
  }
}
1033
1034 /*----------------------------------------------------------------------------
1035 * Post process variables associated with Multigrid hierarchy
1036 *
1037 * parameters:
1038 * mgh <-- multigrid hierarchy
1039 * ts <-- time step status structure
1040 *----------------------------------------------------------------------------*/
1041
static void
_cs_multigrid_post_function(void                  *mgh,
                            const cs_time_step_t  *ts)
{
  CS_UNUSED(ts);

  int ii;
  size_t name_len;
  char *var_name = NULL;
  cs_multigrid_t *mg = mgh;
  const char *base_name = NULL;

  /* Return if necessary structures inconsistent or have been destroyed */

  if (mg == NULL)
    return;

  if (mg->post_row_num == NULL || cs_post_mesh_exists(-1) != true)
    return;

  /* For vertex-based output, values must first be scattered from the
     range-set's compact representation to mesh vertices. */

  int *s_num = NULL;
  const cs_range_set_t *rs = NULL;
  if (mg->post_location == CS_MESH_LOCATION_VERTICES) {
    BFT_MALLOC(s_num, cs_glob_mesh->n_vertices, int);
    rs = cs_glob_mesh->vtx_range_set;
  }

  /* Allocate name buffer; sized for "mg <base_name> nnn" or
     "rk <base_name> nnn" plus terminator. */

  base_name = mg->post_name;
  name_len = 3 + strlen(base_name) + 1 + 3 + 1 + 4 + 1;
  BFT_MALLOC(var_name, name_len, char);

  /* Loop on grid levels */

  for (ii = 0; ii < mg->n_levels_post; ii++) {

    sprintf(var_name, "mg %s %2d", base_name, (ii+1));

    /* Coarse row numbers projected on the base grid */

    if (mg->post_location == CS_MESH_LOCATION_CELLS)
      cs_post_write_var(CS_POST_MESH_VOLUME,
                        CS_POST_WRITER_ALL_ASSOCIATED,
                        var_name,
                        1,
                        false,
                        true,
                        CS_POST_TYPE_int,
                        mg->post_row_num[ii],
                        NULL,
                        NULL,
                        cs_glob_time_step);

    else if (mg->post_location == CS_MESH_LOCATION_VERTICES) {
      cs_range_set_scatter(rs, CS_INT_TYPE, 1, mg->post_row_num[ii], s_num);
      cs_post_write_vertex_var(CS_POST_MESH_VOLUME,
                               CS_POST_WRITER_ALL_ASSOCIATED,
                               var_name,
                               1,
                               false,
                               true,
                               CS_POST_TYPE_int,
                               s_num,
                               cs_glob_time_step);
    }

    else
      bft_error(__FILE__, __LINE__, 0,
                "%s: Invalid location for post-processing.\n", __func__);

    /* Arrays are consumed (freed) once written */

    BFT_FREE(mg->post_row_num[ii]);

    /* Optional owning-rank output (only present when grids are merged) */

    if (mg->post_row_rank != NULL) {

      sprintf(var_name, "rk %s %2d",
              base_name, (ii+1));

      if (mg->post_location == CS_MESH_LOCATION_CELLS)
        cs_post_write_var(CS_POST_MESH_VOLUME,
                          CS_POST_WRITER_ALL_ASSOCIATED,
                          var_name,
                          1,
                          false,
                          true,
                          CS_POST_TYPE_int,
                          mg->post_row_rank[ii],
                          NULL,
                          NULL,
                          cs_glob_time_step);
      else if (mg->post_location == CS_MESH_LOCATION_VERTICES) {
        cs_range_set_scatter(rs, CS_INT_TYPE, 1, mg->post_row_rank[ii], s_num);
        cs_post_write_vertex_var(CS_POST_MESH_VOLUME,
                                 CS_POST_WRITER_ALL_ASSOCIATED,
                                 var_name,
                                 1,
                                 false,
                                 true,
                                 CS_POST_TYPE_int,
                                 s_num,
                                 cs_glob_time_step);
      }

      BFT_FREE(mg->post_row_rank[ii]);

    }

  }
  mg->n_levels_post = 0;

  BFT_FREE(s_num);
  BFT_FREE(var_name);
}
1153
1154 /*----------------------------------------------------------------------------
1155 * Function returning the type name of polynomial preconditioner context.
1156 *
1157 * parameters:
1158 * context <-- pointer to preconditioner context
1159 * logging <-- if true, logging description; if false, canonical name
1160 *----------------------------------------------------------------------------*/
1161
1162 static const char *
_multigrid_pc_get_type(const void * context,bool logging)1163 _multigrid_pc_get_type(const void *context,
1164 bool logging)
1165 {
1166 CS_UNUSED(context);
1167
1168 if (logging == false) {
1169 static const char t[] = "multigrid";
1170 return t;
1171 }
1172 else {
1173 static const char t[] = N_("Multigrid");
1174 return _(t);
1175 }
1176 }
1177
1178 /*----------------------------------------------------------------------------
1179 * Function for setup of a multigrid preconditioner.
1180 *
1181 * parameters:
1182 * context <-> pointer to preconditioner context
1183 * name <-- pointer to name of associated linear system
1184 * a <-- matrix
1185 * verbosity <-- associated verbosity
1186 *----------------------------------------------------------------------------*/
1187
1188 static void
_multigrid_pc_setup(void * context,const char * name,const cs_matrix_t * a,int verbosity)1189 _multigrid_pc_setup(void *context,
1190 const char *name,
1191 const cs_matrix_t *a,
1192 int verbosity)
1193 {
1194 cs_multigrid_setup(context, name, a, verbosity);
1195
1196 cs_multigrid_t *mg = context;
1197 cs_multigrid_setup_data_t *mgd = mg->setup_data;
1198
1199 BFT_REALLOC(mgd->pc_name, strlen(name) + 1, char);
1200 strcpy(mgd->pc_name, name);
1201 }
1202
1203 /*----------------------------------------------------------------------------
1204 * Function for setup of a multigrid preconditioner.
1205 *
1206 * parameters:
1207 * context <-> pointer to preconditioner context
1208 * name <-- pointer to name of associated linear system
1209 * a <-- matrix
1210 * verbosity <-- associated verbosity
1211 *----------------------------------------------------------------------------*/
1212
1213 static void
_multigrid_pc_setup_k_sub(void * context,const char * name,const cs_matrix_t * a,int verbosity)1214 _multigrid_pc_setup_k_sub(void *context,
1215 const char *name,
1216 const cs_matrix_t *a,
1217 int verbosity)
1218 {
1219 cs_multigrid_t *mg = context;
1220 cs_multigrid_t *parent = mg->p_mg;
1221 cs_multigrid_setup_data_t *p_mgd = parent->setup_data;
1222
1223 int n_ranks = 1;
1224 #if defined(HAVE_MPI)
1225 {
1226 int p_lv = p_mgd->n_levels-1;
1227 MPI_Comm lv_comm = cs_grid_get_comm(p_mgd->grid_hierarchy[p_lv]);
1228 if (lv_comm != MPI_COMM_NULL)
1229 MPI_Comm_size(lv_comm, &n_ranks);
1230 }
1231 #endif
1232
1233 _setup_k_cycle_hpc_sub(context, name, a, n_ranks, verbosity);
1234
1235 cs_multigrid_setup_data_t *mgd = mg->setup_data;
1236
1237 BFT_REALLOC(mgd->pc_name, strlen(name) + 1, char);
1238 strcpy(mgd->pc_name, name);
1239 }
1240
1241 /*----------------------------------------------------------------------------
1242 * Function for setup of a multigrid (local smoother)
1243 *
1244 * parameters:
1245 * context <-> pointer to preconditioner context
1246 * name <-- pointer to name of associated linear system
1247 * a <-- matrix
1248 * verbosity <-- associated verbosity
1249 *----------------------------------------------------------------------------*/
1250
1251 static void
_multigrid_setup_k_local_smoothe(void * context,const char * name,const cs_matrix_t * a,int verbosity)1252 _multigrid_setup_k_local_smoothe(void *context,
1253 const char *name,
1254 const cs_matrix_t *a,
1255 int verbosity)
1256 {
1257 _setup_k_cycle_hpc_sub(context, name, a, 1, verbosity);
1258
1259 cs_multigrid_t *mg = context;
1260 cs_multigrid_setup_data_t *mgd = mg->setup_data;
1261
1262 BFT_REALLOC(mgd->pc_name, strlen(name) + 1, char);
1263 strcpy(mgd->pc_name, name);
1264 }
1265
1266 /*----------------------------------------------------------------------------
 * Function for setting the required tolerance for multigrid
 * as a preconditioner.
1269 *
1270 * The preconditioner is considered to have converged when
1271 * residue/r_norm <= precision, residue being the L2 norm of a.vx-rhs.
1272 *
1273 * parameters:
1274 * context <-> pointer to multigrid context
1275 * precision <-- preconditioner precision
1276 * r_norm <-- residue normalization
1277 *----------------------------------------------------------------------------*/
1278
1279 static void
_multigrid_pc_tolerance_t(void * context,double precision,double r_norm)1280 _multigrid_pc_tolerance_t(void *context,
1281 double precision,
1282 double r_norm)
1283 {
1284 cs_multigrid_t *mg = context;
1285
1286 mg->pc_precision = precision;
1287 mg->pc_r_norm = r_norm;
1288 }
1289
1290 /*----------------------------------------------------------------------------
1291 * Function for application of a Multigrid preconditioner.
1292 *
1293 * In cases where it is desired that the preconditioner modify a vector
1294 * "in place", x_in should be set to NULL, and x_out contain the vector to
1295 * be modified (\f$x_{out} \leftarrow M^{-1}x_{out})\f$).
1296 *
1297 * parameters:
1298 * context <-> pointer to preconditioner context
1299 * x_in <-- input vector
1300 * x_out <-> input/output vector
1301 *
1302 * returns:
1303 * preconditioner application status
1304 *----------------------------------------------------------------------------*/
1305
static cs_sles_pc_state_t
_multigrid_pc_apply(void             *context,
                    const cs_real_t  *x_in,
                    cs_real_t        *x_out)
{
  int n_iter;
  double residue;

  cs_multigrid_t *mg = context;
  cs_multigrid_setup_data_t *mgd = mg->setup_data;

  /* Fine-level matrix of the hierarchy built at setup */

  const cs_matrix_t *a = cs_grid_get_matrix(mgd->grid_hierarchy[0]);

  const cs_real_t *rhs = x_in;

  const cs_lnum_t *db_size = cs_matrix_get_diag_block_size(a);
  const cs_lnum_t n_rows = cs_matrix_get_n_rows(a) * db_size[1];

  /* If preconditioner is "in-place", use additional buffer */

  if (x_in == NULL) {
    if (mgd->pc_aux == NULL) {
      /* Sized on columns (includes ghost values) -- buffer is kept
         for reuse across applications */
      const cs_lnum_t n_cols = cs_matrix_get_n_columns(a) * db_size[1];
      BFT_MALLOC(mgd->pc_aux, n_cols, cs_real_t);
    }
    /* Copy x_out to the auxiliary buffer so it can serve as RHS
       while x_out is reused for the solution */
    cs_real_t *restrict _rhs = mgd->pc_aux;
#   pragma omp parallel for if(n_rows > CS_THR_MIN)
    for (cs_lnum_t ii = 0; ii < n_rows; ii++)
      _rhs[ii] = x_out[ii];
    rhs = _rhs;
  }

  /* Zero initial guess */

# pragma omp parallel for if(n_rows > CS_THR_MIN)
  for (cs_lnum_t ii = 0; ii < n_rows; ii++)
    x_out[ii] = 0.;

  cs_sles_convergence_state_t cvg = cs_multigrid_solve(context,
                                                       mgd->pc_name,
                                                       a,
                                                       mgd->pc_verbosity,
                                                       mg->pc_precision,
                                                       mg->pc_r_norm,
                                                       &n_iter,
                                                       &residue,
                                                       rhs,
                                                       x_out,
                                                       0,
                                                       NULL);

  /* Map solver convergence state to preconditioner state */

  cs_sles_pc_state_t state;

  switch(cvg) {
  case CS_SLES_DIVERGED:
    state = CS_SLES_PC_DIVERGED;
    break;
  case CS_SLES_BREAKDOWN:
    state = CS_SLES_PC_BREAKDOWN;
    break;
  case CS_SLES_CONVERGED:
    state = CS_SLES_PC_CONVERGED;
    break;
  default:
    /* iterating / max-iteration states */
    state = CS_SLES_PC_MAX_ITERATION;
  }

  return state;
}
1373
1374 /*----------------------------------------------------------------------------
1375 * Create a Preconditioner structure using multigrid.
1376 *
1377 * parameters:
1378 * mg_type <-- type of multigrid algorithm to use
1379 *
1380 * returns:
1381 * pointer to newly created preconditioner object.
1382 *----------------------------------------------------------------------------*/
1383
static cs_multigrid_t *
_multigrid_pc_create(cs_multigrid_type_t  mg_type)
{
  /* Create base structure, then apply per-type default solver options
     tuned for use as a preconditioner (single cycle per application). */

  cs_multigrid_t *mg = cs_multigrid_create(mg_type);

  switch(mg_type) {
  case CS_MULTIGRID_V_CYCLE:
    cs_multigrid_set_solver_options
      (mg,
       CS_SLES_P_SYM_GAUSS_SEIDEL, /* descent smoothe */
       CS_SLES_P_SYM_GAUSS_SEIDEL, /* ascent smoothe */
       CS_SLES_PCG,                /* coarse smoothe */
       1,                          /* n_max_cycles */
       1,                          /* n_max_iter_descent, */
       1,                          /* n_max_iter_ascent */
       500,                        /* n_max_iter_coarse */
       0, 0, -1,                   /* precond poly_degree */
       -1, -1, 1);                 /* precision_multiplier */
    break;

  case CS_MULTIGRID_K_CYCLE:
    cs_multigrid_set_solver_options
      (mg,
       CS_SLES_TS_F_GAUSS_SEIDEL,
       CS_SLES_TS_B_GAUSS_SEIDEL,
       CS_SLES_PCG,                /* coarse smoothe */
       1,                          /* n max cycles */
       1,                          /* n max iter for descent */
       1,                          /* n max iter for ascent */
       500,                        /* n max iter for coarse solve */
       0, 0, 0,                    /* precond degree */
       -1, -1, 10);                /* precision multiplier */
    break;

  case CS_MULTIGRID_K_CYCLE_HPC:
    cs_multigrid_set_solver_options
      (mg,
       CS_SLES_TS_F_GAUSS_SEIDEL,
       CS_SLES_TS_B_GAUSS_SEIDEL,
       CS_SLES_FCG,                /* coarse smoothe */
       1,                          /* n max cycles */
       1,                          /* n max iter for descent */
       1,                          /* n max iter for ascent */
       500,                        /* n max iter for coarse solve */
       0, 0, 0,                    /* precond degree */
       -1, -1, 1);                 /* precision multiplier */
    break;

  default:
    /* Unknown multigrid type: programming error */
    assert(0);
  }

  return mg;
}
1438
1439 /*----------------------------------------------------------------------------*/
1440 /*!
1441 * \brief Create a multigrid smoother using a local K cycle for coarse-level
1442 * solvers used by high-performance system variant of K-cycle.
1443 *
1444 * \param[in] parent parent-level multigrid, or NULL
1445
 * \return  pointer to newly created multigrid object.
1447 */
1448 /*----------------------------------------------------------------------------*/
1449
1450 static cs_multigrid_t *
_multigrid_create_k_cycle_bottom_smoother(cs_multigrid_t * parent)1451 _multigrid_create_k_cycle_bottom_smoother(cs_multigrid_t *parent)
1452 {
1453 cs_multigrid_t *mg = _multigrid_pc_create(CS_MULTIGRID_K_CYCLE);
1454
1455 mg->subtype = CS_MULTIGRID_BOTTOM_SMOOTHE;
1456 mg->info.is_pc = true;
1457 mg->aggregation_limit = 8;
1458 mg->k_cycle_threshold = -1;
1459
1460 #if defined(HAVE_MPI)
1461 mg->comm = MPI_COMM_NULL;
1462 if (parent != NULL)
1463 mg->caller_comm = parent->comm;
1464 else
1465 mg->caller_comm = cs_glob_mpi_comm;
1466 #endif
1467
1468 return mg;
1469 }
1470
1471 /*----------------------------------------------------------------------------*/
1472 /*!
1473 * \brief Destructor for preconditioner using persistent multigrid context.
1474 *
1475 * Using this destructor, the multigrid context is only freed, not destroyed.
1476 *
1477 * \param[in, out] context pointer to multigrid linear solver info
1478 * (actual type: cs_multigrid_t **)
1479 */
1480 /*----------------------------------------------------------------------------*/
1481
1482 static void
_pc_from_mg_destroy(void ** context)1483 _pc_from_mg_destroy(void **context)
1484 {
1485 cs_multigrid_t *mg = (cs_multigrid_t *)(*context);
1486
1487 if (mg != NULL)
1488 cs_multigrid_free(mg);
1489 }
1490
1491 /*----------------------------------------------------------------------------*/
1492 /*!
 * \brief Create a multigrid preconditioner using a K cycle for coarse-level
1494 * solvers used by high-performance system variant of K-cycle.
1495 *
1496 * \param[in] mg associated coarse level multigrid structure
1497 *
1498 * \return pointer to newly created preconditioner object.
1499 */
1500 /*----------------------------------------------------------------------------*/
1501
1502 static cs_sles_pc_t *
_pc_create_from_mg_sub(cs_multigrid_t * mg)1503 _pc_create_from_mg_sub(cs_multigrid_t *mg)
1504 {
1505 cs_sles_pc_t *pc = cs_sles_pc_define(mg,
1506 _multigrid_pc_get_type,
1507 _multigrid_pc_setup_k_sub,
1508 _multigrid_pc_tolerance_t,
1509 _multigrid_pc_apply,
1510 cs_multigrid_free,
1511 cs_multigrid_log,
1512 cs_multigrid_copy,
1513 _pc_from_mg_destroy);
1514
1515 return pc;
1516 }
1517
1518 /*----------------------------------------------------------------------------*/
1519 /*!
1520 * \brief Create recursive high-performance system variant of K-cycle
1521 * for coarse-solver bottom-level.
1522 *
1523 * Returns NULL if the number of associated ranks is small enough that
1524 * no multigrid solver is required.
1525 *
1526 * \param[in] parent parent-level multigrid, or NULL
1527
1528 * \return pointer to newly created preconditioner object.
1529 */
1530 /*----------------------------------------------------------------------------*/
1531
static cs_multigrid_t *
_multigrid_create_k_cycle_bottom_coarsest(cs_multigrid_t  *parent)
{
  cs_multigrid_t *mg = NULL;

#if defined(HAVE_MPI)
  if (parent != NULL) {
    int size = 1;
    if (parent->comm != MPI_COMM_NULL)
      MPI_Comm_size(parent->comm, &size);

    /* A coarse-level solver is only needed when enough ranks remain
       to merge; otherwise NULL is returned and no recursion occurs. */

    if (size >= _k_cycle_hpc_merge_stride) {
      if (size > _k_cycle_hpc_merge_stride)
        mg = _multigrid_pc_create(CS_MULTIGRID_K_CYCLE_HPC); /* recursive */
      else {
        /* Exactly at merge stride: use a plain K-cycle, but only above
           the recursion threshold. */
        if (size > _k_cycle_hpc_recurse_threshold) {
          mg = _multigrid_pc_create(CS_MULTIGRID_K_CYCLE);
          mg->aggregation_limit = 8;
          mg->k_cycle_threshold = -1;
        }
      }
      if (mg != NULL) {
        mg->caller_comm = parent->comm;
        /* Reduced communicator grouping ranks by the merge stride */
        mg->comm = cs_grid_get_comm_merge(parent->comm,
                                          _k_cycle_hpc_merge_stride);
        mg->subtype = CS_MULTIGRID_COARSE;
      }
    }

  }
#endif /* defined(HAVE_MPI) */

  return mg;
}
1566
1567 /*----------------------------------------------------------------------------*/
1568 /*!
1569 * \brief Create a 2 level coarse-level solver for use
1570 * by high-performance system variant of K-cycle.
1571 *
1572 * \param[in] parent parent-level multigrid, or NULL
1573
1574 * \return pointer to newly created preconditioner object.
1575 */
1576 /*----------------------------------------------------------------------------*/
1577
static cs_multigrid_t *
_multigrid_create_k_cycle_bottom(cs_multigrid_t  *parent)
{
  /* A 2-level V-cycle whose smoother and coarse solver are themselves
     multigrid objects (see mg->lv_mg assignments below). */

  cs_multigrid_t *mg = cs_multigrid_create(CS_MULTIGRID_V_CYCLE);

  mg->subtype = CS_MULTIGRID_BOTTOM;
  mg->p_mg = parent;

  mg->aggregation_limit = 0;
  mg->coarsening_type = CS_GRID_COARSENING_DEFAULT; /* Not used here */
  mg->n_levels_max = 2;
  mg->n_g_rows_min = 1;

  mg->pc_precision = 0.0;
  mg->pc_r_norm = 0.0;

  mg->n_levels_post = 0;
  mg->setup_data = NULL;

  /* Level info array resized to the 2-level maximum */

  BFT_REALLOC(mg->lv_info, mg->n_levels_max, cs_multigrid_level_info_t);

  for (int ii = 0; ii < mg->n_levels_max; ii++)
    _multigrid_level_info_init(mg->lv_info + ii);

  mg->caller_n_ranks = cs_glob_n_ranks;

#if defined(HAVE_MPI)
  mg->merge_mean_threshold = 0;
  mg->merge_glob_threshold = 1;
  mg->merge_stride = _k_cycle_hpc_merge_stride;
#endif

#if defined(HAVE_MPI)
  if (parent != NULL) {
    mg->comm = parent->comm;
    mg->caller_comm = parent->comm;
  }
#endif /* defined(HAVE_MPI) */

  /* local k-cycle for level 0 smoother,
     k-cycle preconditioning for level 1 solver (such as FCG) */

  mg->lv_mg[0] = _multigrid_create_k_cycle_bottom_smoother(mg);
  mg->lv_mg[2] = _multigrid_create_k_cycle_bottom_coarsest(mg);

  /* Sub-multigrids point back to the caller's parent / this level */

  mg->lv_mg[0]->p_mg = parent;
  if (mg->lv_mg[2] != NULL)
    mg->lv_mg[2]->p_mg = mg;

  /* Coarse solve is preconditioned by lv_mg[2] when present (-1),
     otherwise a degree-1 polynomial preconditioner is used. */

  int pc_degree_bottom = (mg->lv_mg[2] != NULL) ? -1 : 1;

  cs_multigrid_set_solver_options
    (mg,
     CS_SLES_N_SMOOTHER_TYPES, CS_SLES_N_SMOOTHER_TYPES, CS_SLES_FCG,
     1,   /* n max cycles */
     1,   /* n max iter for descent */
     1,   /* n max iter for ascent */
     500, /* n max iter coarse */
     -1, -1, pc_degree_bottom, /* precond degree */
     1, 1, 1);                 /* precision multiplier */

  return mg;
}
1641
1642 /*----------------------------------------------------------------------------
1643 * Allocate working array for coarse right hand sides and corrections.
1644 *
1645 * parameters:
1646 * mg <-> pointer to multigrid solver info and context
1647 * stride <-- matrix fill stride
1648 *----------------------------------------------------------------------------*/
1649
1650 static void
_multigrid_setup_sles_work_arrays(cs_multigrid_t * mg,cs_lnum_t stride)1651 _multigrid_setup_sles_work_arrays(cs_multigrid_t *mg,
1652 cs_lnum_t stride)
1653 {
1654 cs_multigrid_setup_data_t *mgd = mg->setup_data;
1655
1656 unsigned n0 = 0, n1 = 2;
1657 if ( mg->type >= CS_MULTIGRID_K_CYCLE
1658 && mg->type <= CS_MULTIGRID_K_CYCLE_HPC) {
1659 n0 = 4;
1660 n1 = n0 + 6;
1661 }
1662
1663 BFT_MALLOC(mgd->rhs_vx, mgd->n_levels*n1, cs_real_t *);
1664
1665 for (unsigned i = 0; i < n1; i++)
1666 mgd->rhs_vx[i] = NULL;
1667
1668 if (mgd->n_levels > 1) {
1669
1670 size_t wr_size0 = 0, wr_size1 = 0;
1671 for (unsigned i = 0; i < mgd->n_levels; i++) {
1672 size_t block_size
1673 = cs_grid_get_n_cols_max(mgd->grid_hierarchy[i])*stride;
1674 block_size = CS_SIMD_SIZE(block_size);
1675 if (i == 0)
1676 wr_size0 += block_size;
1677 else
1678 wr_size1 += block_size;
1679 }
1680
1681 BFT_MALLOC(mgd->rhs_vx_buf, wr_size0*n0 + wr_size1*n1, cs_real_t);
1682
1683 size_t block_size_shift = 0;
1684
1685 for (unsigned i = 0; i < mgd->n_levels; i++) {
1686 size_t block_size
1687 = cs_grid_get_n_cols_max(mgd->grid_hierarchy[i])*stride;
1688 cs_lnum_t n = (i == 0) ? n0 : n1;
1689 for (int j = 0; j < n; j++) {
1690 mgd->rhs_vx[i*n1 + j] = mgd->rhs_vx_buf+ block_size_shift;
1691 block_size_shift += block_size;
1692 }
1693 }
1694
1695 }
1696 }
1697
1698 /*----------------------------------------------------------------------------
1699 * Setup multigrid sparse linear equation solvers on existing hierarchy.
1700 *
1701 * parameters:
1702 * mg <-> pointer to multigrid solver info and context
1703 * name <-- linear system name
1704 * verbosity <-- associated verbosity
1705 *----------------------------------------------------------------------------*/
1706
static void
_multigrid_setup_sles_k_cycle_bottom(cs_multigrid_t  *mg,
                                     const char      *name,
                                     int              verbosity)
{
  cs_timer_t t0, t1;

  cs_multigrid_level_info_t *mg_lv_info;
  const cs_grid_t *g;
  const cs_matrix_t *m;

  /* Buffer for per-level solver names ("<name>:smoother:0", ...) */

  size_t l = strlen(name) + 32;
  char *_name;
  BFT_MALLOC(_name, l, char);

  /* Initialization */

  t0 = cs_timer_time();

  cs_multigrid_setup_data_t *mgd = mg->setup_data;

  cs_lnum_t stride = 1; /* For diagonal blocks */

  /* Prepare solver context: this variant assumes exactly 2 levels */

  assert(mgd->n_levels == 2);

  unsigned i = 0;

  assert(mg->subtype == CS_MULTIGRID_BOTTOM);
  assert(mg->type == CS_MULTIGRID_V_CYCLE);

  g = mgd->grid_hierarchy[i];
  m = cs_grid_get_matrix(g);

  mg_lv_info = mg->lv_info + i;

  t1 = cs_timer_time();
  cs_timer_counter_add_diff(&(mg_lv_info->t_tot[0]), &t0, &t1);

  /* Intermediate grids (here: level 0 smoother only) */

  t0 = t1;

  g = mgd->grid_hierarchy[0];
  m = cs_grid_get_matrix(g);

  mg_lv_info = mg->lv_info;

  cs_mg_sles_t *mg_sles = &(mgd->sles_hierarchy[0]);

  /* Either a plain smoother, or the local K-cycle multigrid built
     by _multigrid_create_k_cycle_bottom (lv_mg[0]). */

  if (mg->info.type[0] < CS_SLES_N_SMOOTHER_TYPES) {
    mg_sles->context
      = cs_multigrid_smoother_create(mg->info.type[0],
                                     mg->info.poly_degree[0],
                                     mg->info.n_max_iter[0]);
    mg_sles->setup_func = cs_multigrid_smoother_setup;
    mg_sles->solve_func = cs_multigrid_smoother_solve;
    mg_sles->destroy_func = cs_sles_it_destroy;
  }
  else if (i == 0 && mg->lv_mg[0] != NULL) {
    assert(mg_sles->context == NULL);
    mg_sles->context = mg->lv_mg[0];
    mg_sles->setup_func = _multigrid_setup_k_local_smoothe;
    mg_sles->solve_func = cs_multigrid_solve;
    /* Context owned by mg->lv_mg[0]; not destroyed here */
    mg_sles->destroy_func = NULL;
  }

  snprintf(_name, l-1, "%s:smoother:%d", name, i);
  _name[l-1] = '\0';

  mg_sles->setup_func(mg_sles->context, _name, m, verbosity - 2);
#if defined(HAVE_MPI)
  /* Level 0 smoother is strictly local: disable MPI reductions */
  if (mg_sles->solve_func == cs_sles_it_solve) {
    cs_sles_it_t *context = mg_sles->context;
    cs_sles_it_set_mpi_reduce_comm(context,
                                   MPI_COMM_NULL,
                                   MPI_COMM_NULL);
  }
#endif

  t1 = cs_timer_time();
  cs_timer_counter_add_diff(&(mg_lv_info->t_tot[0]), &t0, &t1);

  /* Coarsest grid */

  t0 = t1;

  g = mgd->grid_hierarchy[1];
  m = cs_grid_get_matrix(g);

  mg_lv_info = mg->lv_info + 1;

  mg_sles = &(mgd->sles_hierarchy[2]);
  mg_sles->context
    = cs_sles_it_create(mg->info.type[2],
                        mg->info.poly_degree[2],
                        mg->info.n_max_iter[2],
                        false); /* stats not updated here */
  mg_sles->setup_func = cs_sles_it_setup;
  mg_sles->solve_func = cs_sles_it_solve;
  mg_sles->destroy_func = cs_sles_it_destroy;

  /* Optional multigrid preconditioner for the coarse solver */

  if (mg->lv_mg[2] != NULL) {
    cs_sles_pc_t *pc = _pc_create_from_mg_sub(mg->lv_mg[2]);
    cs_sles_it_transfer_pc(mg_sles->context, &pc);
  }

#if defined(HAVE_MPI)
  {
    cs_sles_it_t *context = mg_sles->context;
    cs_sles_it_set_mpi_reduce_comm(context,
                                   cs_grid_get_comm(mgd->grid_hierarchy[1]),
                                   mg->comm);
  }
#endif

  /* NOTE(review): i is still 0 here, so the coarse solver is named
     "<name>:coarse:0" -- confirm this is intentional */

  snprintf(_name, l-1, "%s:coarse:%d", name, i);
  _name[l-1] = '\0';

  mg_sles->setup_func(mg_sles->context, _name, m, verbosity - 2);

  /* Diagonal block size is the same for all levels */

  const cs_lnum_t *db_size = cs_matrix_get_diag_block_size(m);
  stride = db_size[1];

  _multigrid_setup_sles_work_arrays(mg, stride);

  BFT_FREE(_name);

  /* Timing */

  t1 = cs_timer_time();
  cs_timer_counter_add_diff(&(mg_lv_info->t_tot[0]), &t0, &t1);
}
1843
1844 /*----------------------------------------------------------------------------
1845 * Setup multigrid sparse linear equation solvers on existing hierarchy.
1846 *
1847 * parameters:
1848 * mg <-> pointer to multigrid solver info and context
1849 * name <-- linear system name
1850 * verbosity <-- associated verbosity
1851 *----------------------------------------------------------------------------*/
1852
static void
_multigrid_setup_sles(cs_multigrid_t  *mg,
                      const char      *name,
                      int              verbosity)
{
  cs_timer_t t0, t1;

  cs_multigrid_level_info_t *mg_lv_info;
  const cs_grid_t *g;
  const cs_matrix_t *m;

  /* Buffer for per-level solver names ("<name>:descent:0", ...) */

  size_t l = strlen(name) + 32;
  char *_name;
  BFT_MALLOC(_name, l, char);

  /* Initialization */

  t0 = cs_timer_time();

  cs_multigrid_setup_data_t *mgd = mg->setup_data;

  cs_lnum_t stride = 1; /* For diagonal blocks */

  /* Prepare solver context */

  unsigned n_levels = mgd->n_levels;

  unsigned i = 0;

  g = mgd->grid_hierarchy[i];
  m = cs_grid_get_matrix(g);

  mg_lv_info = mg->lv_info + i;

  t1 = cs_timer_time();
  cs_timer_counter_add_diff(&(mg_lv_info->t_tot[0]), &t0, &t1);

  /* Intermediate grids: descent (j == 0) and ascent (j == 1) smoothers;
     V-cycle level 0 uses a single smoother (n_ops == 1). */

  for (i = 0; i < n_levels - 1; i++) {

    t0 = t1;

    g = mgd->grid_hierarchy[i];
    m = cs_grid_get_matrix(g);

    mg_lv_info = mg->lv_info + i;

    int n_ops = 2;
    if (i == 0) {
      if (mg->type == CS_MULTIGRID_V_CYCLE)
        n_ops = 1;
    }

    for (int j = 0; j < n_ops; j++) {
      if (   mg->info.type[j] != CS_SLES_N_IT_TYPES
          && mg->info.type[j] < CS_SLES_N_SMOOTHER_TYPES) {
        cs_mg_sles_t *mg_sles = &(mgd->sles_hierarchy[i*2 + j]);
        mg_sles->context
          = cs_multigrid_smoother_create(mg->info.type[j],
                                         mg->info.poly_degree[j],
                                         mg->info.n_max_iter[j]);
        mg_sles->setup_func = cs_multigrid_smoother_setup;
        mg_sles->solve_func = cs_multigrid_smoother_solve;
        mg_sles->destroy_func = cs_sles_it_destroy;

        /* Share context between descent and ascent smoothers if both
           are of the cs_sles_it type */
        if (j == 1) {
          if (   mg->info.type[0] != CS_SLES_N_IT_TYPES
              && mg->info.type[0] < CS_SLES_N_SMOOTHER_TYPES)
            cs_sles_it_set_shareable(mgd->sles_hierarchy[i*2 + 1].context,
                                     mgd->sles_hierarchy[i*2].context);
        }
      }
    }

    /* Level 0 may delegate smoothing to a sub-multigrid (lv_mg[0]),
       whose context is owned by mg (destroy_func == NULL). */

    if (i == 0 && mg->lv_mg[0] != NULL) {
      cs_mg_sles_t *mg_sles = &(mgd->sles_hierarchy[0]);
      assert(mg->subtype == CS_MULTIGRID_BOTTOM);
      assert(mg->type == CS_MULTIGRID_V_CYCLE);
      assert(n_ops == 1);
      assert(mg_sles->context == NULL);
      mg_sles->context = mg->lv_mg[0];
      mg_sles->setup_func = _multigrid_setup_k_local_smoothe;
      mg_sles->solve_func = cs_multigrid_solve;
      mg_sles->destroy_func = NULL;
    }

    for (int j = 0; j < n_ops; j++) {
      if (j == 0)
        snprintf(_name, l-1, "%s:descent:%d", name, i);
      else
        snprintf(_name, l-1, "%s:ascent:%d", name, i);
      _name[l-1] = '\0';
      cs_mg_sles_t *mg_sles = &(mgd->sles_hierarchy[i*2 + j]);
      mg_sles->setup_func(mg_sles->context, _name, m, verbosity - 2);
#if defined(HAVE_MPI)
      /* Restrict reductions to this level's (possibly merged) grid
         communicator */
      if (mg_sles->solve_func == cs_sles_it_solve) {
        cs_sles_it_t *context = mg_sles->context;
        MPI_Comm lv_comm = cs_grid_get_comm(mgd->grid_hierarchy[i]);
        cs_sles_it_set_mpi_reduce_comm(context,
                                       lv_comm,
                                       mg->comm);
      }
#endif
    }

    t1 = cs_timer_time();
    cs_timer_counter_add_diff(&(mg_lv_info->t_tot[0]), &t0, &t1);

  }

  /* Coarsest grid: full iterative solver (info index 2), optionally
     preconditioned by a sub-multigrid (lv_mg[2]). */

  if (n_levels > 1) {

    t0 = t1;

    i = n_levels - 1;

    g = mgd->grid_hierarchy[i];
    m = cs_grid_get_matrix(g);

    mg_lv_info = mg->lv_info + i;

    cs_mg_sles_t *mg_sles = &(mgd->sles_hierarchy[i*2]);
    mg_sles->context
      = cs_sles_it_create(mg->info.type[2],
                          mg->info.poly_degree[2],
                          mg->info.n_max_iter[2],
                          false); /* stats not updated here */
    mg_sles->setup_func = cs_sles_it_setup;
    mg_sles->solve_func = cs_sles_it_solve;
    mg_sles->destroy_func = cs_sles_it_destroy;

    if (mg->lv_mg[2] != NULL) {
      cs_sles_pc_t *pc = _pc_create_from_mg_sub(mg->lv_mg[2]);
      cs_sles_it_transfer_pc(mg_sles->context, &pc);
    }

#if defined(HAVE_MPI)
    {
      cs_sles_it_t *context = mg_sles->context;
      cs_sles_it_set_mpi_reduce_comm(context,
                                     cs_grid_get_comm(mgd->grid_hierarchy[i]),
                                     mg->comm);
    }
#endif

    snprintf(_name, l-1, "%s:coarse:%d", name, i);
    _name[l-1] = '\0';

    mg_sles->setup_func(mg_sles->context, _name, m, verbosity - 2);

    /* Diagonal block size is the same for all levels */

    const cs_lnum_t *db_size = cs_matrix_get_diag_block_size(m);
    stride = db_size[1];

  }

  _multigrid_setup_sles_work_arrays(mg, stride);

  BFT_FREE(_name);

  /* Timing */

  t1 = cs_timer_time();
  cs_timer_counter_add_diff(&(mg_lv_info->t_tot[0]), &t0, &t1);
}
2024
2025 /*----------------------------------------------------------------------------*/
2026 /*!
2027 * \brief Setup multigrid sparse linear equation solver.
2028 *
2029 * \param[in, out] context pointer to multigrid solver info and context
2030 * (actual type: cs_multigrid_t *)
2031 * \param[in] name pointer to name of linear system
2032 * \param[in] mesh associated mesh (for visualization), or NULL
2033 * \param[in, out] f associated fine grid
2034 * \param[in] verbosity associated verbosity
2035 */
2036 /*----------------------------------------------------------------------------*/
2037
static void
_setup_hierarchy(void             *context,
                 const char       *name,
                 const cs_mesh_t  *mesh,
                 cs_grid_t        *f,
                 int               verbosity)

{
  cs_multigrid_t *mg = context;

  cs_timer_t t0, t1, t2;

  int n_coarse_ranks = -2, n_coarse_ranks_prev = -2; /* for comparison only */
  cs_lnum_t n_rows = 0, n_cols_ext = 0, n_entries = 0;
  cs_gnum_t n_g_rows = 0, n_g_rows_prev = 0;

  cs_multigrid_level_info_t *mg_lv_info = NULL;

  cs_grid_t *g = f;

  t0 = cs_timer_time();

  /* Initialization */

  mg->setup_data = _multigrid_setup_data_create();

  _multigrid_add_level(mg, f); /* Assign to hierarchy */

  /* Add info on the fine grid (level 0) */

  cs_grid_get_info(f,
                   NULL,
                   NULL,
                   NULL,
                   NULL,
                   NULL,
                   &n_rows,
                   &n_cols_ext,
                   &n_entries,
                   &n_g_rows);

  mg_lv_info = mg->lv_info;

  t1 = cs_timer_time();
  cs_timer_counter_add_diff(&(mg_lv_info->t_tot[0]), &t0, &t1);

  bool add_grid = true;

  /* Coarsening loop: build one coarser grid per iteration until the
     maximum level count is reached, the grid is small enough, or
     coarsening stalls (too few rows grouped). */

  while (add_grid) {

    n_g_rows_prev = n_g_rows;
    n_coarse_ranks_prev = n_coarse_ranks;

    /* Recursion test */

    if ((int)(mg->setup_data->n_levels) >= mg->n_levels_max)
      break;

    /* Build coarser grid from previous grid */

    if (verbosity > 2)
      bft_printf(_("\n building level %2u grid\n"), mg->setup_data->n_levels);

    if (mg->subtype == CS_MULTIGRID_BOTTOM)
      g = cs_grid_coarsen_to_single(g, mg->merge_stride, verbosity);

    else
      g = cs_grid_coarsen(g,
                          mg->coarsening_type,
                          mg->aggregation_limit,
                          verbosity,
                          mg->merge_stride,
                          mg->merge_mean_threshold,
                          mg->merge_glob_threshold,
                          mg->p0p1_relax);

    bool symmetric = true;
    int grid_lv;

    cs_grid_get_info(g,
                     &grid_lv,
                     &symmetric,
                     NULL,
                     NULL,
                     &n_coarse_ranks,
                     &n_rows,
                     &n_cols_ext,
                     &n_entries,
                     &n_g_rows);

#if defined(HAVE_MPI)
    /* When rank merging reduced the coarse grid's communicator, ranks
       outside it may hold a stale global row count; synchronize it. */
    if ((n_coarse_ranks != mg->caller_n_ranks) && (mg->caller_n_ranks > 1)) {
      cs_gnum_t _n_g_rows = n_g_rows;
      MPI_Allreduce(&_n_g_rows, &n_g_rows, 1, CS_MPI_GNUM, MPI_MAX, mg->caller_comm);
    }
#endif

    assert((unsigned)grid_lv == mg->setup_data->n_levels);

    /* If too few rows were grouped, we stop at this level */

    if (mg->setup_data->n_levels > 1) {
      /* Stop if the grid is small enough, or if coarsening removed
         less than 20% of rows without any rank-count change. */
      if (   (n_g_rows < mg->n_g_rows_min)
          || (   n_g_rows > (0.8 * n_g_rows_prev)
              && n_coarse_ranks == n_coarse_ranks_prev)) {
        add_grid = false;
        cs_grid_destroy(&g); /* too coarse, not added */
      }
    }

    if (add_grid) {

      _multigrid_add_level(mg, g); /* Assign to hierarchy */

      /* Print coarse mesh stats */

      if (verbosity > 2) {

#if defined(HAVE_MPI)

        if (mg->caller_n_ranks > 1) {

          int lcount[2], gcount[2];
          int n_c_min, n_c_max, n_f_min, n_f_max;

          lcount[0] = n_rows; lcount[1] = n_entries;
          MPI_Allreduce(lcount, gcount, 2, MPI_INT, MPI_MAX,
                        mg->caller_comm);
          n_c_max = gcount[0]; n_f_max = gcount[1];

          lcount[0] = n_rows; lcount[1] = n_entries;
          MPI_Allreduce(lcount, gcount, 2, MPI_INT, MPI_MIN,
                        mg->caller_comm);
          n_c_min = gcount[0]; n_f_min = gcount[1];

          bft_printf
            (_(" total min max\n"
               " number of rows: %12llu %10d %10d\n"
               " number of entries: %10d %10d\n"),
             (unsigned long long)n_g_rows, n_c_min, n_c_max, n_f_min, n_f_max);
        }

#endif

        if (mg->caller_n_ranks == 1)
          bft_printf(_(" number of rows: %10d\n"
                       " number of entries: %10d\n"),
                     (int)n_rows, (int)n_entries);

      }

      /* Record per-level setup statistics */

      mg_lv_info = mg->lv_info + grid_lv;
      mg_lv_info->n_ranks[0] = n_coarse_ranks;
      mg_lv_info->n_elts[0][0] = n_rows;
      mg_lv_info->n_elts[1][0] = n_cols_ext;
      mg_lv_info->n_elts[2][0] = n_entries;

    } /* end adding grid */

    t2 = cs_timer_time();
    cs_timer_counter_add_diff(&(mg_lv_info->t_tot[0]), &t1, &t2);
    t1 = t2;

  }

  /* Print final info */

  if (verbosity > 1)
    bft_printf
      (_(" number of grid levels: %u\n"
         " number of rows in coarsest grid: %llu\n\n"),
       mg->setup_data->n_levels, (unsigned long long)n_g_rows);

  /* Prepare preprocessing info if necessary */

  if (mg->post_row_max > 0) {
    if (mg->info.n_calls[0] == 0) {
      /* Determine the mesh location matching the fine matrix rows
         (cells or vertices); NOTE(review): mesh may be NULL when called
         from _setup_k_cycle_hpc_sub — assumes post_row_max is 0 in that
         case, so this branch is not reached; confirm with callers. */
      int l_id = 0;
      const cs_matrix_t *a = cs_grid_get_matrix(f);
      const cs_lnum_t n_rows_a = cs_matrix_get_n_rows(a);
      if (n_rows_a == mesh->n_cells)
        l_id = CS_MESH_LOCATION_CELLS;
      else if (n_rows_a <= mesh->n_vertices) {
        l_id = CS_MESH_LOCATION_VERTICES;
        if (mesh->vtx_range_set != NULL) {
          if (mesh->vtx_range_set->n_elts[0] != n_rows_a)
            l_id = CS_MESH_LOCATION_NONE;
        }
      }
#if defined(HAVE_MPI)
      /* All ranks must agree on the location id; otherwise disable */
      if (mg->caller_n_ranks > 1) {
        int _l_id = l_id;
        MPI_Allreduce(&_l_id, &l_id, 1, MPI_INT, MPI_MAX, mg->caller_comm);
        if (l_id != _l_id)
          _l_id = CS_MESH_LOCATION_NONE;
        MPI_Allreduce(&_l_id, &l_id, 1, MPI_INT, MPI_MIN, mg->caller_comm);
      }
#endif
      if (l_id != CS_MESH_LOCATION_NONE) {
        cs_post_add_time_dep_output(_cs_multigrid_post_function, (void *)mg);
        _multigrid_add_post(mg, name, l_id, n_rows_a);
      }
    }
  }

  /* Update info */

#if defined(HAVE_MPI)

  /* In parallel, get global (average) values from local values */

  if (mg->caller_n_ranks > 1) {

    int i, j;
    cs_gnum_t *_n_elts_l = NULL, *_n_elts_s = NULL, *_n_elts_m = NULL;
    int grid_lv = mg->setup_data->n_levels;

    BFT_MALLOC(_n_elts_l, 3*grid_lv, cs_gnum_t);
    BFT_MALLOC(_n_elts_s, 3*grid_lv, cs_gnum_t);
    BFT_MALLOC(_n_elts_m, 3*grid_lv, cs_gnum_t);

    for (i = 0; i < grid_lv; i++) {
      cs_multigrid_level_info_t *mg_inf = mg->lv_info + i;
      for (j = 0; j < 3; j++)
        _n_elts_l[i*3 + j] = mg_inf->n_elts[j][0];
    }

    MPI_Allreduce(_n_elts_l, _n_elts_s, 3*grid_lv, CS_MPI_GNUM, MPI_SUM,
                  mg->caller_comm);
    MPI_Allreduce(_n_elts_l, _n_elts_m, 3*grid_lv, CS_MPI_GNUM, MPI_MAX,
                  mg->caller_comm);

    for (i = 0; i < grid_lv; i++) {
      cs_multigrid_level_info_t *mg_inf = mg->lv_info + i;
      cs_gnum_t n_g_ranks = mg_inf->n_ranks[0];
      for (j = 0; j < 3; j++) {
        cs_gnum_t tmp_max = n_g_ranks * _n_elts_m[i*3+j];
        /* rounded mean per rank, and max/mean imbalance ratio */
        mg_inf->n_elts[j][0] = (_n_elts_s[i*3+j] + n_g_ranks/2) / n_g_ranks;
        mg_inf->imbalance[j][0] = (float)(tmp_max*1.0/_n_elts_s[i*3+j]);
      }
    }

    BFT_FREE(_n_elts_m);
    BFT_FREE(_n_elts_s);
    BFT_FREE(_n_elts_l);

  }

#endif

  mg->info.n_levels_tot += mg->setup_data->n_levels;

  mg->info.n_levels[0] = mg->setup_data->n_levels;

  /* n_levels[1] and [2] track min and max level counts across calls */

  if (mg->info.n_calls[0] > 0) {
    if (mg->info.n_levels[0] < mg->info.n_levels[1])
      mg->info.n_levels[1] = mg->info.n_levels[0];
    if (mg->info.n_levels[0] > mg->info.n_levels[2])
      mg->info.n_levels[2] = mg->info.n_levels[0];
  }
  else {
    mg->info.n_levels[1] = mg->info.n_levels[0];
    mg->info.n_levels[2] = mg->info.n_levels[0];
  }

  mg->info.n_calls[0] += 1;

  /* Cleanup temporary interpolation arrays */

  for (unsigned i = 0; i < mg->setup_data->n_levels; i++)
    cs_grid_free_quantities(mg->setup_data->grid_hierarchy[i]);

  /* Setup solvers */

  if (mg->subtype == CS_MULTIGRID_BOTTOM)
    _multigrid_setup_sles_k_cycle_bottom(mg, name, verbosity);
  else
    _multigrid_setup_sles(mg, name, verbosity);

  /* Update timers */

  t2 = cs_timer_time();
  cs_timer_counter_add_diff(&(mg->info.t_tot[0]), &t0, &t2);
}
2322
2323 /*----------------------------------------------------------------------------*/
2324 /*!
2325 * \brief Setup coarse multigrid for k cycle HPC variant.
2326 *
2327 * \param[in, out] mg pointer to multigrid solver info and context
2328 * \param[in] name pointer to name of linear system
2329 * \param[in] a associated matrix
2330 * \param[in] n_ranks associated number of MPI ranks
2331 * \param[in] verbosity associated verbosity
2332 */
2333 /*----------------------------------------------------------------------------*/
2334
2335 static void
_setup_k_cycle_hpc_sub(cs_multigrid_t * mg,const char * name,const cs_matrix_t * a,int n_ranks,int verbosity)2336 _setup_k_cycle_hpc_sub(cs_multigrid_t *mg,
2337 const char *name,
2338 const cs_matrix_t *a,
2339 int n_ranks,
2340 int verbosity)
2341 {
2342 /* Destroy previous hierarchy if necessary */
2343
2344 if (mg->setup_data != NULL)
2345 cs_multigrid_free(mg);
2346
2347 /* Initialization */
2348
2349 cs_timer_t t0 = cs_timer_time();
2350
2351 if (verbosity > 1)
2352 bft_printf(_("\n Construction of grid hierarchy for \"%s\"\n"),
2353 name);
2354
2355 /* Build coarse grids hierarchy */
2356 /*------------------------------*/
2357
2358 cs_grid_t *f = cs_grid_create_from_parent(a, n_ranks);
2359
2360 cs_multigrid_level_info_t *mg_lv_info = mg->lv_info;
2361
2362 cs_timer_t t1 = cs_timer_time();
2363 cs_timer_counter_add_diff(&(mg_lv_info->t_tot[0]), &t0, &t1);
2364
2365 _setup_hierarchy(mg, name, NULL, f, verbosity); /* Assign to and build
2366 hierarchy */
2367
2368 /* Update timers */
2369
2370 t1 = cs_timer_time();
2371 cs_timer_counter_add_diff(&(mg->info.t_tot[0]), &t0, &t1);
2372 }
2373
2374 /*----------------------------------------------------------------------------
2375 * Compute dot product, summing result over all ranks.
2376 *
2377 * parameters:
2378 * mg <-- pointer to solver context info
2379 * n <-- local number of elements
2380 * x <-- vector in s = x.x
2381 *
2382 * returns:
2383 * result of s = x.x
2384 *----------------------------------------------------------------------------*/
2385
2386 inline static double
_dot_xx(const cs_multigrid_t * mg,cs_lnum_t n,const cs_real_t * x)2387 _dot_xx(const cs_multigrid_t *mg,
2388 cs_lnum_t n,
2389 const cs_real_t *x)
2390 {
2391 double s = cs_dot_xx(n, x);
2392
2393 #if defined(HAVE_MPI)
2394
2395 if (mg->comm != MPI_COMM_NULL) {
2396 double _sum;
2397 MPI_Allreduce(&s, &_sum, 1, MPI_DOUBLE, MPI_SUM, mg->comm);
2398 s = _sum;
2399 }
2400
2401 #endif /* defined(HAVE_MPI) */
2402
2403 return s;
2404 }
2405
2406 /*----------------------------------------------------------------------------
2407 * Compute 2 dot products x.x and y.y, summing result over all ranks.
2408 *
2409 * parameters:
2410 * mg <-- pointer to solver context info
2411 * n <-- number of associated values
2412 * x <-- vector in s1 = x.x
2413 * y <-- vector in s2 = y.y
2414 * s1 --> result of s1 = x.x
2415 * s2 --> result of s2 = y.y
2416 *----------------------------------------------------------------------------*/
2417
2418 inline static void
_dot_xx_yy(const cs_multigrid_t * mg,cs_lnum_t n,const cs_real_t * x,const cs_real_t * y,double * s1,double * s2)2419 _dot_xx_yy(const cs_multigrid_t *mg,
2420 cs_lnum_t n,
2421 const cs_real_t *x,
2422 const cs_real_t *y,
2423 double *s1,
2424 double *s2)
2425 {
2426 double s[2];
2427
2428 s[0] = cs_dot_xx(n, x);
2429 s[1] = cs_dot_xx(n, y);
2430
2431 #if defined(HAVE_MPI)
2432
2433 if (mg->comm != MPI_COMM_NULL) {
2434 double _sum[2];
2435 MPI_Allreduce(s, _sum, 2, MPI_DOUBLE, MPI_SUM, mg->comm);
2436 s[0] = _sum[0];
2437 s[1] = _sum[1];
2438 }
2439
2440 #endif /* defined(HAVE_MPI) */
2441
2442 *s1 = s[0];
2443 *s2 = s[1];
2444 }
2445
2446 /*----------------------------------------------------------------------------
2447 * Compute 2 dot products x.x and x.y, summing result over all ranks.
2448 *
2449 * parameters:
2450 * mg <-- pointer to solver context info
2451 * n <-- number of associated values
2452 * x <-- vector in s1 = x.y
2453 * y <-- vector in s1 = x.y and s2 = y.z
2454 * z <-- vector in s2 = y.z
2455 * s1 --> result of s1 = x.y
2456 * s2 --> result of s2 = y.z
2457 *----------------------------------------------------------------------------*/
2458
2459 inline static void
_dot_xy_yz(const cs_multigrid_t * mg,cs_lnum_t n,const cs_real_t * x,const cs_real_t * y,const cs_real_t * z,double * s1,double * s2)2460 _dot_xy_yz(const cs_multigrid_t *mg,
2461 cs_lnum_t n,
2462 const cs_real_t *x,
2463 const cs_real_t *y,
2464 const cs_real_t *z,
2465 double *s1,
2466 double *s2)
2467 {
2468 double s[2];
2469
2470 cs_dot_xy_yz(n, x, y, z, s, s+1);
2471
2472 #if defined(HAVE_MPI)
2473
2474 if (mg->comm != MPI_COMM_NULL) {
2475 double _sum[2];
2476 MPI_Allreduce(s, _sum, 2, MPI_DOUBLE, MPI_SUM, mg->comm);
2477 s[0] = _sum[0];
2478 s[1] = _sum[1];
2479 }
2480
2481 #endif /* defined(HAVE_MPI) */
2482
2483 *s1 = s[0];
2484 *s2 = s[1];
2485 }
2486
2487 /*----------------------------------------------------------------------------
2488 * Compute 3 dot products x.u, xv, and x.w, summing result over all ranks.
2489 *
2490 * parameters:
2491 * mg <-- pointer to solver context info
2492 * n <-- number of associated values
2493 * x <-- vector in s1 = x.u, s2 = x.v, s3 = x.w
2494 * u <-- vector in s1 = x.u
2495 * v <-- vector in s2 = x.v
2496 * w <-- vector in s2 = x.w
2497 * s1 --> result of s1 = x.y
2498 * s2 --> result of s2 = y.z
2499 *----------------------------------------------------------------------------*/
2500
2501 inline static void
_dot_xu_xv_xw(const cs_multigrid_t * mg,cs_lnum_t n,const cs_real_t * x,const cs_real_t * u,const cs_real_t * v,const cs_real_t * w,double * s1,double * s2,double * s3)2502 _dot_xu_xv_xw(const cs_multigrid_t *mg,
2503 cs_lnum_t n,
2504 const cs_real_t *x,
2505 const cs_real_t *u,
2506 const cs_real_t *v,
2507 const cs_real_t *w,
2508 double *s1,
2509 double *s2,
2510 double *s3)
2511 {
2512 double s[3];
2513
2514 /* Use two separate call as cs_blas.c does not yet hav matching call */
2515 cs_dot_xy_yz(n, u, x, v, s, s+1);
2516 s[2] = cs_dot(n, x, w);
2517
2518 #if defined(HAVE_MPI)
2519
2520 if (mg->comm != MPI_COMM_NULL) {
2521 double _sum[3];
2522 MPI_Allreduce(s, _sum, 3, MPI_DOUBLE, MPI_SUM, mg->comm);
2523 s[0] = _sum[0];
2524 s[1] = _sum[1];
2525 s[2] = _sum[2];
2526 }
2527
2528 #endif /* defined(HAVE_MPI) */
2529
2530 *s1 = s[0];
2531 *s2 = s[1];
2532 *s3 = s[2];
2533 }
2534
2535 /*----------------------------------------------------------------------------
2536 * Test if convergence is attained.
2537 *
2538 * parameters:
2539 * mg <-- associated multigrid structure
2540 * var_name <-- variable name
2541 * n_f_rows <-- number of rows on fine mesh
2542 * n_max_cycles <-- maximum number of cycles
 *   cycle_id        <-- number of current cycle
 *   verbosity       <-- verbosity level
2547 * precision <-- precision limit
2548 * r_norm <-- residue normalization
2549 * initial_residue <-- initial residue
2550 * residue <-> residue
2551 * rhs <-- right-hand side
2552 *
2553 * returns:
2554 * convergence status
2555 *----------------------------------------------------------------------------*/
2556
static cs_sles_convergence_state_t
_convergence_test(cs_multigrid_t  *mg,
                  const char      *var_name,
                  cs_lnum_t        n_f_rows,
                  int              n_max_cycles,
                  int              cycle_id,
                  int              verbosity,
                  int              n_iters,
                  double           precision,
                  double           r_norm,
                  double           initial_residue,
                  double          *residue,
                  const cs_real_t  rhs[])
{
  /* Log formats: table header, trailer, per-cycle table row,
     and single-line summary */

  const char cycle_h_fmt[]
    = N_(" ---------------------------------------------------\n"
         " n. | Cumulative iterations | Norm. residual\n"
         " cycles | on fine mesh | on fine mesh\n"
         " ---------------------------------------------------\n");
  const char cycle_t_fmt[]
    = N_(" ---------------------------------------------------\n");
  const char cycle_cv_fmt[]
    = N_(" %4d | %6d | %12.4e\n");

  const char cycle_fmt[]
    = N_(" N. cycles: %4d; Fine mesh cumulative iter: %5d; "
         "Norm. residual %12.4e\n");

  /* Compute residue (L2 norm of the rhs argument, which callers pass
     as the current residual vector) */

  *residue = sqrt(_dot_xx(mg, n_f_rows, rhs));

  /* initial_residue is passed by value, so this only affects the
     divergence test below on the first cycle */

  if (cycle_id == 1)
    initial_residue = *residue;

  /* Plot convergence if requested */

  if (mg->cycle_plot != NULL) {
    double vals = *residue;
    double wall_time = cs_timer_wtime();
    mg->plot_time_stamp += 1;
    cs_time_plot_vals_write(mg->cycle_plot,
                            mg->plot_time_stamp,
                            wall_time,
                            1,
                            &vals);
  }

  /* Converged relative to the normalized precision target */

  if (*residue < precision*r_norm) {

    if (verbosity == 2)
      bft_printf(_(cycle_fmt), cycle_id, n_iters, *residue/r_norm);
    else if (verbosity > 2) {
      bft_printf(_(cycle_h_fmt));
      bft_printf(_(cycle_cv_fmt),
                 cycle_id, n_iters, *residue/r_norm);
      bft_printf(_(cycle_t_fmt));
    }
    return CS_SLES_CONVERGED;
  }

  /* Cycle budget exhausted */

  else if (cycle_id > n_max_cycles) {

    /* Warn unless used as a preconditioner or inner (non-main) solver,
       or verbosity explicitly requests it */
    if ( (verbosity > -1 && !(mg->info.is_pc || mg->subtype != CS_MULTIGRID_MAIN))
        || verbosity > 0) {
      if (verbosity == 1)
        bft_printf(_(cycle_fmt), cycle_id, n_iters, *residue/r_norm);
      else if (verbosity > 1) {
        bft_printf(_(cycle_fmt),
                   cycle_id, n_iters, *residue/r_norm);
        bft_printf(_(cycle_t_fmt));
      }
      bft_printf(_(" @@ Warning: algebraic multigrid for [%s]\n"
                   " ********\n"
                   " Maximum number of cycles (%d) reached.\n"),
                 var_name, n_max_cycles);
    }
    return CS_SLES_MAX_ITERATION;
  }

  /* Still iterating: check for divergence or non-finite residual */

  else {

    /* Heuristic divergence test: residual grew 10000x and is large */

    if (*residue > initial_residue * 10000.0 && *residue > 100.) {
      if (verbosity > 2)
        bft_printf(_(cycle_fmt), cycle_id, n_iters, *residue/r_norm);
      return CS_SLES_DIVERGED;
    }
    else if (verbosity > 2) {
      if (cycle_id == 1)
        bft_printf(_(cycle_h_fmt));
      bft_printf(_(cycle_cv_fmt), cycle_id, n_iters, *residue/r_norm);
    }

#if (__STDC_VERSION__ >= 199901L)
    if (isnan(*residue) || isinf(*residue))
      return CS_SLES_DIVERGED;
#endif
  }

  return CS_SLES_ITERATING;
}
2658
2659 /*----------------------------------------------------------------------------
2660 * Log residue A.vx - Rhs
2661 *
2662 * parameters:
2663 * mg <-- pointer to multigrid context info
2664 * int cycle_id <-- cycle id
2665 * var_name <-- variable name
2666 * a <-- matrix
2667 * rhs <-- right hand side
2668 * vx <-> system solution
2669 *
2670 * returns:
2671 * convergence state
2672 *----------------------------------------------------------------------------*/
2673
2674 static void
_log_residual(const cs_multigrid_t * mg,int cycle_id,const char * var_name,const cs_matrix_t * a,const cs_real_t * rhs,cs_real_t * restrict vx)2675 _log_residual(const cs_multigrid_t *mg,
2676 int cycle_id,
2677 const char *var_name,
2678 const cs_matrix_t *a,
2679 const cs_real_t *rhs,
2680 cs_real_t *restrict vx)
2681 {
2682 const cs_lnum_t *diag_block_size = cs_matrix_get_diag_block_size(a);
2683 const cs_lnum_t n_rows = cs_matrix_get_n_rows(a) * diag_block_size[0];
2684 const cs_lnum_t n_cols = cs_matrix_get_n_columns(a) * diag_block_size[0];
2685
2686 cs_real_t *r;
2687 BFT_MALLOC(r, n_cols, cs_real_t);
2688
2689 cs_matrix_vector_multiply(a, vx, r);
2690
2691 for (cs_lnum_t i = 0; i < n_rows; i++)
2692 r[i] -= rhs[i];
2693
2694 double s = cs_dot_xx(n_rows, r);
2695
2696 BFT_FREE(r);
2697
2698 #if defined(HAVE_MPI)
2699
2700 if (mg->comm != MPI_COMM_NULL) {
2701 double _sum;
2702 MPI_Allreduce(&s, &_sum, 1, MPI_DOUBLE, MPI_SUM, mg->comm);
2703 s = _sum;
2704 }
2705
2706 #endif /* defined(HAVE_MPI) */
2707
2708 cs_log_printf(CS_LOG_DEFAULT, " mg cycle %d: %s residual: %.3g\n",
2709 cycle_id, var_name, s);
2710 }
2711
2712 /*----------------------------------------------------------------------------
2713 * Update level information iteration counts
2714 *
2715 * parameters:
2716 * lv_info_it <-> logged number of iterations (last, min, max, total)
2717 * n_iter <-- current number of iterations
2718 *----------------------------------------------------------------------------*/
2719
static inline void
_lv_info_update_stage_iter(unsigned long long  lv_info_it[],
                           unsigned            n_iter)
{
  const unsigned long long it = n_iter;

  lv_info_it[0] = it;             /* last count */

  if (it < lv_info_it[1])         /* new minimum */
    lv_info_it[1] = it;
  else if (it > lv_info_it[2])    /* new maximum */
    lv_info_it[2] = it;

  if (lv_info_it[1] == 0)         /* a stored 0 marks an unset minimum */
    lv_info_it[1] = it;

  lv_info_it[3] += it;            /* cumulative total */
}
2733
2734 /*----------------------------------------------------------------------------
2735 * Compute buffer size required for level names
2736 *
2737 * parameters:
2738 * name <-- linear system name
2739 * n_levels <-- number multigrid levels
2740 *
2741 * returns:
2742 * buffer size needed for level names
2743 *----------------------------------------------------------------------------*/
2744
static size_t
_level_names_size(const char  *name,
                  int          n_levels)
{
  /* Width of the zero-padded level number field */

  int w = 1;
  for (int lv = n_levels/10; lv > 0; lv /= 10)
    w += 1;

  /* First part: array of 2 name pointers per level, SIMD-aligned */

  size_t ptr_size = n_levels*sizeof(char *)*2;
  ptr_size = CS_SIMD_SIZE(ptr_size);

  /* Second part: character buffers ("descent" is the longest of the
     descent/ascent/coarse suffixes, so it bounds each name) */

  size_t buf_size = 0;
  if (n_levels > 1)
    buf_size = (strlen(name) + strlen(":descent:") + w + 1)
               * (n_levels)*2;

  return ptr_size + CS_SIMD_SIZE(buf_size);
}
2770
2771 /*----------------------------------------------------------------------------
2772 * Initialize level names
2773 *
2774 * parameters:
2775 * name <-- linear system name
2776 * n_levels <-- number multigrid levels
2777 * buffer <-- buffer
2778 *----------------------------------------------------------------------------*/
2779
static void
_level_names_init(const char  *name,
                  int          n_levels,
                  void        *buffer)
{
  /* Width of the zero-padded level number field */

  int w = 1;
  for (int lv = n_levels/10; lv > 0; lv /= 10)
    w += 1;

  /* First part of the buffer holds the pointer array, SIMD-aligned
     (must match the layout computed by _level_names_size) */

  size_t ptr_size = n_levels*sizeof(char *)*2;
  ptr_size = CS_SIMD_SIZE(ptr_size);

  char *base = buffer;
  char **lv_names = buffer;
  const size_t name_len = strlen(name) + strlen(":descent:") + w + 1;

  /* Each level but the coarsest gets a descent and an ascent name */

  for (int i = 0; i < n_levels -1; i++) {
    char *s0 = base + ptr_size + i*2*name_len;
    lv_names[i*2] = s0;
    lv_names[i*2+1] = s0 + name_len;
    sprintf(s0, "%s:descent:%0*d", name, w, i);
    sprintf(s0 + name_len, "%s:ascent:%0*d", name, w, i);
  }

  /* The coarsest level gets a single "coarse" name */

  if (n_levels > 1) {
    int i = n_levels - 1;
    char *s0 = base + ptr_size + i*2*name_len;
    lv_names[i*2] = s0;
    lv_names[i*2+1] = NULL;
    sprintf(s0, "%s:coarse:%0*d", name, w, i);
  }
}
2817
2818 /*----------------------------------------------------------------------------
2819 * Sparse linear system resolution using multigrid.
2820 *
2821 * parameters:
2822 * mg <-- multigrid system
2823 * lv_names <-- names of linear systems
2824 * (indexed as mg->setup_data->sles_hierarchy)
2825 * verbosity <-- verbosity level
 *   cycle_id        <-- id of current cycle
2827 * n_equiv_iter <-> equivalent number of iterations
2828 * precision <-- solver precision
2829 * r_norm <-- residue normalization
2830 * initial_residue <-> initial residue
2831 * residue <-> residue
2832 * rhs <-- right hand side
2833 * vx --> system solution
2834 * aux_size <-- number of elements in aux_vectors (in bytes)
2835 * aux_vectors --- optional working area (allocation otherwise)
2836 *
2837 * returns:
2838 * convergence status
2839 *----------------------------------------------------------------------------*/
2840
static cs_sles_convergence_state_t
_multigrid_v_cycle(cs_multigrid_t       *mg,
                   const char          **lv_names,
                   int                   verbosity,
                   int                   cycle_id,
                   int                  *n_equiv_iter,
                   double                precision,
                   double                r_norm,
                   double               *initial_residue,
                   double               *residue,
                   const cs_real_t      *rhs,
                   cs_real_t            *vx,
                   size_t                aux_size,
                   void                 *aux_vectors)
{
  int level, coarsest_level;
  cs_lnum_t ii;
  cs_timer_t t0, t1;

  cs_lnum_t db_size[4] = {1, 1, 1, 1};
  cs_lnum_t eb_size[4] = {1, 1, 1, 1};
  cs_sles_convergence_state_t cvg = CS_SLES_ITERATING, c_cvg = CS_SLES_ITERATING;
  int n_iter = 0;
  double _residue = -1.;
  double _initial_residue = 0.;

  size_t _aux_r_size = aux_size / sizeof(cs_real_t);
  cs_lnum_t n_rows = 0, n_cols_ext = 0;
  cs_lnum_t _n_rows = 0;
  cs_gnum_t n_g_rows = 0;
  cs_real_t r_norm_l = r_norm;

  double denom_n_g_rows_0 = 1.0;

  cs_multigrid_setup_data_t *mgd = mg->setup_data;
  cs_multigrid_level_info_t *lv_info = NULL;

  cs_real_t *_aux_vectors = aux_vectors;
  cs_real_t *restrict wr = NULL;
  cs_real_t *restrict vx_lv = NULL;

  const cs_real_t *restrict rhs_lv = NULL;
  const cs_matrix_t *_matrix = NULL;
  const cs_grid_t *f = NULL, *c = NULL;

  bool end_cycle = false;

  /* Initialization */

  coarsest_level = mgd->n_levels - 1;

  f = mgd->grid_hierarchy[0];

  cs_grid_get_info(f,
                   NULL,
                   NULL,
                   db_size,
                   eb_size,
                   NULL,
                   &n_rows,
                   &n_cols_ext,
                   NULL,
                   &n_g_rows);

  /* Used to scale each level's iteration count to fine-grid-equivalent
     iterations (by the ratio of global row counts) */

  denom_n_g_rows_0 = 1.0 / n_g_rows;

  /* Allocate wr or use working area
     (note the finest grid could have less elements than a coarser
     grid to which rank merging has been applied, hence the test below) */

  size_t wr_size = n_cols_ext*db_size[1];
  for (level = 1; level < (int)(mgd->n_levels); level++) {
    cs_lnum_t n_cols_max
      = cs_grid_get_n_cols_max(mgd->grid_hierarchy[level]);
    wr_size = CS_MAX(wr_size, (size_t)(n_cols_max*db_size[1]));
    wr_size = CS_SIMD_SIZE(wr_size);
  }

  if (_aux_r_size >= wr_size) {
    wr = aux_vectors;
    _aux_vectors = wr + wr_size;
    _aux_r_size -= wr_size;
  }
  else
    BFT_MALLOC(wr, wr_size, cs_real_t);

  /* map arrays for rhs and vx;
     for the finest level, simply point to input and output arrays */

  mgd->rhs_vx[0] = NULL; /* Use _rhs_level when necessary to avoid
                            const warning */
  mgd->rhs_vx[1] = vx;

  /* Descent */
  /*---------*/

  for (level = 0; level < coarsest_level; level++) {

    lv_info = mg->lv_info + level;
    t0 = cs_timer_time();

    rhs_lv = (level == 0) ? rhs : mgd->rhs_vx[level*2];
    vx_lv = mgd->rhs_vx[level*2 + 1];

    c = mgd->grid_hierarchy[level+1];

    /* Smoother pass */

    _matrix = cs_grid_get_matrix(f);

    cs_mg_sles_t *mg_sles = &(mgd->sles_hierarchy[level*2]);

    c_cvg = mg_sles->solve_func(mg_sles->context,
                                lv_names[level*2],
                                _matrix,
                                verbosity - 4, /* verbosity */
                                precision*mg->info.precision_mult[0],
                                r_norm_l,
                                &n_iter,
                                &_residue,
                                rhs_lv,
                                vx_lv,
                                _aux_r_size*sizeof(cs_real_t),
                                _aux_vectors);

    if (mg->plot_time_stamp > -1)
      mg->plot_time_stamp += n_iter+1;

    /* Only iterative solvers expose their initial residue; otherwise
       mark it unknown */

    if (mg_sles->solve_func == cs_sles_it_solve)
      _initial_residue
        = cs_sles_it_get_last_initial_residue(mg_sles->context);
    else
      _initial_residue = HUGE_VAL;

    if (level == 0 && cycle_id == 1)
      *initial_residue = _initial_residue;

    if (verbosity > 1)
      _log_residual(mg, cycle_id, lv_names[level*2],
                    _matrix, rhs_lv, vx_lv);

    /* Abort the cycle on solver error or divergence */

    if (c_cvg < CS_SLES_BREAKDOWN) {
      end_cycle = true;
      break;
    }

    /* Restrict residue
       TODO: get residue from cs_sles_solve(). This optimisation would
       require adding an argument and exercising caution to ensure the
       correct sign and meaning of the residue
       (regarding timing, this stage is part of the descent smoother) */

    cs_matrix_vector_multiply(_matrix, vx_lv, wr);

    _n_rows = n_rows*db_size[1];
#   pragma omp parallel for if(_n_rows > CS_THR_MIN)
    for (ii = 0; ii < _n_rows; ii++)
      wr[ii] = rhs_lv[ii] - wr[ii];

    /* Convergence test in beginning of cycle (fine mesh) */

    if (level == 0) {

      cvg = _convergence_test(mg,
                              lv_names[0],
                              n_rows*db_size[1],
                              mg->info.n_max_cycles,
                              cycle_id,
                              verbosity,
                              lv_info->n_it_ds_smoothe[3],
                              precision,
                              r_norm,
                              *initial_residue,
                              residue,
                              wr);

      /* If converged or cycle limit reached, break from descent loop */

      if (cvg != 0) {
        c_cvg = cvg;
        end_cycle = true;
        /* Record timing/iteration statistics before leaving */
        t1 = cs_timer_time();
        cs_timer_counter_add_diff(&(lv_info->t_tot[2]), &t0, &t1);
        lv_info->n_calls[2] += 1;
        _lv_info_update_stage_iter(lv_info->n_it_ds_smoothe, n_iter);
        *n_equiv_iter += n_iter * n_g_rows * denom_n_g_rows_0;
        break;
      }

    }

    t1 = cs_timer_time();
    cs_timer_counter_add_diff(&(lv_info->t_tot[2]), &t0, &t1);
    lv_info->n_calls[2] += 1;
    _lv_info_update_stage_iter(lv_info->n_it_ds_smoothe, n_iter);
    *n_equiv_iter += n_iter * n_g_rows * denom_n_g_rows_0;

    /* Prepare for next level: restrict residual to coarse rhs */

    cs_grid_restrict_row_var(f, c, wr, mgd->rhs_vx[(level+1)*2]);

    cs_grid_get_info(c,
                     NULL,
                     NULL,
                     NULL,
                     NULL,
                     NULL,
                     &n_rows,
                     &n_cols_ext,
                     NULL,
                     &n_g_rows);

    f = c;

    /* Initialize correction */

    cs_real_t *restrict vx_lv1 = mgd->rhs_vx[(level+1)*2 + 1];

    _n_rows = n_rows*db_size[1];
#   pragma omp parallel for if(n_rows > CS_THR_MIN)
    for (ii = 0; ii < _n_rows; ii++)
      vx_lv1[ii] = 0.0;

    t0 = cs_timer_time();
    cs_timer_counter_add_diff(&(lv_info->t_tot[4]), &t1, &t0);
    lv_info->n_calls[4] += 1;

  } /* End of loop on levels (descent) */

  if (end_cycle == false) {

    /* Resolve coarsest level to convergence */
    /*---------------------------------------*/

    assert(level == coarsest_level);
    assert(c == mgd->grid_hierarchy[coarsest_level]);

    /* coarsest level == 0 should never happen, but we play it safe */
    rhs_lv = (level == 0) ? rhs : mgd->rhs_vx[coarsest_level*2];
    vx_lv = mgd->rhs_vx[level*2 + 1];

    _matrix = cs_grid_get_matrix(c);

    cs_mg_sles_t *mg_sles = &(mgd->sles_hierarchy[level*2]);

    _initial_residue = _residue;

    lv_info = mg->lv_info + level;

    t0 = cs_timer_time();

    c_cvg = mg_sles->solve_func(mg_sles->context,
                                lv_names[level*2],
                                _matrix,
                                verbosity - 3,
                                precision*mg->info.precision_mult[2],
                                r_norm_l,
                                &n_iter,
                                &_residue,
                                rhs_lv,
                                vx_lv,
                                _aux_r_size*sizeof(cs_real_t),
                                _aux_vectors);

    t1 = cs_timer_time();
    cs_timer_counter_add_diff(&(lv_info->t_tot[1]), &t0, &t1);
    lv_info->n_calls[1] += 1;
    _lv_info_update_stage_iter(lv_info->n_it_solve, n_iter);

    if (mg_sles->solve_func == cs_sles_it_solve)
      _initial_residue
        = cs_sles_it_get_last_initial_residue(mg_sles->context);
    else
      _initial_residue = HUGE_VAL;

    *n_equiv_iter += n_iter * n_g_rows * denom_n_g_rows_0;

    if (verbosity > 1)
      _log_residual(mg, cycle_id, lv_names[level*2],
                    _matrix, rhs_lv, vx_lv);

    if (c_cvg < CS_SLES_BREAKDOWN)
      end_cycle = true;

  }

  if (end_cycle == false) {

    /* Ascent */
    /*--------*/

    for (level = coarsest_level - 1; level > -1; level--) {

      vx_lv = mgd->rhs_vx[level*2 + 1];

      lv_info = mg->lv_info + level;

      c = mgd->grid_hierarchy[level+1];
      f = mgd->grid_hierarchy[level];

      cs_grid_get_info(f,
                       NULL,
                       NULL,
                       NULL,
                       NULL,
                       NULL,
                       &n_rows,
                       &n_cols_ext,
                       NULL,
                       &n_g_rows);

      /* Prolong correction from coarse to fine, and add it */

      t0 = cs_timer_time();

      cs_real_t *restrict vx_lv1 = mgd->rhs_vx[(level+1)*2 + 1];
      cs_grid_prolong_row_var(c, f, vx_lv1, wr);

      _n_rows = n_rows*db_size[1];
#     pragma omp parallel for if(n_rows > CS_THR_MIN)
      for (ii = 0; ii < _n_rows; ii++)
        vx_lv[ii] += wr[ii];

      t1 = cs_timer_time();
      cs_timer_counter_add_diff(&(lv_info->t_tot[5]), &t0, &t1);
      lv_info->n_calls[5] += 1;

      /* Smoother pass if level > 0
         (smoother not called for finest mesh, as it will be called in
         descent phase of the next cycle, before the convergence test). */

      if (level > 0) {

        rhs_lv = mgd->rhs_vx[level*2];

        _matrix = cs_grid_get_matrix(f);

        cs_mg_sles_t *mg_sles = &(mgd->sles_hierarchy[level*2 + 1]);

        c_cvg = mg_sles->solve_func(mg_sles->context,
                                    lv_names[level*2+1],
                                    _matrix,
                                    verbosity - 4, /* verbosity */
                                    precision*mg->info.precision_mult[1],
                                    r_norm_l,
                                    &n_iter,
                                    &_residue,
                                    rhs_lv,
                                    vx_lv,
                                    _aux_r_size*sizeof(cs_real_t),
                                    _aux_vectors);

        t0 = cs_timer_time();
        cs_timer_counter_add_diff(&(lv_info->t_tot[3]), &t1, &t0);
        lv_info->n_calls[3] += 1;
        _lv_info_update_stage_iter(lv_info->n_it_as_smoothe, n_iter);

        if (mg_sles->solve_func == cs_sles_it_solve)
          _initial_residue
            = cs_sles_it_get_last_initial_residue(mg_sles->context);
        else
          _initial_residue = HUGE_VAL;

        *n_equiv_iter += n_iter * n_g_rows * denom_n_g_rows_0;

        if (verbosity > 1)
          _log_residual(mg, cycle_id, lv_names[level*2+1],
                        _matrix, rhs_lv, vx_lv);

        if (c_cvg < CS_SLES_BREAKDOWN)
          break;
      }

    } /* End loop on levels (ascent) */

  } /* End of tests on end_cycle */

  /* Record the exit state for diagnostics */

  mgd->exit_level = level;
  mgd->exit_residue = _residue;
  if (level == 0)
    mgd->exit_initial_residue = *initial_residue;
  else
    mgd->exit_initial_residue = _initial_residue;
  mgd->exit_cycle_id = cycle_id;

  /* Free memory */

  if (wr != aux_vectors)
    BFT_FREE(wr);

  return cvg;
}
3233
3234 /*----------------------------------------------------------------------------
3235 * Apply a multigrid K-cycle on the current level.
3236 *
3237 * This version of the multigrid cycle is a K-cycle, described by Y. Notay in
3238 * "An aggregation-based algebraic multigrid method", Electronic Transactions on
3239 * Numerical Analysis, 2010, vol 37, pages 123-146.
3240 *
3241 * Used as a preconditioner, it is not constant (sometimes there is only one
3242 * visit of the coarse level, sometimes 2) and therefore, it has to be used with
3243 * IPCG (or FCG) instead of CG. Descent and ascent smoothers still have to be
3244 * symmetric for the IPCG to converge.
3245 *
3246 * parameters:
3247 * mg <-- multigrid system
3248 * level <-- level on which we apply the cycle
3249 * lv_names <-- names of linear systems
3250 * (indexed as mg->setup_data->sles_hierarchy)
3251 * verbosity <-- verbosity level
3252 * cycle_id <-- id of currect cycle
3253 * n_equiv_iter <-> equivalent number of iterations
3254 * precision <-- solver precision
3255 * r_norm <-- residue normalization
3256 * initial_residue <-> initial residue
3257 * residue <-> residue
3258 * rhs <-- right hand side
3259 * vx --> system solution
3260 * aux_size <-- number of elements in aux_vectors (in bytes)
3261 * aux_vectors --- optional working area (allocation otherwise)
3262 *
3263 * returns:
3264 * convergence status
3265 *----------------------------------------------------------------------------*/
3266
3267 static cs_sles_convergence_state_t
_multigrid_k_cycle(cs_multigrid_t * mg,int level,const char ** lv_names,int verbosity,int cycle_id,int * n_equiv_iter,double precision,double r_norm,double * initial_residue,double * residue,const cs_real_t * rhs,cs_real_t * vx,size_t aux_size,void * aux_vectors)3268 _multigrid_k_cycle(cs_multigrid_t *mg,
3269 int level,
3270 const char **lv_names,
3271 int verbosity,
3272 int cycle_id,
3273 int *n_equiv_iter,
3274 double precision,
3275 double r_norm,
3276 double *initial_residue,
3277 double *residue,
3278 const cs_real_t *rhs,
3279 cs_real_t *vx,
3280 size_t aux_size,
3281 void *aux_vectors)
3282 {
3283 cs_timer_t t0, t1;
3284
3285 bool end_cycle = false;
3286 cs_sles_convergence_state_t c_cvg = CS_SLES_ITERATING, cvg = CS_SLES_ITERATING;
3287 int n_iter = 0;
3288 double _residue = -1.;
3289 cs_real_t r_norm_l = r_norm;
3290
3291 size_t _aux_r_size = aux_size / sizeof(cs_real_t);
3292 cs_lnum_t f_n_rows = 0, c_n_rows = 0;
3293 cs_lnum_t f_n_cols_ext = 0, c_n_cols_ext = 0;
3294 cs_gnum_t f_n_g_rows = 0, c_n_g_rows = 0;
3295
3296 cs_multigrid_setup_data_t *mgd = mg->setup_data;
3297 int coarsest_level = mgd->n_levels-1;
3298
3299 cs_lnum_t db_size[4] = {1, 1, 1, 1};
3300 cs_lnum_t eb_size[4] = {1, 1, 1, 1};
3301
3302 /* Recursion threshold */
3303 const cs_real_t trsh = mg->k_cycle_threshold;
3304
3305 /* RHS and unknowns arrays */
3306 cs_real_t *restrict vx_lv = NULL;
3307 const cs_real_t *restrict rhs_lv = NULL;
3308 const cs_matrix_t *f_matrix = NULL, *c_matrix = NULL;
3309
3310 /* Initialization of the computing tools */
3311
3312 cs_grid_t *f = mgd->grid_hierarchy[level];
3313 cs_grid_t *c = mgd->grid_hierarchy[level+1];
3314
3315 f_matrix = cs_grid_get_matrix(f);
3316 c_matrix = cs_grid_get_matrix(c);
3317
3318 cs_grid_get_info(f, NULL, NULL, db_size, eb_size, NULL, &f_n_rows,
3319 &f_n_cols_ext, NULL, &f_n_g_rows);
3320 cs_grid_get_info(c, NULL, NULL, db_size, eb_size, NULL, &c_n_rows,
3321 &c_n_cols_ext, NULL, &c_n_g_rows);
3322
3323 cs_lnum_t _f_n_rows = f_n_rows*db_size[1];
3324 cs_lnum_t _c_n_rows = c_n_rows*db_size[1];
3325
3326 static double denom_n_g_rows_0 = 1;
3327 if (level == 0)
3328 denom_n_g_rows_0 = 1.0 / f_n_g_rows;
3329
3330 rhs_lv = rhs;
3331 vx_lv = vx;
3332
3333 /* Descent
3334 ------- */
3335
3336 const int na = 10; /* number of array slots in mgd->rhs_vx */
3337
3338 /* Arrays that are needed for both descent and ascent phases */
3339 cs_real_t *restrict rt_lv = mgd->rhs_vx[level*na];
3340 assert(rt_lv != NULL);
3341
3342 # pragma omp parallel for if(_f_n_rows > CS_THR_MIN)
3343 for (cs_lnum_t ii = 0; ii < _f_n_rows; ii++)
3344 vx_lv[ii] = 0.0;
3345
3346 cs_multigrid_level_info_t *lv_info = mg->lv_info + level;
3347 t0 = cs_timer_time();
3348
3349 /* Pre-smoothing */
3350
3351 cs_mg_sles_t *mg_sles = &(mgd->sles_hierarchy[level*2]);
3352
3353 c_cvg = mg_sles->solve_func(mg_sles->context,
3354 lv_names[level*2],
3355 f_matrix,
3356 0, /* verbosity */
3357 precision*mg->info.precision_mult[0],
3358 r_norm_l,
3359 &n_iter,
3360 &_residue,
3361 rhs_lv,
3362 vx_lv,
3363 _aux_r_size*sizeof(cs_real_t),
3364 aux_vectors);
3365
3366 if (initial_residue != NULL && cycle_id == 1)
3367 *initial_residue = HUGE_VAL;
3368
3369 t1 = cs_timer_time();
3370 cs_timer_counter_add_diff(&(lv_info->t_tot[2]), &t0, &t1);
3371 lv_info->n_calls[2] += 1;
3372 _lv_info_update_stage_iter(lv_info->n_it_ds_smoothe, n_iter);
3373 *n_equiv_iter += n_iter * f_n_g_rows * denom_n_g_rows_0;
3374
3375 if (verbosity > 0)
3376 _log_residual(mg, cycle_id, lv_names[level*2],
3377 f_matrix, rhs_lv, vx_lv);
3378
3379 /* Compute new residual */
3380 cs_matrix_vector_multiply(f_matrix, vx_lv, rt_lv);
3381
3382 # pragma omp parallel for if(_f_n_rows > CS_THR_MIN)
3383 for (cs_lnum_t ii = 0; ii < _f_n_rows; ii++)
3384 rt_lv[ii] = rhs_lv[ii] - rt_lv[ii];
3385
3386 /* Convergence test in beginning of cycle (fine mesh) */
3387
3388 if ((level == 0 && mg->info.n_max_cycles > 1) || c_cvg < CS_SLES_BREAKDOWN) {
3389
3390 if (c_cvg >= CS_SLES_BREAKDOWN)
3391 cvg = _convergence_test(mg,
3392 lv_names[0],
3393 f_n_rows*db_size[1],
3394 mg->info.n_max_cycles,
3395 cycle_id,
3396 verbosity,
3397 lv_info->n_it_ds_smoothe[3],
3398 precision,
3399 r_norm,
3400 *initial_residue,
3401 residue,
3402 rt_lv);
3403 else
3404 cvg = c_cvg;
3405
3406 /* If converged or cycle limit reached, break from descent loop */
3407
3408 if (cvg != CS_SLES_ITERATING)
3409 end_cycle = true;
3410 }
3411
3412 t0 = cs_timer_time();
3413 cs_timer_counter_add_diff(&(lv_info->t_tot[6]), &t1, &t0);
3414 lv_info->n_calls[6] += 1; /* Single update for this cycle */
3415
3416 if (end_cycle) {
3417 return cvg;
3418 }
3419
3420 /* Arrays needed for the descent phase
3421 (mapped to preallocated mgd->rhs_vx to avoid extra allocations) */
3422
3423 cs_real_t *restrict vx_lv1 = mgd->rhs_vx[(level+1)*na + 4];
3424 cs_real_t *restrict rhs_lv1 = mgd->rhs_vx[(level+1)*na + 5];
3425
3426 # pragma omp parallel for if(_c_n_rows > CS_THR_MIN)
3427 for (cs_lnum_t i = 0; i < _c_n_rows; i++)
3428 vx_lv1[i] = 0.0;
3429
3430 /* Restriction of rt_lv into the next level rhs */
3431 cs_grid_restrict_row_var(f, c, rt_lv, rhs_lv1);
3432
3433 t1 = cs_timer_time();
3434 cs_timer_counter_add_diff(&(lv_info->t_tot[4]), &t0, &t1);
3435 lv_info->n_calls[4] += 1;
3436
3437 /* Approximate solution to the coarser problem
3438 ------------------------------------------- */
3439
3440 /* If the next level is the coarsest one, then we solve it directly */
3441
3442 if (level == coarsest_level-1) {
3443
3444 lv_info = mg->lv_info + coarsest_level;
3445
3446 mg_sles = &(mgd->sles_hierarchy[coarsest_level*2]);
3447
3448 cvg = mg_sles->solve_func(mg_sles->context,
3449 lv_names[coarsest_level*2],
3450 c_matrix,
3451 verbosity - 2,
3452 precision*mg->info.precision_mult[2],
3453 r_norm_l,
3454 &n_iter,
3455 residue,
3456 rhs_lv1,
3457 vx_lv1,
3458 _aux_r_size*sizeof(cs_real_t),
3459 aux_vectors);
3460
3461 t0 = cs_timer_time();
3462 cs_timer_counter_add_diff(&(lv_info->t_tot[1]), &t1, &t0);
3463 lv_info->n_calls[1] += 1;
3464 _lv_info_update_stage_iter(lv_info->n_it_solve, n_iter);
3465
3466 if (verbosity > 0)
3467 _log_residual(mg, cycle_id, lv_names[coarsest_level*2],
3468 c_matrix, rhs_lv1, vx_lv1);
3469
3470 *n_equiv_iter += n_iter * c_n_g_rows * denom_n_g_rows_0;
3471
3472 }
3473
3474 /* Otherwise, we apply the K-cycle recursively */
3475
3476 else {
3477
3478 c_cvg = _multigrid_k_cycle(mg,
3479 level + 1,
3480 lv_names,
3481 verbosity,
3482 cycle_id,
3483 &n_iter,
3484 precision,
3485 r_norm,
3486 NULL,
3487 residue,
3488 rhs_lv1,
3489 vx_lv1,
3490 aux_size,
3491 aux_vectors);
3492
3493 t1 = cs_timer_time();
3494
3495 /* Arrays needed for the first Krylov iteration inside the cycle */
3496 cs_real_t *restrict v_lv1 = mgd->rhs_vx[(level+1)*na + 6];
3497 cs_real_t *restrict rt_lv1 = mgd->rhs_vx[(level+1)*na + 7];
3498
3499 cs_matrix_vector_multiply(c_matrix, vx_lv1, v_lv1);
3500
3501 /* Coefficients for the Krylov iteration */
3502
3503 cs_real_t rho1 = 0, alpha1 = 0;
3504 _dot_xy_yz(mg, _c_n_rows, v_lv1, vx_lv1, rhs_lv1, &rho1, &alpha1);
3505
3506 cs_real_t ar1 = alpha1 / rho1;
3507
3508 /* New residual */
3509 for (cs_lnum_t i = 0; i < _c_n_rows; i++)
3510 rt_lv1[i] = rhs_lv1[i] - ar1 * v_lv1[i];
3511
3512 cs_real_t rt_lv1_norm = 1.0, r_lv1_norm = 0.0;
3513
3514 if (trsh > 0)
3515 _dot_xx_yy(mg, _c_n_rows, rt_lv1, rhs_lv1, &rt_lv1_norm, &r_lv1_norm);
3516
3517 /* Free (unmap) arrays that were needed only for the descent phase */
3518 rhs_lv1 = NULL;
3519
3520 /* Test for the second coarse resolution */
3521 if (rt_lv1_norm < trsh * trsh * r_lv1_norm) {
3522 # pragma omp parallel for if(_c_n_rows > CS_THR_MIN)
3523 for (cs_lnum_t i = 0; i < _c_n_rows; i++)
3524 vx_lv1[i] = ar1 * vx_lv1[i];
3525 }
3526 else {
3527
3528 /* Arrays for the (optional) second Krylov iteration */
3529 cs_real_t *restrict vx2_lv1 = mgd->rhs_vx[(level+1)*na + 8];
3530 cs_real_t *restrict w_lv1 = mgd->rhs_vx[(level+1)*na + 9];
3531
3532 # pragma omp parallel for if(_c_n_rows > CS_THR_MIN)
3533 for (cs_lnum_t i = 0; i < _c_n_rows; i++) {
3534 vx2_lv1[i] = 0.0;
3535 w_lv1[i] = 0.0;
3536 }
3537
3538 t0 = cs_timer_time();
3539 cs_timer_counter_add_diff(&(lv_info->t_tot[6]), &t1, &t0);
3540
3541 c_cvg = _multigrid_k_cycle(mg,
3542 level + 1,
3543 lv_names,
3544 verbosity,
3545 cycle_id,
3546 &n_iter,
3547 precision,
3548 r_norm,
3549 initial_residue,
3550 residue,
3551 rt_lv1,
3552 vx2_lv1,
3553 aux_size,
3554 aux_vectors);
3555
3556 t1 = cs_timer_time();
3557
3558 cs_matrix_vector_multiply(c_matrix, vx2_lv1, w_lv1);
3559
3560 /* Krylov iteration */
3561
3562 cs_real_t gamma = 0, beta = 0, alpha2 = 0;
3563
3564 _dot_xu_xv_xw(mg, _c_n_rows,
3565 vx2_lv1, v_lv1, w_lv1, rt_lv1,
3566 &gamma, &beta, &alpha2);
3567
3568 cs_real_t rho2 = beta - (gamma * gamma) / rho1;
3569 cs_real_t ar2 = alpha2 / rho2;
3570 cs_real_t ar1_ar2 = ar1 - (gamma / rho1) * ar2;
3571
3572 # pragma omp parallel for if(_c_n_rows > CS_THR_MIN)
3573 for (cs_lnum_t i = 0; i < _c_n_rows; i++)
3574 vx_lv1[i] = ar1_ar2 * vx_lv1[i] + ar2 * vx2_lv1[i];
3575
3576 vx2_lv1 = NULL;
3577 w_lv1 = NULL;
3578 }
3579
3580 t0 = cs_timer_time();
3581 cs_timer_counter_add_diff(&(lv_info->t_tot[6]), &t1, &t0);
3582
3583 v_lv1 = NULL;
3584 rt_lv1 = NULL;
3585 }
3586
3587 /* Ascent
3588 ------ */
3589
3590 lv_info = mg->lv_info + level;
3591 t0 = cs_timer_time();
3592
3593 /* Arrays that are needed for the ascent phase */
3594 cs_real_t *restrict rb_lv = mgd->rhs_vx[level*na + 1];
3595 cs_real_t *restrict z1_lv = mgd->rhs_vx[level*na + 2];
3596 cs_real_t *restrict z2_lv = mgd->rhs_vx[level*na + 3];
3597
3598 # pragma omp parallel for if(_f_n_rows > CS_THR_MIN)
3599 for (cs_lnum_t ii = 0; ii < _f_n_rows; ii++)
3600 z2_lv[ii] = 0.0;
3601
3602 /* Prolongation */
3603 cs_grid_prolong_row_var(c, f, vx_lv1, z1_lv);
3604
3605 t1 = cs_timer_time();
3606 cs_timer_counter_add_diff(&(lv_info->t_tot[5]), &t0, &t1);
3607 lv_info->n_calls[5] += 1;
3608
3609 /* New residual */
3610 cs_matrix_vector_multiply(f_matrix, z1_lv, rb_lv);
3611 # pragma omp parallel for if(_f_n_rows > CS_THR_MIN)
3612 for (cs_lnum_t ii = 0; ii < _f_n_rows; ii++)
3613 rb_lv[ii] = rt_lv[ii] - rb_lv[ii];
3614
3615 t0 = cs_timer_time();
3616 cs_timer_counter_add_diff(&(lv_info->t_tot[5]), &t1, &t0);
3617 t1 = t0;
3618
3619 /* Post-smoothing */
3620
3621 mg_sles = &(mgd->sles_hierarchy[level*2+1]);
3622
3623 cvg = mg_sles->solve_func(mg_sles->context,
3624 lv_names[level*2+1],
3625 f_matrix,
3626 0, /* verbosity */
3627 precision*mg->info.precision_mult[1],
3628 r_norm_l,
3629 &n_iter,
3630 &_residue,
3631 rb_lv,
3632 z2_lv,
3633 _aux_r_size*sizeof(cs_real_t),
3634 aux_vectors);
3635
3636 t0 = cs_timer_time();
3637 cs_timer_counter_add_diff(&(lv_info->t_tot[3]), &t1, &t0);
3638 lv_info->n_calls[3] += 1;
3639 _lv_info_update_stage_iter(lv_info->n_it_as_smoothe, n_iter);
3640 *n_equiv_iter += n_iter * f_n_g_rows * denom_n_g_rows_0;
3641
3642 if (verbosity > 0)
3643 _log_residual(mg, cycle_id, lv_names[level*2 + 1],
3644 c_matrix, rb_lv, z2_lv);
3645
3646 # pragma omp parallel for if(_f_n_rows > CS_THR_MIN)
3647 for (cs_lnum_t ii = 0; ii < f_n_rows; ii++)
3648 vx_lv[ii] += z1_lv[ii] + z2_lv[ii];
3649
3650 t1 = cs_timer_time();
3651 cs_timer_counter_add_diff(&(lv_info->t_tot[6]), &t0, &t1);
3652
3653 /* Free/unmap arrays of the ascent phase */
3654 rb_lv = NULL;
3655 z1_lv = NULL;
3656 z2_lv = NULL;
3657
3658 /* Free/unmap arrays that were needed for both phases */
3659 rt_lv = NULL;
3660 vx_lv1 = NULL;
3661
3662 return cvg;
3663 }
3664
3665 /*! (DOXYGEN_SHOULD_SKIP_THIS) \endcond */
3666
3667 /*============================================================================
3668 * Public function definitions
3669 *============================================================================*/
3670
3671 /*----------------------------------------------------------------------------*/
3672 /*!
3673 * \brief Initialize multigrid solver API.
3674 */
3675 /*----------------------------------------------------------------------------*/
3676
void
cs_multigrid_initialize(void)
{
  /* Intentionally empty: no global state needs setting up at present.
     Kept as a public entry point for API symmetry with
     cs_multigrid_finalize(). */
}
3681
3682 /*----------------------------------------------------------------------------*/
3683 /*!
3684 * \brief Finalize multigrid solver API.
3685 */
3686 /*----------------------------------------------------------------------------*/
3687
void
cs_multigrid_finalize(void)
{
  /* Release global resources held by the grid hierarchy module;
     multigrid itself keeps no additional global state to free here. */
  cs_grid_finalize();
}
3693
3694 /*----------------------------------------------------------------------------*/
3695 /*!
3696 * \brief Define and associate a multigrid sparse linear system solver
3697 * for a given field or equation name.
3698 *
3699 * If this system did not previously exist, it is added to the list of
3700 * "known" systems. Otherwise, its definition is replaced by the one
3701 * defined here.
3702 *
3703 * This is a utility function: if finer control is needed, see
3704 * \ref cs_sles_define and \ref cs_multigrid_create.
3705 *
3706 * Note that this function returns a pointer directly to the multigrid solver
3707 * management structure. This may be used to set further options, for
3708 * example calling \ref cs_multigrid_set_coarsening_options and
3709 * \ref cs_multigrid_set_solver_options.
3710 * If needed, \ref cs_sles_find may be used to obtain a pointer to the
3711 * matching \ref cs_sles_t container.
3712 *
3713 * \param[in] f_id associated field id, or < 0
3714 * \param[in] name associated name if f_id < 0, or NULL
3715 * \param[in] mg_type type of multigrid algorithm to use
3716 *
3717 * \return pointer to new multigrid info and context
3718 */
3719 /*----------------------------------------------------------------------------*/
3720
3721 cs_multigrid_t *
cs_multigrid_define(int f_id,const char * name,cs_multigrid_type_t mg_type)3722 cs_multigrid_define(int f_id,
3723 const char *name,
3724 cs_multigrid_type_t mg_type)
3725 {
3726 cs_multigrid_t *
3727 mg = cs_multigrid_create(mg_type);
3728
3729 cs_sles_t *sc = cs_sles_define(f_id,
3730 name,
3731 mg,
3732 "cs_multigrid_t",
3733 cs_multigrid_setup,
3734 cs_multigrid_solve,
3735 cs_multigrid_free,
3736 cs_multigrid_log,
3737 cs_multigrid_copy,
3738 cs_multigrid_destroy);
3739
3740 cs_sles_set_error_handler(sc,
3741 cs_multigrid_error_post_and_abort);
3742
3743 return mg;
3744 }
3745
3746 /*----------------------------------------------------------------------------*/
3747 /*!
3748 * \brief Create multigrid linear system solver info and context.
3749 *
3750 * The multigrid variant is an ACM (Additive Corrective Multigrid) method.
3751 *
3752 * \param[in] mg_type type of multigrid algorithm to use
3753 *
3754 * \return pointer to new multigrid info and context
3755 */
3756 /*----------------------------------------------------------------------------*/
3757
3758 cs_multigrid_t *
cs_multigrid_create(cs_multigrid_type_t mg_type)3759 cs_multigrid_create(cs_multigrid_type_t mg_type)
3760 {
3761 int ii;
3762 cs_multigrid_t *mg;
3763
3764 /* Increment number of setups */
3765
3766 BFT_MALLOC(mg, 1, cs_multigrid_t);
3767
3768 mg->type = mg_type;
3769 mg->subtype = CS_MULTIGRID_MAIN;
3770
3771 #if defined(HAVE_MPI)
3772 mg->comm = cs_glob_mpi_comm;
3773 mg->caller_comm = cs_glob_mpi_comm;
3774 mg->caller_n_ranks = cs_glob_n_ranks;
3775 if (mg->caller_n_ranks < 2) {
3776 mg->comm = MPI_COMM_NULL;
3777 mg->caller_comm = cs_glob_mpi_comm;
3778 }
3779 mg->merge_mean_threshold = 300;
3780 mg->merge_glob_threshold = 500;
3781 mg->merge_stride = 1;
3782 #endif
3783
3784 mg->aggregation_limit = 3;
3785 mg->coarsening_type = CS_GRID_COARSENING_DEFAULT;
3786 mg->n_levels_max = 25;
3787 mg->n_g_rows_min = 30;
3788
3789 mg->post_row_max = 0;
3790
3791 mg->p0p1_relax = 0.;
3792 mg->k_cycle_threshold = 0;
3793
3794 _multigrid_info_init(&(mg->info));
3795 for (int i = 0; i < 3; i++)
3796 mg->lv_mg[i] = NULL;
3797 mg->p_mg = NULL;
3798
3799 if (mg->type == CS_MULTIGRID_V_CYCLE) {
3800 mg->p0p1_relax = 0.95;
3801 }
3802 else if (mg->type == CS_MULTIGRID_K_CYCLE) {
3803 mg->coarsening_type = CS_GRID_COARSENING_SPD_PW;
3804 mg->aggregation_limit = 4;
3805 mg->n_levels_max = 10;
3806 mg->n_g_rows_min = 256;
3807 mg->k_cycle_threshold = 0.25;
3808 }
3809 else if (mg->type == CS_MULTIGRID_K_CYCLE_HPC) {
3810 mg->coarsening_type = CS_GRID_COARSENING_SPD_PW;
3811 mg->aggregation_limit = 8;
3812 mg->n_levels_max = 4;
3813 mg->k_cycle_threshold = -1;
3814 mg->lv_mg[2] = _multigrid_create_k_cycle_bottom(mg);
3815 }
3816
3817 mg->pc_precision = 0.0;
3818 mg->pc_r_norm = 0.0;
3819
3820 mg->n_levels_post = 0;
3821
3822 mg->setup_data = NULL;
3823
3824 BFT_MALLOC(mg->lv_info, mg->n_levels_max, cs_multigrid_level_info_t);
3825
3826 for (ii = 0; ii < mg->n_levels_max; ii++)
3827 _multigrid_level_info_init(mg->lv_info + ii);
3828
3829 mg->post_row_num = NULL;
3830 mg->post_row_rank = NULL;
3831 mg->post_name = NULL;
3832
3833 mg->cycle_plot = NULL;
3834 mg->plot_time_stamp = -1;
3835
3836 if (mg_type == CS_MULTIGRID_V_CYCLE)
3837 cs_multigrid_set_solver_options
3838 (mg,
3839 CS_SLES_PCG, CS_SLES_PCG, CS_SLES_PCG,
3840 100, /* n max cycles */
3841 2, /* n max iter for descent */
3842 10, /* n max iter for ascent */
3843 500,
3844 0, 0, 0, /* precond degree */
3845 1, 1, 1); /* precision multiplier */
3846
3847 else if ( mg->type >= CS_MULTIGRID_K_CYCLE
3848 && mg->type <= CS_MULTIGRID_K_CYCLE_HPC)
3849 cs_multigrid_set_solver_options
3850 (mg,
3851 CS_SLES_P_SYM_GAUSS_SEIDEL,
3852 CS_SLES_P_SYM_GAUSS_SEIDEL,
3853 CS_SLES_PCG,
3854 100, /* n max cycles */
3855 1, /* n max iter for descent */
3856 1, /* n max iter for ascent */
3857 50, /* n max iter for coarse solve */
3858 -1, -1, 0, /* precond degree */
3859 -1, -1, 1); /* precision multiplier */
3860
3861 return mg;
3862 }
3863
3864 /*----------------------------------------------------------------------------*/
3865 /*!
3866 * \brief Destroy multigrid linear system solver info and context.
3867 *
3868 * \param[in, out] context pointer to multigrid linear solver info
3869 * (actual type: cs_multigrid_t **)
3870 */
3871 /*----------------------------------------------------------------------------*/
3872
3873 void
cs_multigrid_destroy(void ** context)3874 cs_multigrid_destroy(void **context)
3875 {
3876 cs_multigrid_t *mg = (cs_multigrid_t *)(*context);
3877
3878 if (mg == NULL)
3879 return;
3880
3881 BFT_FREE(mg->lv_info);
3882
3883 if (mg->post_row_num != NULL) {
3884 int n_max_post_levels = (int)(mg->info.n_levels[2]) - 1;
3885 for (int i = 0; i < n_max_post_levels; i++)
3886 if (mg->post_row_num[i] != NULL)
3887 BFT_FREE(mg->post_row_num[i]);
3888 BFT_FREE(mg->post_row_num);
3889 }
3890
3891 if (mg->post_row_rank != NULL) {
3892 int n_max_post_levels = (int)(mg->info.n_levels[2]) - 1;
3893 for (int i = 0; i < n_max_post_levels; i++)
3894 if (mg->post_row_rank[i] != NULL)
3895 BFT_FREE(mg->post_row_rank[i]);
3896 BFT_FREE(mg->post_row_rank);
3897 }
3898
3899 BFT_FREE(mg->post_name);
3900
3901 if (mg->cycle_plot != NULL)
3902 cs_time_plot_finalize(&(mg->cycle_plot));
3903
3904 for (int i = 0; i < 3; i++) {
3905 if (mg->lv_mg[i] != NULL)
3906 cs_multigrid_destroy((void **)(&(mg->lv_mg[i])));
3907 }
3908
3909 BFT_FREE(mg);
3910 *context = (void *)mg;
3911 }
3912
3913 /*----------------------------------------------------------------------------*/
3914 /*!
3915 * \brief Create multigrid sparse linear system solver info and context
3916 * based on existing info and context.
3917 *
3918 * \param[in] context pointer to reference info and context
3919 * (actual type: cs_multigrid_t *)
3920 *
3921 * \return pointer to newly created solver info object
3922 * (actual type: cs_multigrid_t *)
3923 */
3924 /*----------------------------------------------------------------------------*/
3925
3926 void *
cs_multigrid_copy(const void * context)3927 cs_multigrid_copy(const void *context)
3928 {
3929 cs_multigrid_t *d = NULL;
3930
3931 if (context != NULL) {
3932 const cs_multigrid_t *c = context;
3933 d = cs_multigrid_create(c->type);
3934 /* Beginning of cs_multigrid_info_t contains settings, the rest logging */
3935 memcpy(&(d->info), &(c->info),
3936 offsetof(cs_multigrid_info_t, n_calls));
3937 /* Same here: settings at beginningof structure */
3938 memcpy(d, c, offsetof(cs_multigrid_t, n_levels_post));
3939 }
3940
3941 return d;
3942 }
3943
3944 /*----------------------------------------------------------------------------*/
3945 /*!
3946 * \brief Log multigrid solver info.
3947 *
3948 * \param[in] context pointer to iterative solver info and context
3949 * (actual type: cs_multigrid_t *)
3950 * \param[in] log_type log type
3951 */
3952 /*----------------------------------------------------------------------------*/
3953
3954 void
cs_multigrid_log(const void * context,cs_log_t log_type)3955 cs_multigrid_log(const void *context,
3956 cs_log_t log_type)
3957 {
3958 const cs_multigrid_t *mg = context;
3959
3960 if (log_type == CS_LOG_SETUP)
3961 _multigrid_setup_log(mg);
3962
3963 else if (log_type == CS_LOG_PERFORMANCE)
3964 _multigrid_performance_log(mg);
3965 }
3966
3967 /*----------------------------------------------------------------------------*/
3968 /*!
3969 * \brief Set multigrid coarsening parameters.
3970 *
3971 * \param[in, out] mg pointer to multigrid info and context
3972 * \param[in] aggregation_limit maximum allowed fine rows
3973 * per coarse row
3974 * \param[in] coarsening_type coarsening type;
3975 * see \ref cs_grid_coarsening_t
3976 * \param[in] n_max_levels maximum number of grid levels
3977 * \param[in] min_g_rows global number of rows on coarse grids
3978 * under which no coarsening occurs
3979 * \param[in] p0p1_relax p0/p1 relaxation_parameter
3980 * \param[in] postprocess if > 0, postprocess coarsening
3981 * (uses coarse row numbers
3982 * modulo this value)
3983 */
3984 /*----------------------------------------------------------------------------*/
3985
3986 void
cs_multigrid_set_coarsening_options(cs_multigrid_t * mg,int aggregation_limit,int coarsening_type,int n_max_levels,cs_gnum_t min_g_rows,double p0p1_relax,int postprocess)3987 cs_multigrid_set_coarsening_options(cs_multigrid_t *mg,
3988 int aggregation_limit,
3989 int coarsening_type,
3990 int n_max_levels,
3991 cs_gnum_t min_g_rows,
3992 double p0p1_relax,
3993 int postprocess)
3994 {
3995 if (mg == NULL)
3996 return;
3997
3998 mg->aggregation_limit = aggregation_limit;
3999 mg->coarsening_type = coarsening_type;
4000 mg->n_levels_max = n_max_levels;
4001 mg->n_g_rows_min = min_g_rows;
4002
4003 mg->post_row_max = postprocess;
4004
4005 mg->p0p1_relax = p0p1_relax;
4006 }
4007
4008 /*----------------------------------------------------------------------------*/
4009 /*!
4010 * \brief Set multigrid parameters for associated iterative solvers.
4011 *
4012 * \param[in, out] mg pointer to multigrid info
4013 * and context
4014 * \param[in] descent_smoother_type type of smoother for descent
4015 * \param[in] ascent_smoother_type type of smoother for ascent
4016 * \param[in] coarse_solver_type type of solver for coarsest grid
4017 * \param[in] n_max_cycles maximum number of cycles
4018 * \param[in] n_max_iter_descent maximum iterations
4019 * per descent smoothing
4020 * \param[in] n_max_iter_ascent maximum iterations
4021 * per ascent smoothing
4022 * \param[in] n_max_iter_coarse maximum iterations
4023 * per coarsest solution
4024 * \param[in] poly_degree_descent preconditioning polynomial degree
4025 * for descent phases (0: diagonal)
4026 * \param[in] poly_degree_ascent preconditioning polynomial degree
4027 * for ascent phases (0: diagonal)
4028 * \param[in] poly_degree_coarse preconditioning polynomial degree
4029 * for coarse solver (0: diagonal)
4030 * \param[in] precision_mult_descent precision multiplier
4031 * for descent smoothers (levels >= 1)
4032 * \param[in] precision_mult_ascent precision multiplier
4033 * for ascent smoothers
4034 * \param[in] precision_mult_coarse precision multiplier
4035 * for coarsest grid
4036 */
4037 /*----------------------------------------------------------------------------*/
4038
4039 void
cs_multigrid_set_solver_options(cs_multigrid_t * mg,cs_sles_it_type_t descent_smoother_type,cs_sles_it_type_t ascent_smoother_type,cs_sles_it_type_t coarse_solver_type,int n_max_cycles,int n_max_iter_descent,int n_max_iter_ascent,int n_max_iter_coarse,int poly_degree_descent,int poly_degree_ascent,int poly_degree_coarse,double precision_mult_descent,double precision_mult_ascent,double precision_mult_coarse)4040 cs_multigrid_set_solver_options(cs_multigrid_t *mg,
4041 cs_sles_it_type_t descent_smoother_type,
4042 cs_sles_it_type_t ascent_smoother_type,
4043 cs_sles_it_type_t coarse_solver_type,
4044 int n_max_cycles,
4045 int n_max_iter_descent,
4046 int n_max_iter_ascent,
4047 int n_max_iter_coarse,
4048 int poly_degree_descent,
4049 int poly_degree_ascent,
4050 int poly_degree_coarse,
4051 double precision_mult_descent,
4052 double precision_mult_ascent,
4053 double precision_mult_coarse)
4054 {
4055 if (mg == NULL)
4056 return;
4057
4058 cs_multigrid_info_t *info = &(mg->info);
4059
4060 info->type[0] = descent_smoother_type;
4061 info->type[1] = ascent_smoother_type;
4062 info->type[2] = coarse_solver_type;
4063
4064 info->n_max_cycles = n_max_cycles;
4065
4066 info->n_max_iter[0] = n_max_iter_descent;
4067 info->n_max_iter[1] = n_max_iter_ascent;
4068 info->n_max_iter[2] = n_max_iter_coarse;
4069
4070 info->poly_degree[0] = poly_degree_descent;
4071 info->poly_degree[1] = poly_degree_ascent;
4072 info->poly_degree[2] = poly_degree_coarse;
4073
4074 info->precision_mult[0] = precision_mult_descent;
4075 info->precision_mult[1] = precision_mult_ascent;
4076 info->precision_mult[2] = precision_mult_coarse;
4077
4078 for (int i = 0; i < 3; i++) {
4079 switch(info->type[i]) {
4080 case CS_SLES_JACOBI:
4081 case CS_SLES_P_GAUSS_SEIDEL:
4082 case CS_SLES_P_SYM_GAUSS_SEIDEL:
4083 info->poly_degree[i] = -1;
4084 break;
4085 default:
4086 break;
4087 }
4088 }
4089 }
4090
4091 /*----------------------------------------------------------------------------*/
4092 /*!
4093 * \brief Return solver type used on fine mesh.
4094 *
4095 * \param[in] mg pointer to multigrid info and context
4096 *
4097 * \return type of smoother for descent (used for fine mesh)
4098 */
4099 /*----------------------------------------------------------------------------*/
4100
4101 cs_sles_it_type_t
cs_multigrid_get_fine_solver_type(const cs_multigrid_t * mg)4102 cs_multigrid_get_fine_solver_type(const cs_multigrid_t *mg)
4103 {
4104 if (mg == NULL)
4105 return CS_SLES_N_IT_TYPES;
4106
4107 const cs_multigrid_info_t *info = &(mg->info);
4108
4109 return info->type[0];
4110 }
4111
4112 /*----------------------------------------------------------------------------*/
4113 /*!
4114 * \brief Setup multigrid sparse linear equation solver.
4115 *
4116 * \param[in, out] context pointer to multigrid solver info and context
4117 * (actual type: cs_multigrid_t *)
4118 * \param[in] name pointer to name of linear system
4119 * \param[in] a associated matrix
4120 * \param[in] verbosity associated verbosity
4121 */
4122 /*----------------------------------------------------------------------------*/
4123
void
cs_multigrid_setup(void               *context,
                   const char         *name,
                   const cs_matrix_t  *a,
                   int                 verbosity)
{
  /* Thin wrapper: delegate to the convection/diffusion variant with
     no separate convection or diffusion matrices. */
  cs_multigrid_setup_conv_diff(context,
                               name,
                               a,
                               NULL,
                               NULL,
                               verbosity);
}
4137
4138 /*----------------------------------------------------------------------------*/
4139 /*!
4140 * \brief Setup multigrid sparse linear equation solver.
4141 *
4142 * \param[in, out] context pointer to multigrid solver info and context
4143 * (actual type: cs_multigrid_t *)
4144 * \param[in] name pointer to name of linear system
4145 * \param[in] a associated matrix
4146 * \param[in] a_conv associated matrix (convection)
4147 * \param[in] a_diff associated matrix (diffusion)
4148 * \param[in] verbosity associated verbosity
4149 */
4150 /*----------------------------------------------------------------------------*/
4151
void
cs_multigrid_setup_conv_diff(void               *context,
                             const char         *name,
                             const cs_matrix_t  *a,
                             const cs_matrix_t  *a_conv,
                             const cs_matrix_t  *a_diff,
                             int                 verbosity)

{
  cs_multigrid_t *mg = context;

  /* The fine grid is built from the shared global mesh and its
     geometric quantities. */
  const cs_mesh_t *mesh = cs_glob_mesh;
  const cs_mesh_quantities_t *mq = cs_glob_mesh_quantities;

  /* Destroy previous hierarchy if necessary */

  if (mg->setup_data != NULL)
    cs_multigrid_free(mg);

  /* Initialization */

  cs_timer_t t0 = cs_timer_time();

  if (verbosity > 1) {
    /* When used as a preconditioner, the solver header is logged by
       the caller, so only log the hierarchy construction here. */
    if (!(mg->info.is_pc)) {
      bft_printf(_("\n Setup of solver for linear system \"%s\"\n"),
                 name);
      cs_matrix_log_info(a, verbosity);
    }
    bft_printf(_("\n Construction of grid hierarchy for \"%s\"\n"),
               name);
  }

  /* Build coarse grids hierarchy */
  /*------------------------------*/

  /* Block sizes are taken from the system matrix so coarse grids
     inherit the same block structure. */
  const cs_lnum_t *diag_block_size = cs_matrix_get_diag_block_size(a);
  const cs_lnum_t *extra_diag_block_size
    = cs_matrix_get_extra_diag_block_size(a);

  /* Create the finest-level grid, wrapping shared mesh connectivity and
     geometric data; a_conv/a_diff may be NULL (plain setup). */
  cs_grid_t *f
    = cs_grid_create_from_shared(mesh->n_i_faces,
                                 diag_block_size,
                                 extra_diag_block_size,
                                 (const cs_lnum_2_t *)(mesh->i_face_cells),
                                 mq->cell_cen,
                                 mq->cell_vol,
                                 mq->i_face_normal,
                                 a,
                                 a_conv,
                                 a_diff);

  cs_multigrid_level_info_t *mg_lv_info = mg->lv_info;

  /* Account for fine-grid construction time in level 0 build timer
     (t_tot[0] is the build/setup counter). */
  cs_timer_t t1 = cs_timer_time();
  cs_timer_counter_add_diff(&(mg_lv_info->t_tot[0]), &t0, &t1);

  _setup_hierarchy(mg, name, mesh, f, verbosity); /* Assign to and build
                                                     hierarchy */

  /* Update timers */

  t1 = cs_timer_time();
  cs_timer_counter_add_diff(&(mg->info.t_tot[0]), &t0, &t1);
}
4217
4218 /*----------------------------------------------------------------------------*/
4219 /*!
4220 * \brief Call multigrid sparse linear equation solver.
4221 *
4222 * \param[in, out] context pointer to multigrid solver info and context
4223 * (actual type: cs_multigrid_t *)
4224 * \param[in] name pointer to name of linear system
4225 * \param[in] a matrix
4226 * \param[in] verbosity associated verbosity
4227 * \param[in] precision solver precision
4228 * \param[in] r_norm residue normalization
4229 * \param[out] n_iter number of "equivalent" iterations
4230 * \param[out] residue residue
4231 * \param[in] rhs right hand side
4232 * \param[in, out] vx system solution
4233 * \param[in] aux_size size of aux_vectors (in bytes)
4234 * \param aux_vectors optional working area
4235 * (internal allocation if NULL)
4236 *
4237 * \return convergence state
4238 */
4239 /*----------------------------------------------------------------------------*/
4240
cs_sles_convergence_state_t
cs_multigrid_solve(void                *context,
                   const char          *name,
                   const cs_matrix_t   *a,
                   int                  verbosity,
                   double               precision,
                   double               r_norm,
                   int                 *n_iter,
                   double              *residue,
                   const cs_real_t     *rhs,
                   cs_real_t           *vx,
                   size_t               aux_size,
                   void                *aux_vectors)
{
  cs_timer_t t0, t1;
  t0 = cs_timer_time();

  cs_sles_convergence_state_t cvg = CS_SLES_ITERATING;

  cs_multigrid_t *mg = context;
  cs_multigrid_info_t *mg_info = &(mg->info);

  const cs_lnum_t *db_size = cs_matrix_get_diag_block_size(a);

  /* Only square diagonal blocks are supported here. */
  assert(db_size[0] == db_size[1]);

  cs_lnum_t n_rows = cs_matrix_get_n_rows(a);

  /* Initialize number of equivalent iterations and residue,
     check for immediate return,
     solve sparse linear system using multigrid algorithm. */

  *n_iter = 0;
  unsigned n_cycles = 0;

  /* Lazy setup: build the grid hierarchy on first solve (or after a
     cs_multigrid_free). Setup time is accounted in t_tot[0], solve
     time in t_tot[1], hence the timer swap below. */
  if (mg->setup_data == NULL) {
    /* Stop solve timer to switch to setup timer */
    t1 = cs_timer_time();
    cs_timer_counter_add_diff(&(mg->info.t_tot[1]), &t0, &t1);

    /* Setup grid hierarchy */
    cs_multigrid_setup(context, name, a, verbosity);

    /* Restart solve timer */
    t0 = cs_timer_time();
  }

  if (mg_info->is_pc == false && verbosity > 1)
    cs_log_printf(CS_LOG_DEFAULT,
                  _(" RHS norm: %11.4e\n\n"), r_norm);

  /* Buffer size sufficient to avoid local reallocation for most solvers */
  /* Layout: per-level name strings first, then 6 work vectors of
     n_rows * db_size[1] reals for the cycle routines. */
  size_t lv_names_size = _level_names_size(name, mg->setup_data->n_levels);
  size_t _aux_size = lv_names_size
                     + n_rows * 6 * db_size[1] * sizeof(cs_real_t);
  unsigned char *_aux_buf = aux_vectors;

  /* Use the caller-provided work area when large enough; otherwise
     allocate a local buffer (freed before returning). */
  if (_aux_size > aux_size)
    BFT_MALLOC(_aux_buf, _aux_size, unsigned char);
  else
    _aux_size = aux_size;

  _level_names_init(name, mg->setup_data->n_levels, _aux_buf);
  const char **lv_names = (const char **)_aux_buf;

  if (verbosity == 2) /* More detailed headers later if > 2 */
    bft_printf(_("Multigrid [%s]:\n"), name);

  /* Initial residue should be improved, but this is consistent
     with the legacy (by increment) case */

  double initial_residue = -1;

  *residue = initial_residue; /* not known yet, so be safe */

  /* Cycle to solution */

  if (mg->type == CS_MULTIGRID_V_CYCLE) {
    /* NOTE(review): loop exit (including max-cycle handling) relies on
       _multigrid_v_cycle updating cvg — confirm it enforces
       n_max_cycles internally. */
    for (n_cycles = 0; cvg == CS_SLES_ITERATING; n_cycles++) {
      int cycle_id = n_cycles+1;
      cvg = _multigrid_v_cycle(mg,
                               lv_names,
                               verbosity,
                               cycle_id,
                               n_iter,
                               precision,
                               r_norm,
                               &initial_residue,
                               residue,
                               rhs,
                               vx,
                               _aux_size - lv_names_size,
                               _aux_buf + lv_names_size);
    }
  }
  else if (   mg->type >= CS_MULTIGRID_K_CYCLE
           && mg->type <= CS_MULTIGRID_K_CYCLE_HPC) {
    /* K-cycle variants: cycle count is bounded here explicitly. */
    for (n_cycles = 0;
         n_cycles < (unsigned)mg->info.n_max_cycles && cvg == CS_SLES_ITERATING;
         n_cycles++) {
      int cycle_id = n_cycles+1;
      cvg = _multigrid_k_cycle(mg,
                               0,  /* start at finest level */
                               lv_names,
                               verbosity,
                               cycle_id,
                               n_iter,
                               precision,
                               r_norm,
                               &initial_residue,
                               residue,
                               rhs,
                               vx,
                               _aux_size - lv_names_size,
                               _aux_buf + lv_names_size);
    }
  }

  /* Free the work buffer only if it was locally allocated. */
  if (_aux_buf != aux_vectors)
    BFT_FREE(_aux_buf);

  if (verbosity > 0)
    cs_log_printf(CS_LOG_DEFAULT, "\n");

  /* Update statistics */

  t1 = cs_timer_time();

  /* Update stats on number of iterations (last, min, max, total) */
  /* n_cycles[0] = min, [1] = max, [2] = total over all calls. */

  mg_info->n_cycles[2] += n_cycles;

  if (mg_info->n_calls[1] > 0) {
    if (mg_info->n_cycles[0] > n_cycles)
      mg_info->n_cycles[0] = n_cycles;
    if (mg_info->n_cycles[1] < n_cycles)
      mg_info->n_cycles[1] = n_cycles;
  }
  else {
    /* First solve: initialize min and max. */
    mg_info->n_cycles[0] = n_cycles;
    mg_info->n_cycles[1] = n_cycles;
  }

  /* Update number of resolutions and timing data */

  mg_info->n_calls[1] += 1;
  cs_timer_counter_add_diff(&(mg->info.t_tot[1]), &t0, &t1);

  return cvg;
}
4391
4392 /*----------------------------------------------------------------------------*/
4393 /*!
4394 * \brief Free multigrid sparse linear equation solver setup context.
4395 *
 * This function frees resolution-related data, including the current
4397 * grid hierarchy, but does not free the whole context,
4398 * as info used for logging (especially performance data) is maintained.
4399 *
4400 * \param[in, out] context pointer to multigrid solver info and context
4401 * (actual type: cs_multigrid_t *)
4402 */
4403 /*----------------------------------------------------------------------------*/
4404
4405 void
cs_multigrid_free(void * context)4406 cs_multigrid_free(void *context)
4407 {
4408 cs_multigrid_t *mg = context;
4409
4410 cs_timer_t t0, t1;
4411
4412 /* Initialization */
4413
4414 t0 = cs_timer_time();
4415
4416 /* Free subgrid info first if needed */
4417
4418 for (int i = 0; i < 3; i++) {
4419 if (mg->lv_mg[i] != NULL)
4420 cs_multigrid_free(mg->lv_mg[i]);
4421 }
4422
4423 if (mg->setup_data != NULL) {
4424
4425 cs_multigrid_setup_data_t *mgd = mg->setup_data;
4426
4427 /* Free coarse solution data */
4428
4429 BFT_FREE(mgd->rhs_vx);
4430 BFT_FREE(mgd->rhs_vx_buf);
4431
4432 /* Destroy solver hierarchy */
4433
4434 for (int i = mgd->n_levels - 1; i > -1; i--) {
4435 for (int j = 0; j < 2; j++) {
4436 cs_mg_sles_t *mg_sles = &(mgd->sles_hierarchy[i*2+j]);
4437 if (mg_sles->context != NULL && mg_sles->destroy_func)
4438 mg_sles->destroy_func(&(mg_sles->context));
4439 }
4440 }
4441 BFT_FREE(mgd->sles_hierarchy);
4442
4443 /* Destroy grid hierarchy */
4444
4445 for (int i = mgd->n_levels - 1; i > -1; i--)
4446 cs_grid_destroy(mgd->grid_hierarchy + i);
4447 BFT_FREE(mgd->grid_hierarchy);
4448
4449 /* Destroy peconditioning-only arrays */
4450
4451 BFT_FREE(mgd->pc_name);
4452 BFT_FREE(mgd->pc_aux);
4453
4454 BFT_FREE(mg->setup_data);
4455 }
4456
4457 /* Update timers */
4458
4459 t1 = cs_timer_time();
4460 cs_timer_counter_add_diff(&(mg->info.t_tot[0]), &t0, &t1);
4461 }
4462
4463 /*----------------------------------------------------------------------------*/
4464 /*!
4465 * \brief Create a multigrid preconditioner.
4466 *
4467 * \param[in] mg_type type of multigrid algorithm to use
4468 *
4469 * \return pointer to newly created preconditioner object.
4470 */
4471 /*----------------------------------------------------------------------------*/
4472
4473 cs_sles_pc_t *
cs_multigrid_pc_create(cs_multigrid_type_t mg_type)4474 cs_multigrid_pc_create(cs_multigrid_type_t mg_type)
4475 {
4476 cs_multigrid_t *mg = _multigrid_pc_create(mg_type);
4477
4478 mg->info.is_pc = true;
4479
4480 cs_sles_pc_t *pc = cs_sles_pc_define(mg,
4481 _multigrid_pc_get_type,
4482 _multigrid_pc_setup,
4483 _multigrid_pc_tolerance_t,
4484 _multigrid_pc_apply,
4485 cs_multigrid_free,
4486 cs_multigrid_log,
4487 cs_multigrid_copy,
4488 cs_multigrid_destroy);
4489
4490 return pc;
4491 }
4492
4493 /*----------------------------------------------------------------------------*/
4494 /*!
4495 * \brief Error handler for multigrid sparse linear equation solver.
4496 *
4497 * In case of divergence or breakdown, this error handler outputs
4498 * postprocessing data to assist debugging, then aborts the run.
 * It does nothing in case the maximum iteration count is reached.
 *
4501 * \param[in, out] sles pointer to solver object
4502 * \param[in] state convergence state
4503 * \param[in] a matrix
4504 * \param[in] rhs right hand side
4505 * \param[in, out] vx system solution
4506 *
4507 * \return false (do not attempt new solve)
4508 */
4509 /*----------------------------------------------------------------------------*/
4510
bool
cs_multigrid_error_post_and_abort(cs_sles_t                    *sles,
                                  cs_sles_convergence_state_t   state,
                                  const cs_matrix_t            *a,
                                  const cs_real_t               rhs[],
                                  cs_real_t                     vx[])
{
  /* Reaching the iteration limit is not treated as an error;
     only divergence or breakdown triggers output and abort. */
  if (state >= CS_SLES_MAX_ITERATION)
    return false;

  const cs_multigrid_t *mg = cs_sles_get_context(sles);
  const char *name = cs_sles_get_name(sles);

  cs_multigrid_setup_data_t *mgd = mg->setup_data;
  if (mgd == NULL)
    return false;

  /* Level at which the solver exited (0 = finest grid). */
  int level = mgd->exit_level;

  int mesh_id = cs_post_init_error_writer_cells();
  int location_id = CS_MESH_LOCATION_CELLS;

  /* NOTE(review): rs is always NULL here, so the cs_range_set_scatter
     calls below presumably act as no-ops/copies — confirm against
     cs_range_set_scatter's NULL handling. */
  const cs_range_set_t *rs = NULL;

  if (mesh_id != 0) {

    char var_name[32];

    int lv_id = 0;
    cs_real_t *var = NULL, *da = NULL;

    cs_lnum_t db_size[4] = {1, 1, 1, 1};
    cs_lnum_t eb_size[4] = {1, 1, 1, 1};

    /* Projection buffers are sized for the finest grid, onto which all
       coarse-level fields are projected for visualization. */
    const cs_grid_t *g = mgd->grid_hierarchy[0];
    const cs_lnum_t n_base_rows = cs_grid_get_n_rows(g);
    const cs_matrix_t *_matrix = NULL;

    BFT_MALLOC(var, cs_grid_get_n_cols_ext(g), cs_real_t);
    BFT_MALLOC(da, cs_grid_get_n_cols_ext(g), cs_real_t);

    /* Output info on main level */

    cs_sles_post_error_output_def(name,
                                  mesh_id,
                                  a,
                                  rhs,
                                  vx);

    /* Output diagonal and diagonal dominance for all coarse levels */

    for (lv_id = 1; lv_id < (int)(mgd->n_levels); lv_id++) {

      g = mgd->grid_hierarchy[lv_id];

      /* Query only block sizes for this level. */
      cs_grid_get_info(g,
                       NULL,
                       NULL,
                       db_size,
                       eb_size,
                       NULL,
                       NULL,
                       NULL,
                       NULL,
                       NULL);

      _matrix = cs_grid_get_matrix(g);

      /* Project this level's diagonal onto the base grid and output it. */
      cs_matrix_copy_diagonal(_matrix, da);
      cs_grid_project_var(g, n_base_rows, da, var);
      cs_range_set_scatter(rs, CS_REAL_TYPE, db_size[1], var, var);
      sprintf(var_name, "Diag_%04d", lv_id);
      cs_sles_post_output_var(var_name,
                              mesh_id,
                              location_id,
                              CS_POST_WRITER_ERRORS,
                              db_size[1],
                              var);

      /* Same for the diagonal dominance indicator. */
      cs_grid_project_diag_dom(g, n_base_rows, var);
      cs_range_set_scatter(rs, CS_REAL_TYPE, db_size[1], var, var);
      sprintf(var_name, "Diag_Dom_%04d", lv_id);
      cs_sles_post_output_var(var_name,
                              mesh_id,
                              location_id,
                              CS_POST_WRITER_ERRORS,
                              db_size[1],
                              var);
    }

    /* Output info on current level if > 0 */

    if (level > 0) {

      cs_lnum_t ii;
      cs_lnum_t n_cells = 0;
      cs_lnum_t n_cols_ext = 0;

      cs_real_t *c_res = NULL;

      g = mgd->grid_hierarchy[level];

      cs_grid_get_info(g,
                       NULL,
                       NULL,
                       db_size,
                       eb_size,
                       NULL,
                       &n_cells,
                       &n_cols_ext,
                       NULL,
                       NULL);

      /* rhs_vx stores, per level, the RHS at index level*2 and the
         solution at index level*2+1. */
      cs_grid_project_var(g, n_base_rows, mgd->rhs_vx[level*2], var);
      cs_range_set_scatter(rs, CS_REAL_TYPE, db_size[1], var, var);
      sprintf(var_name, "RHS_%04d", level);
      cs_sles_post_output_var(var_name,
                              mesh_id,
                              location_id,
                              CS_POST_WRITER_ERRORS,
                              db_size[1],
                              var);

      cs_grid_project_var(g, n_base_rows, mgd->rhs_vx[level*2+1], var);
      cs_range_set_scatter(rs, CS_REAL_TYPE, db_size[1], var, var);
      sprintf(var_name, "X_%04d", level);
      cs_sles_post_output_var(var_name,
                              mesh_id,
                              location_id,
                              CS_POST_WRITER_ERRORS,
                              db_size[1],
                              var);

      /* Compute residual */

      BFT_MALLOC(c_res, n_cols_ext*db_size[1], cs_real_t);

      _matrix = cs_grid_get_matrix(g);

      cs_matrix_vector_multiply(_matrix,
                                mgd->rhs_vx[level*2+1],
                                c_res);

      /* |A.x - b| per cell component at the exit level. */
      const cs_real_t *c_rhs_lv = mgd->rhs_vx[level*2];
      for (ii = 0; ii < n_cells; ii++) {
        for (cs_lnum_t i = 0; i < db_size[0]; i++)
          c_res[ii*db_size[1] + i]
            = fabs(c_res[ii*db_size[1] + i] - c_rhs_lv[ii*db_size[1] + i]);
      }

      cs_grid_project_var(g, n_base_rows, c_res, var);
      cs_range_set_scatter(rs, CS_REAL_TYPE, db_size[1], var, var);

      BFT_FREE(c_res);

      sprintf(var_name, "Residual_%04d", level);
      cs_sles_post_output_var(var_name,
                              mesh_id,
                              location_id,
                              CS_POST_WRITER_ERRORS,
                              db_size[1],
                              var);
    }

    /* Flush postprocessing output before aborting. */
    cs_post_finalize();

    BFT_FREE(da);
    BFT_FREE(var);
  }

  /* Now abort */

  const char *error_type[] = {N_("divergence"), N_("breakdown")};
  int err_id = (state == CS_SLES_BREAKDOWN) ? 1 : 0;

  if (level == 0)
    bft_error(__FILE__, __LINE__, 0,
              _("algebraic multigrid [%s]: %s after %d cycles:\n"
                " initial residual: %11.4e; current residual: %11.4e"),
              name, _(error_type[err_id]), mgd->exit_cycle_id,
              mgd->exit_initial_residue, mgd->exit_residue);
  else
    bft_error(__FILE__, __LINE__, 0,
              _("algebraic multigrid [%s]: %s after %d cycles\n"
                " during resolution at level %d:\n"
                " initial residual: %11.4e; current residual: %11.4e"),
              name, _(error_type[err_id]),
              mgd->exit_cycle_id, level,
              mgd->exit_initial_residue, mgd->exit_residue);

  /* Not reached (bft_error aborts); kept for the function contract. */
  return false;
}
4703
4704 /*----------------------------------------------------------------------------*/
4705 /*!
4706 * \brief Set plotting options for multigrid.
4707 *
4708 * \param[in, out] mg pointer to multigrid info and context
4709 * \param[in] base_name base plot name to activate, NULL otherwise
4710 * \param[in] use_iteration if true, use iteration as time stamp
4711 * otherwise, use wall clock time
4712 */
4713 /*----------------------------------------------------------------------------*/
4714
4715 void
cs_multigrid_set_plot_options(cs_multigrid_t * mg,const char * base_name,bool use_iteration)4716 cs_multigrid_set_plot_options(cs_multigrid_t *mg,
4717 const char *base_name,
4718 bool use_iteration)
4719 {
4720 if (mg != NULL) {
4721 if (cs_glob_rank_id < 1 && base_name != NULL) {
4722
4723 /* Destroy previous plot if options reset */
4724 if (mg->cycle_plot != NULL)
4725 cs_time_plot_finalize(&(mg->cycle_plot));
4726
4727 /* Create new plot */
4728 cs_file_mkdir_default("monitoring");
4729 const char *probe_names[] = {base_name};
4730 mg->cycle_plot = cs_time_plot_init_probe(base_name,
4731 "monitoring/residue_",
4732 CS_TIME_PLOT_CSV,
4733 use_iteration,
4734 -1., /* force flush */
4735 0, /* no buffer */
4736 1, /* n_probes */
4737 NULL, /* probe_list */
4738 NULL, /* probe_coords */
4739 probe_names);
4740
4741 if (use_iteration)
4742 mg->plot_time_stamp = 0;
4743 }
4744 }
4745 }
4746
4747 /*----------------------------------------------------------------------------*/
4748 /*!
4749 * \brief Query the global multigrid parameters for parallel grid merging.
4750 *
4751 * \param[in] mg pointer to multigrid info and context
4752 * \param[out] rank_stride number of ranks over which merging
4753 * takes place, or NULL
4754 * \param[out] rows_mean_threshold mean number of rows under which merging
4755 * should be applied, or NULL
4756 * \param[out] rows_glob_threshold global number of rows under which
4757 * merging should be applied, or NULL
4758 */
4759 /*----------------------------------------------------------------------------*/
4760
4761 void
cs_multigrid_get_merge_options(const cs_multigrid_t * mg,int * rank_stride,int * rows_mean_threshold,cs_gnum_t * rows_glob_threshold)4762 cs_multigrid_get_merge_options(const cs_multigrid_t *mg,
4763 int *rank_stride,
4764 int *rows_mean_threshold,
4765 cs_gnum_t *rows_glob_threshold)
4766 {
4767 #if defined(HAVE_MPI)
4768 if (rank_stride != NULL)
4769 *rank_stride = mg->merge_stride;
4770 if (rows_mean_threshold != NULL)
4771 *rows_mean_threshold = mg->merge_mean_threshold;
4772 if (rows_glob_threshold != NULL)
4773 *rows_glob_threshold = mg->merge_glob_threshold;
4774 #else
4775 if (rank_stride != NULL)
4776 *rank_stride = 0;
4777 if (rows_mean_threshold != NULL)
4778 *rows_mean_threshold = 0;
4779 if (rows_glob_threshold != NULL)
4780 *rows_glob_threshold = 0;
4781 #endif
4782 }
4783
4784 /*----------------------------------------------------------------------------*/
4785 /*!
4786 * \brief Set global multigrid parameters for parallel grid merging behavior.
4787 *
4788 * \param[in, out] mg pointer to multigrid info and context
4789 * \param[in] rank_stride number of ranks over which merging
4790 * takes place
4791 * \param[in] rows_mean_threshold mean number of rows under which
4792 * merging should be applied
4793 * \param[in] rows_glob_threshold global number of rows under which
4794 * merging should be applied
4795 */
4796 /*----------------------------------------------------------------------------*/
4797
4798 void
cs_multigrid_set_merge_options(cs_multigrid_t * mg,int rank_stride,int rows_mean_threshold,cs_gnum_t rows_glob_threshold)4799 cs_multigrid_set_merge_options(cs_multigrid_t *mg,
4800 int rank_stride,
4801 int rows_mean_threshold,
4802 cs_gnum_t rows_glob_threshold)
4803 {
4804 #if defined(HAVE_MPI)
4805 mg->merge_stride = rank_stride;
4806 if (mg->merge_stride > cs_glob_n_ranks)
4807 mg->merge_stride = cs_glob_n_ranks;
4808 mg->merge_mean_threshold = rows_mean_threshold;
4809 mg->merge_glob_threshold = rows_glob_threshold;
4810 #endif
4811 }
4812
4813 /*----------------------------------------------------------------------------*/
4814
4815 END_C_DECLS
4816