/*============================================================================
 * Multigrid solver.
 *============================================================================*/

/*
  This file is part of Code_Saturne, a general-purpose CFD tool.

  Copyright (C) 1998-2021 EDF S.A.

  This program is free software; you can redistribute it and/or modify it under
  the terms of the GNU General Public License as published by the Free Software
  Foundation; either version 2 of the License, or (at your option) any later
  version.

  This program is distributed in the hope that it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program; if not, write to the Free Software Foundation, Inc., 51
  Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/

/*----------------------------------------------------------------------------*/

#include "cs_defs.h"

/*----------------------------------------------------------------------------
 * Standard C library headers
 *----------------------------------------------------------------------------*/

#include <assert.h>
#include <math.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#if defined(HAVE_MPI)
#include <mpi.h>
#endif

/*----------------------------------------------------------------------------
 * Local headers
 *----------------------------------------------------------------------------*/

#include "bft_mem.h"
#include "bft_error.h"
#include "bft_printf.h"

#include "cs_base.h"
#include "cs_blas.h"
#include "cs_file.h"
#include "cs_grid.h"
#include "cs_halo.h"
#include "cs_log.h"
#include "cs_matrix.h"
#include "cs_matrix_default.h"
#include "cs_matrix_util.h"
#include "cs_mesh.h"
#include "cs_mesh_quantities.h"
#include "cs_multigrid_smoother.h"
#include "cs_post.h"
#include "cs_sles.h"
#include "cs_sles_it.h"
#include "cs_sles_pc.h"
#include "cs_timer.h"
#include "cs_time_plot.h"
#include "cs_time_step.h"

/*----------------------------------------------------------------------------
 * Header for the current file
 *----------------------------------------------------------------------------*/

#include "cs_multigrid.h"

/*----------------------------------------------------------------------------*/

BEGIN_C_DECLS

/*!
  \cond DOXYGEN_SHOULD_SKIP_THIS */

/*=============================================================================
 * Local Macro Definitions
 *============================================================================*/

#define EPZERO  1.E-12
#define RINFIN  1.E+30

#if !defined(HUGE_VAL)
#define HUGE_VAL  1.E+12
#endif

/* Minimum size for OpenMP loops (needs benchmarking to adjust) */
#define CS_THR_MIN 128

/* SIMD unit size to ensure SIMD alignment (2 to 4 required on most
 * current architectures, so 16 should be enough on most architectures
 * through at least 2012) */

#define CS_SIMD_SIZE(s) (((s-1)/16+1)*16)

/*=============================================================================
 * Local Type Definitions
 *============================================================================*/

/*----------------------------------------------------------------------------
 * Multigrid types
 *----------------------------------------------------------------------------*/

typedef enum {

  CS_MULTIGRID_MAIN,           /*!< Main multigrid type */
  CS_MULTIGRID_COARSE,         /*!< Coarse level solver */
  CS_MULTIGRID_BOTTOM,         /*!< Bottom level solver */
  CS_MULTIGRID_BOTTOM_SMOOTHE  /*!< Bottom level smoother */

} cs_multigrid_subtype_t;

/*----------------------------------------------------------------------------
 * Local Structure Definitions
 *----------------------------------------------------------------------------*/

/* Basic linear system solver or smoother context and functions */
/*--------------------------------------------------------------*/

typedef struct _cs_mg_sles_t {

  void               *context;       /* solver context
                                        (options, state, logging) */
  cs_sles_setup_t    *setup_func;    /* solver setup function */
  cs_sles_solve_t    *solve_func;    /* solve function */
  cs_sles_destroy_t  *destroy_func;  /* destruction function */

} cs_mg_sles_t;

/* Basic per linear system options and logging */
/*---------------------------------------------*/

typedef struct _cs_multigrid_info_t {

  /* Settings */

  cs_sles_it_type_t    type[3];            /* Descent/ascent smoothers,
                                              coarse solver */

  bool                 is_pc;              /* True if used as preconditioner */
  int                  n_max_cycles;       /* Maximum allowed cycles */

  int                  n_max_iter[3];      /* maximum iterations allowed
                                              (descent/ascent/coarse) */
  int                  poly_degree[3];     /* polynomial preconditioning degree
                                              (descent/ascent/coarse) */

  double               precision_mult[3];  /* solver precision multiplier
                                              (descent/ascent/coarse) */

  /* Logging */

  unsigned             n_calls[2];         /* Number of times grids built
                                              (0) or solved (1) */

  unsigned long long   n_levels_tot;       /* Total accumulated number of
                                              grid levels built */
  unsigned             n_levels[3];        /* Number of grid levels:
                                              [last, min, max] */

  unsigned             n_cycles[3];        /* Number of cycles for system
                                              resolution: [min, max, total] */

  cs_timer_counter_t   t_tot[2];           /* Total time used:
                                              [build, solve] */

} cs_multigrid_info_t;

/* Per level info and logging */
/*----------------------------*/

typedef struct _cs_multigrid_level_info_t {

  unsigned long long   n_ranks[4];         /* Number of ranks for this level:
                                              [last, min, max, total] */
  unsigned long long   n_g_rows[4];        /* Global number of rows
                                              (last, min, max, total) */
  unsigned long long   n_elts[3][4];       /* Mean number of rows,
                                              rows + ghosts, and entries
                                              across ranks
                                              (last, min, max, total) */
  double               imbalance[3][4];    /* Imbalance for rows,
                                              rows + ghosts, and entries
                                              (last, min, max, total) */

  unsigned long long   n_it_solve[4];      /* Number of iterations for
                                              solving [last, min, max, total] */
  unsigned long long   n_it_ds_smoothe[4]; /* Number of iterations for descent
                                              smoothing:
                                              [last, min, max, total] */
  unsigned long long
  n_it_as_smoothe[4];                      /* Number of iterations for ascent
                                              smoothing:
                                              [last, min, max, total] */

  unsigned             n_calls[7];         /* Total number of calls:
                                              build, solve, descent smoothe,
                                              ascent smoothe, restrict from
                                              finer, prolong to finer, BLAS */

  cs_timer_counter_t   t_tot[7];           /* Total timers count:
                                              [build, solve, descent smoothe,
                                              ascent smoothe, restrict from
                                              finer, prolong to finer, BLAS] */

} cs_multigrid_level_info_t;

/* Grid hierarchy */
/*----------------*/

typedef struct _cs_multigrid_setup_data_t {

  /* Setup */

  unsigned        n_levels;           /* Current number of grid levels */
  unsigned        n_levels_alloc;     /* Allocated number of grid levels */

  cs_grid_t     **grid_hierarchy;     /* Array of grid pointers */
  cs_mg_sles_t   *sles_hierarchy;     /* Contexts for associated smoothers
                                         and solvers (i*2: descent, coarse;
                                         i*2+1: ascent) */

  /* Arrays used only for solving, but maintained until free,
     so as to be usable by convergence error handler. */

  double          exit_initial_residue;  /* Last level initial residue */
  double          exit_residue;          /* Last residue */
  int             exit_level;            /* Last level during solve */
  int             exit_cycle_id;         /* Last cycle id during solve */

  cs_real_t      *rhs_vx_buf;         /* Coarse grid "right hand sides"
                                         and corrections buffer */
  cs_real_t     **rhs_vx;             /* Coarse grid "right hand sides"
                                         and corrections */

  /* Options used only when used as a preconditioner */

  char           *pc_name;            /* name of preconditioning system */

  int             pc_verbosity;       /* preconditioner verbosity */

  cs_real_t      *pc_aux;             /* preconditioner auxiliary array */

} cs_multigrid_setup_data_t;

/* Multigrid solver info and context */
/*-----------------------------------*/

struct _cs_multigrid_t {

  /* Settings */

  cs_multigrid_type_t     type;     /* Multigrid type */
  cs_multigrid_subtype_t  subtype;  /* Multigrid subtype */

  int         aggregation_limit;    /* Maximum allowed fine rows
                                       per coarse cell */
  int         coarsening_type;      /* Coarsening traversal type */
  int         n_levels_max;         /* Maximum number of grid levels */
  cs_gnum_t   n_g_rows_min;         /* Global number of rows on coarse grids
                                       under which no coarsening occurs */

  int         post_row_max;         /* If > 0, activates postprocessing of
                                       coarsening, projecting coarse cell
                                       numbers (modulo post_row_max)
                                       on the base grid */

  double      p0p1_relax;           /* p0/p1 relaxation parameter */
  double      k_cycle_threshold;    /* threshold for k cycle */

  /* Setting for use as a preconditioner */

  double      pc_precision;         /* preconditioner precision */
  double      pc_r_norm;            /* preconditioner residue normalization */

  /* Data for postprocessing callback */

  int         post_location;        /* associated mesh location */
  int         n_levels_post;        /* Current number of postprocessed
                                       levels */

  int       **post_row_num;         /* If post_row_max > 0, array of
                                       (n_levels - 1) arrays of projected
                                       coarse cell numbers on the base grid */

  int       **post_row_rank;        /* If post_row_max > 0 and grid merging
                                       is active, array of (n_levels - 1)
                                       arrays of projected coarse cell ranks
                                       on the base grid */

  char       *post_name;            /* Name for postprocessing */

  /* Options and maintained state (statistics) */

  cs_multigrid_level_info_t  *lv_info;   /* Info for each level */
  cs_multigrid_t             *lv_mg[3];  /* Optional recursive multigrid
                                            descent, ascent, or NULL */
  cs_multigrid_t             *p_mg;      /* Optional parent multigrid,
                                            or NULL */

  cs_multigrid_info_t         info;      /* Base multigrid info */

  /* Communicator used for reduction operations
     (if left at NULL, main communicator will be used) */

#if defined(HAVE_MPI)
  MPI_Comm    comm;
  MPI_Comm    caller_comm;
#endif

  cs_gnum_t   merge_mean_threshold;
  cs_gnum_t   merge_glob_threshold;
  int         merge_stride;

  int         caller_n_ranks;

  /* Data available between "setup" and "solve" states */

  cs_multigrid_setup_data_t  *setup_data;   /* setup data */
  cs_time_plot_t   *cycle_plot;       /* plotting of cycles */
  int               plot_time_stamp;  /* plotting time stamp;
                                         if < 0, use wall clock */
};

/*============================================================================
 * Global variables
 *============================================================================*/

/* Multigrid type names */

const char *cs_multigrid_type_name[]
  = {N_("V-cycle"),
     N_("K-cycle"),
     N_("K-cycle, HPC variant"),
     N_("K-cycle, HPC coarse level")};

/* Tunable settings */

static int _k_cycle_hpc_merge_stride = 512;
static int _k_cycle_hpc_recurse_threshold = 256; /* under this size, coarsest
                                                    level solver does not use
                                                    k-cycle preconditioning */

/*============================================================================
 * Private function prototypes for recursive functions
 *============================================================================*/

static void
_setup_k_cycle_hpc_sub(cs_multigrid_t     *mg,
                       const char         *name,
                       const cs_matrix_t  *a,
                       int                 n_ranks,
                       int                 verbosity);

/*============================================================================
 * Private function definitions
 *============================================================================*/

/*----------------------------------------------------------------------------
 * Initialize multigrid info structure.
 *
 * parameters:
 *   info <-> pointer to multigrid info structure
 *----------------------------------------------------------------------------*/

static void
_multigrid_info_init(cs_multigrid_info_t *info)
{
  int i;

  /* Options */

  info->type[0] = CS_SLES_PCG;
  info->type[1] = CS_SLES_PCG;
  info->type[2] = CS_SLES_PCG;

  info->is_pc = false;

  info->n_max_cycles = 100;
  info->n_max_iter[0] = 2;
  info->n_max_iter[1] = 10;
  info->n_max_iter[2] = 10000;

  info->poly_degree[0] = -1;
  info->poly_degree[1] = -1;
  info->poly_degree[2] = -1;

  /* In theory, one should increase precision on coarsest mesh,
     but in practice, it is more efficient to have a lower precision,
     so we choose coarse_precision = global_precision; */

  info->precision_mult[0] = -1.;
  info->precision_mult[1] = -1.;
  info->precision_mult[2] = 1.;

  /* Counting and timing */

  for (i = 0; i < 2; i++)
    info->n_calls[i] = 0;

  info->n_levels_tot = 0;

  for (i = 0; i < 3; i++) {
    info->n_levels[i] = 0;
    info->n_cycles[i] = 0;
  }

  for (i = 0; i < 2; i++)
    CS_TIMER_COUNTER_INIT(info->t_tot[i]);
}

/*----------------------------------------------------------------------------
 * Initialize multigrid level info structure.
 *
 * parameters:
 *   info <-> pointer to multigrid level info structure
 *----------------------------------------------------------------------------*/

static void
_multigrid_level_info_init(cs_multigrid_level_info_t *info)
{
  int i;

  memset(info, 0, sizeof(cs_multigrid_level_info_t));

  for (i = 0; i < 3; i++) {
    info->imbalance[i][0] = HUGE_VALF;
    info->imbalance[i][1] = 0.;
  }

  for (i = 0; i < 7; i++)
    CS_TIMER_COUNTER_INIT(info->t_tot[i]);
}

/*----------------------------------------------------------------------------
 * Output information regarding multigrid options.
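 *
 * Settings of nested (recursive) multigrid stages, when present, are
 * logged recursively after the current level's parameters.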
* * parameters: * mg <-> pointer to multigrid structure *----------------------------------------------------------------------------*/ static void _multigrid_setup_log(const cs_multigrid_t *mg) { if (mg->info.is_pc == false) cs_log_printf(CS_LOG_SETUP, _(" Solver type: multigrid\n")); else cs_log_printf(CS_LOG_SETUP, _(" Multigrid preconditioner parameters:\n")); cs_log_printf(CS_LOG_SETUP, _(" Coarsening type: %s\n" " Max fine rows per coarse row: %d\n" " Maximum number of levels : %d\n" " Minimum number of coarse rows: %llu\n" " P0/P1 relaxation parameter: %g\n" " Maximum number of cycles: %d\n"), _(cs_grid_coarsening_type_name[mg->coarsening_type]), mg->aggregation_limit, mg->n_levels_max, (unsigned long long)(mg->n_g_rows_min), mg->p0p1_relax, mg->info.n_max_cycles); #if defined(HAVE_MPI) if (cs_glob_n_ranks > 1) cs_log_printf(CS_LOG_SETUP, _("\n" " Rank merge parameters:\n" " merge rank stride: %d\n" " mean coarse rows threshold: %d\n" " total coarse rows threshold: %llu\n"), mg->merge_stride, (int)(mg->merge_mean_threshold), (unsigned long long)(mg->merge_glob_threshold)); #endif cs_log_printf(CS_LOG_SETUP, _(" Cycle type: %s\n"), _(cs_multigrid_type_name[mg->type])); const char *stage_name[] = {"Descent smoother", "Ascent smoother", "Coarsest level solver"}; for (int i = 0; i < 3; i++) { if ( mg->info.type[i] != CS_SLES_N_IT_TYPES && mg->info.type[i] < CS_SLES_N_SMOOTHER_TYPES) { cs_log_printf(CS_LOG_SETUP, _(" %s:\n" " Type: %s\n"), _(stage_name[i]), _(cs_sles_it_type_name[mg->info.type[i]])); if (mg->info.poly_degree[i] > -1) { cs_log_printf(CS_LOG_SETUP, _(" Preconditioning: ")); if (mg->info.poly_degree[i] == 0) cs_log_printf(CS_LOG_SETUP, _("Jacobi\n")); else if (mg->info.poly_degree[i] < 0) { if (mg->lv_mg[i] != NULL) { cs_log_printf(CS_LOG_SETUP, "%s\n", _(cs_multigrid_type_name[mg->lv_mg[i]->type])); } else cs_log_printf(CS_LOG_SETUP, _("None\n")); } else cs_log_printf(CS_LOG_SETUP, _("polynomial, degree %d\n"), mg->info.poly_degree[i]); } cs_log_printf(CS_LOG_SETUP, _(" Maximum number of iterations: %d\n" " Precision multiplier: %g\n"), mg->info.n_max_iter[i], mg->info.precision_mult[i]); } else if (mg->lv_mg[i] != NULL) { cs_log_printf(CS_LOG_SETUP, " %s:\n", _(stage_name[i])); _multigrid_setup_log(mg->lv_mg[i]); } } cs_log_printf(CS_LOG_SETUP, _(" Postprocess coarsening: %d\n"), mg->post_row_max); } /*---------------------------------------------------------------------------- * Output information regarding multigrid resolution. 
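 *
 * Averages reported here are taken over the number of recorded builds
 * (for level statistics) or solves (for cycle counts), as accumulated
 * in the multigrid info counters.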
* * parameters: * mg <-> pointer to multigrid structure *----------------------------------------------------------------------------*/ static void _multigrid_performance_log(const cs_multigrid_t *mg) { unsigned i; unsigned long long n_builds_denom = CS_MAX(mg->info.n_calls[0], 1); unsigned long long n_solves_denom = CS_MAX(mg->info.n_calls[1], 1); int n_lv_min = mg->info.n_levels[1]; int n_lv_max = mg->info.n_levels[2]; int n_lv_mean = (int)(mg->info.n_levels_tot / n_builds_denom); int n_cy_mean = (int)(mg->info.n_cycles[2] / n_solves_denom); char tmp_s[7][64] = {"", "", "", "", "", "", ""}; const char *stage_name[2] = {N_("Construction:"), N_("Resolution:")}; const char *lv_stage_name[7] = {N_("build:"), N_("solve:"), N_("descent smoothe:"), N_("ascent smoothe:"), N_("restrict:"), N_("prolong:"), N_("BLAS")}; cs_log_printf(CS_LOG_PERFORMANCE, _("\n" " Multigrid:\n" " %s\n" " Coarsening: %s\n"), _(cs_multigrid_type_name[mg->type]), _(cs_grid_coarsening_type_name[mg->coarsening_type])); if ( mg->info.type[0] != CS_SLES_N_IT_TYPES && mg->info.type[0] < CS_SLES_N_SMOOTHER_TYPES) { const char *descent_smoother_name = cs_sles_it_type_name[mg->info.type[0]]; const char *ascent_smoother_name = cs_sles_it_type_name[mg->info.type[1]]; if (mg->info.type[0] == mg->info.type[1]) cs_log_printf(CS_LOG_PERFORMANCE, _(" Smoother: %s\n"), _(descent_smoother_name)); else cs_log_printf(CS_LOG_PERFORMANCE, _(" Descent smoother: %s\n" " Ascent smoother: %s\n"), _(descent_smoother_name), _(ascent_smoother_name)); cs_log_printf(CS_LOG_PERFORMANCE, _(" Coarsest level solver: %s\n"), _(cs_sles_it_type_name[mg->info.type[2]])); } sprintf(tmp_s[0], "%-36s", ""); cs_log_strpadl(tmp_s[1], _(" mean"), 12, 64); cs_log_strpadl(tmp_s[2], _("minimum"), 12, 64); cs_log_strpadl(tmp_s[3], _("maximum"), 12, 64); cs_log_printf(CS_LOG_PERFORMANCE, "\n %s %s %s %s\n", tmp_s[0], tmp_s[1], tmp_s[2], tmp_s[3]); cs_log_strpad(tmp_s[0], _("Number of levels:"), 36, 64); cs_log_strpad(tmp_s[1], _("Number of cycles:"), 36, 64); cs_log_printf(CS_LOG_PERFORMANCE, " %s %12d %12d %12d\n", tmp_s[0], n_lv_mean, n_lv_min, n_lv_max); cs_log_printf(CS_LOG_PERFORMANCE, " %s %12d %12d %12d\n\n", tmp_s[1], n_cy_mean, (int)(mg->info.n_cycles[0]), (int)(mg->info.n_cycles[1])); cs_log_timer_array_header(CS_LOG_PERFORMANCE, 2, /* indent, */ "", /* header title */ true); /* calls column */ cs_log_timer_array(CS_LOG_PERFORMANCE, 2, /* indent, */ 2, /* n_lines */ stage_name, mg->info.n_calls, mg->info.t_tot); sprintf(tmp_s[0], "%-36s", ""); cs_log_strpadl(tmp_s[1], _(" mean"), 12, 64); cs_log_strpadl(tmp_s[2], _("minimum"), 12, 64); cs_log_strpadl(tmp_s[3], _("maximum"), 12, 64); cs_log_printf(CS_LOG_PERFORMANCE, "\n %s %s %s %s\n", tmp_s[0], tmp_s[1], tmp_s[2], tmp_s[3]); for (i = 0; i < mg->info.n_levels[2]; i++) { const cs_multigrid_level_info_t *lv_info = mg->lv_info + i; unsigned long long n_lv_builds = lv_info->n_calls[0]; if (n_lv_builds < 1) continue; cs_log_strpad(tmp_s[0], _("Number of rows:"), 34, 64); cs_log_printf(CS_LOG_PERFORMANCE, _(" Grid level %d:\n" " %s %12llu %12llu %12llu\n"), i, tmp_s[0], lv_info->n_g_rows[3] / n_lv_builds, lv_info->n_g_rows[1], lv_info->n_g_rows[2]); if (mg->caller_n_ranks == 1) { cs_log_strpad(tmp_s[1], _("Number of entries:"), 34, 64); cs_log_printf(CS_LOG_PERFORMANCE, " %s %12llu %12llu %12llu\n", tmp_s[1], lv_info->n_elts[2][3] / n_lv_builds, lv_info->n_elts[2][1], lv_info->n_elts[2][2]); } #if defined(HAVE_MPI) if (mg->caller_n_ranks > 1) { cs_log_strpad(tmp_s[0], _("Number of active ranks:"), 34, 64); 
      cs_log_printf(CS_LOG_PERFORMANCE,
                    " %s %12llu %12llu %12llu\n",
                    tmp_s[0],
                    lv_info->n_ranks[3] / n_lv_builds,
                    lv_info->n_ranks[1],
                    lv_info->n_ranks[2]);

      cs_log_strpad(tmp_s[0], _("Mean local rows:"), 34, 64);
      cs_log_strpad(tmp_s[1], _("Mean local columns + ghosts:"), 34, 64);
      cs_log_strpad(tmp_s[2], _("Mean local entries:"), 34, 64);

      cs_log_printf(CS_LOG_PERFORMANCE,
                    " %s %12llu %12llu %12llu\n"
                    " %s %12llu %12llu %12llu\n"
                    " %s %12llu %12llu %12llu\n",
                    tmp_s[0],
                    lv_info->n_elts[0][3] / n_lv_builds,
                    lv_info->n_elts[0][1],
                    lv_info->n_elts[0][2],
                    tmp_s[1],
                    lv_info->n_elts[1][3] / n_lv_builds,
                    lv_info->n_elts[1][1],
                    lv_info->n_elts[1][2],
                    tmp_s[2],
                    lv_info->n_elts[2][3] / n_lv_builds,
                    lv_info->n_elts[2][1],
                    lv_info->n_elts[2][2]);

      cs_log_strpad(tmp_s[0], _("Rows imbalance:"), 34, 64);
      cs_log_strpad(tmp_s[1], _("Columns + ghosts imbalance:"), 34, 64);
      cs_log_strpad(tmp_s[2], _("Entries imbalance:"), 34, 64);

      cs_log_printf(CS_LOG_PERFORMANCE,
                    " %-34s %12.3f %12.3f %12.3f\n"
                    " %-34s %12.3f %12.3f %12.3f\n"
                    " %-34s %12.3f %12.3f %12.3f\n",
                    tmp_s[0],
                    lv_info->imbalance[0][3] / n_lv_builds,
                    lv_info->imbalance[0][1],
                    lv_info->imbalance[0][2],
                    tmp_s[1],
                    lv_info->imbalance[1][3] / n_lv_builds,
                    lv_info->imbalance[1][1],
                    lv_info->imbalance[1][2],
                    tmp_s[2],
                    lv_info->imbalance[2][3] / n_lv_builds,
                    lv_info->imbalance[2][1],
                    lv_info->imbalance[2][2]);

    }

#endif

    if (lv_info->n_calls[1] > 0) {
      cs_log_strpad(tmp_s[0], _("Iterations for solving:"), 34, 64);
      cs_log_printf(CS_LOG_PERFORMANCE,
                    " %s %12llu %12llu %12llu\n",
                    tmp_s[0],
                    lv_info->n_it_solve[3] / lv_info->n_calls[1],
                    lv_info->n_it_solve[1],
                    lv_info->n_it_solve[2]);
    }

    if (lv_info->n_calls[2] > 0) {
      cs_log_strpad(tmp_s[1], _("Descent smoother iterations:"), 34, 64);
      cs_log_printf(CS_LOG_PERFORMANCE,
                    " %s %12llu %12llu %12llu\n",
                    tmp_s[1],
                    lv_info->n_it_ds_smoothe[3] / lv_info->n_calls[2],
                    lv_info->n_it_ds_smoothe[1],
                    lv_info->n_it_ds_smoothe[2]);
    }

    if (lv_info->n_calls[3] > 0) {
      cs_log_strpad(tmp_s[2], _("Ascent smoother iterations:"), 34, 64);
      cs_log_printf(CS_LOG_PERFORMANCE,
                    " %s %12llu %12llu %12llu\n",
                    tmp_s[2],
                    lv_info->n_it_as_smoothe[3] / lv_info->n_calls[3],
                    lv_info->n_it_as_smoothe[1],
                    lv_info->n_it_as_smoothe[2]);
    }

  }

  cs_log_timer_array_header(CS_LOG_PERFORMANCE,
                            2,      /* indent, */
                            "",     /* header title */
                            true);  /* calls column */

  for (i = 0; i < mg->info.n_levels[2]; i++) {

    const cs_multigrid_level_info_t *lv_info = mg->lv_info + i;

    cs_log_printf(CS_LOG_PERFORMANCE,
                  _("  Grid level %d:\n"), i);

    cs_log_timer_array(CS_LOG_PERFORMANCE,
                       4,  /* indent, */
                       7,  /* n_lines */
                       lv_stage_name,
                       lv_info->n_calls,
                       lv_info->t_tot);

  }

  {
    const char *names[]
      = {N_("coarse level descent smoother"),
         N_("coarse level ascent smoother"),
         N_("bottom level solver")};

    for (i = 0; i < 3; i++) {
      if (mg->lv_mg[i] != NULL) {
        cs_log_printf(CS_LOG_PERFORMANCE,
                      _("\n"
                        "  Nested %s:\n"), _(names[i]));
        cs_multigrid_log(mg->lv_mg[i], CS_LOG_PERFORMANCE);
      }
    }
  }
}

/*----------------------------------------------------------------------------
 * Create empty structure used to maintain setup data
 * (between cs_sles_setup and cs_sles_free type calls).
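 *
 * The returned structure persists between setup and free, so its exit_level,
 * exit_cycle_id, and last residue fields remain available to the convergence
 * error handler after a solve.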
 *
 * returns:
 *   pointer to multigrid setup data structure
 *----------------------------------------------------------------------------*/

static cs_multigrid_setup_data_t *
_multigrid_setup_data_create(void)
{
  cs_multigrid_setup_data_t *mgd;

  BFT_MALLOC(mgd, 1, cs_multigrid_setup_data_t);

  mgd->n_levels = 0;
  mgd->n_levels_alloc = 0;

  mgd->grid_hierarchy = NULL;
  mgd->sles_hierarchy = NULL;

  mgd->exit_initial_residue = -1.;
  mgd->exit_residue = -1.;
  mgd->exit_level = -1;
  mgd->exit_cycle_id = -1;

  mgd->rhs_vx_buf = NULL;
  mgd->rhs_vx = NULL;

  mgd->pc_name = NULL;
  mgd->pc_aux = NULL;
  mgd->pc_verbosity = 0;

  return mgd;
}

/*----------------------------------------------------------------------------
 * Add grid to multigrid structure hierarchy.
 *
 * parameters:
 *   mg   <-- multigrid structure
 *   grid <-- grid to add
 *----------------------------------------------------------------------------*/

static void
_multigrid_add_level(cs_multigrid_t  *mg,
                     cs_grid_t       *grid)
{
  cs_multigrid_setup_data_t *mgd = mg->setup_data;

  unsigned ii;

  /* Reallocate arrays if necessary */

  if (mgd->n_levels == mgd->n_levels_alloc) {

    /* Max previous */
    unsigned int n_lv_max_prev = CS_MAX(mg->info.n_levels[2],
                                        mgd->n_levels);

    if (mgd->n_levels_alloc == 0) {
      mgd->n_levels_alloc = n_lv_max_prev;
      if (mgd->n_levels_alloc == 0)
        mgd->n_levels_alloc = 10;
    }
    else
      mgd->n_levels_alloc *= 2;

    BFT_REALLOC(mgd->grid_hierarchy, mgd->n_levels_alloc, cs_grid_t *);
    BFT_REALLOC(mgd->sles_hierarchy, mgd->n_levels_alloc*2, cs_mg_sles_t);

    for (ii = mgd->n_levels*2; ii < mgd->n_levels_alloc*2; ii++) {
      mgd->sles_hierarchy[ii].context = NULL;
      mgd->sles_hierarchy[ii].setup_func = NULL;
      mgd->sles_hierarchy[ii].solve_func = NULL;
      mgd->sles_hierarchy[ii].destroy_func = NULL;
    }

    if (n_lv_max_prev < mgd->n_levels_alloc) {
      BFT_REALLOC(mg->lv_info, mgd->n_levels_alloc, cs_multigrid_level_info_t);
      for (ii = n_lv_max_prev; ii < mgd->n_levels_alloc; ii++)
        _multigrid_level_info_init(mg->lv_info + ii);
    }

  }

  /* Add new grid to hierarchy */

  mgd->grid_hierarchy[mgd->n_levels] = grid;

  if (mg->post_row_num != NULL) {
    int n_max_post_levels = (int)(mg->info.n_levels[2]) - 1;
    BFT_REALLOC(mg->post_row_num, mgd->n_levels_alloc, int *);
    for (ii = n_max_post_levels + 1; ii < mgd->n_levels_alloc; ii++)
      mg->post_row_num[ii] = NULL;
  }

  if (mg->post_row_rank != NULL) {
    int n_max_post_levels = (int)(mg->info.n_levels[2]) - 1;
    BFT_REALLOC(mg->post_row_rank, mgd->n_levels_alloc, int *);
    for (ii = n_max_post_levels + 1; ii < mgd->n_levels_alloc; ii++)
      mg->post_row_rank[ii] = NULL;
  }

  /* Update associated info */

  {
    int  n_ranks;
    cs_lnum_t  n_rows, n_rows_with_ghosts, n_entries;
    cs_gnum_t  n_g_rows;
    cs_multigrid_level_info_t  *lv_info = mg->lv_info + mgd->n_levels;

    cs_grid_get_info(grid,
                     NULL,
                     NULL,
                     NULL,
                     NULL,
                     &n_ranks,
                     &n_rows,
                     &n_rows_with_ghosts,
                     &n_entries,
                     &n_g_rows);

    mg->info.n_levels[0] = mgd->n_levels + 1;

    lv_info->n_ranks[0] = n_ranks;
    if (lv_info->n_ranks[1] > (unsigned)n_ranks)
      lv_info->n_ranks[1] = n_ranks;
    else if (lv_info->n_ranks[2] < (unsigned)n_ranks)
      lv_info->n_ranks[2] = n_ranks;
    lv_info->n_ranks[3] += n_ranks;

    lv_info->n_g_rows[0] = n_g_rows;
    if (lv_info->n_g_rows[1] > n_g_rows)
      lv_info->n_g_rows[1] = n_g_rows;
    else if (lv_info->n_g_rows[2] < n_g_rows)
      lv_info->n_g_rows[2] = n_g_rows;
    lv_info->n_g_rows[3] += n_g_rows;

    lv_info->n_elts[0][0] = n_rows;
    lv_info->n_elts[1][0] = n_rows_with_ghosts;
    lv_info->n_elts[2][0] = n_entries;

    for (ii = 0; ii < 3; ii++) {
      if (lv_info->n_elts[ii][1] > lv_info->n_elts[ii][0])
        lv_info->n_elts[ii][1] = lv_info->n_elts[ii][0];
      else if
          (lv_info->n_elts[ii][2] < lv_info->n_elts[ii][0])
        lv_info->n_elts[ii][2] = lv_info->n_elts[ii][0];
      lv_info->n_elts[ii][3] += lv_info->n_elts[ii][0];
    }

#if defined(HAVE_MPI)

    if (mg->caller_n_ranks > 1) {
      cs_gnum_t tot_sizes[3], max_sizes[3];
      cs_gnum_t loc_sizes[3] = {n_rows, n_rows_with_ghosts, n_entries};
      MPI_Allreduce(loc_sizes, tot_sizes, 3, CS_MPI_GNUM, MPI_SUM,
                    mg->caller_comm);
      MPI_Allreduce(loc_sizes, max_sizes, 3, CS_MPI_GNUM, MPI_MAX,
                    mg->caller_comm);
      for (ii = 0; ii < 3; ii++) {
        lv_info->imbalance[ii][0] = (  max_sizes[ii]
                                     / (tot_sizes[ii]*1.0/n_ranks)) - 1.0;
        if (lv_info->imbalance[ii][1] > lv_info->imbalance[ii][0])
          lv_info->imbalance[ii][1] = lv_info->imbalance[ii][0];
        else if (lv_info->imbalance[ii][2] < lv_info->imbalance[ii][0])
          lv_info->imbalance[ii][2] = lv_info->imbalance[ii][0];
        lv_info->imbalance[ii][3] += lv_info->imbalance[ii][0];
      }
    }

#endif /* defined(HAVE_MPI) */

    if (lv_info->n_calls[0] == 0) {
      lv_info->n_ranks[1] = n_ranks;
      lv_info->n_g_rows[1] = n_g_rows;
      for (ii = 0; ii < 3; ii++) {
        lv_info->n_elts[ii][1] = lv_info->n_elts[ii][0];
#if defined(HAVE_MPI)
        lv_info->imbalance[ii][1] = lv_info->imbalance[ii][0];
#endif
      }
    }

    lv_info->n_calls[0] += 1;
  }

  /* Ready for next level */

  mgd->n_levels += 1;
}

/*----------------------------------------------------------------------------
 * Add postprocessing info to multigrid hierarchy
 *
 * parameters:
 *   mg            <-> multigrid structure
 *   name          <-- postprocessing name
 *   post_location <-- where to postprocess (cells or vertices)
 *   n_base_rows   <-- number of rows in base grid
 *----------------------------------------------------------------------------*/

static void
_multigrid_add_post(cs_multigrid_t  *mg,
                    const char      *name,
                    int              post_location,
                    cs_lnum_t        n_base_rows)
{
  cs_multigrid_setup_data_t *mgd = mg->setup_data;

  int ii;

  assert(mg != NULL);

  if (mg->post_row_max < 1)
    return;

  mg->post_location = post_location;
  mg->n_levels_post = mgd->n_levels - 1;

  BFT_REALLOC(mg->post_name, strlen(name) + 1, char);
  strcpy(mg->post_name, name);

  assert(mg->n_levels_post <= mg->n_levels_max);

  /* Reallocate arrays if necessary */

  if (mg->post_row_num == NULL) {
    BFT_MALLOC(mg->post_row_num, mg->n_levels_max, int *);
    for (ii = 0; ii < mg->n_levels_max; ii++)
      mg->post_row_num[ii] = NULL;
  }

  if (mg->post_row_rank == NULL && mg->merge_stride > 1) {
    BFT_MALLOC(mg->post_row_rank, mg->n_levels_max, int *);
    for (ii = 0; ii < mg->n_levels_max; ii++)
      mg->post_row_rank[ii] = NULL;
  }

  for (ii = 0; ii < mg->n_levels_post; ii++) {
    BFT_REALLOC(mg->post_row_num[ii], n_base_rows, int);
    cs_grid_project_row_num(mgd->grid_hierarchy[ii+1],
                            n_base_rows,
                            mg->post_row_max,
                            mg->post_row_num[ii]);
  }

  if (mg->post_row_rank != NULL) {
    for (ii = 0; ii < mg->n_levels_post; ii++) {
      BFT_REALLOC(mg->post_row_rank[ii], n_base_rows, int);
      cs_grid_project_row_rank(mgd->grid_hierarchy[ii+1],
                               n_base_rows,
                               mg->post_row_rank[ii]);
    }
  }
}

/*----------------------------------------------------------------------------
 * Post process variables associated with multigrid hierarchy
 *
 * parameters:
 *   mgh <-- multigrid hierarchy
 *   ts  <-- time step status structure
 *----------------------------------------------------------------------------*/

static void
_cs_multigrid_post_function(void                  *mgh,
                            const cs_time_step_t  *ts)
{
  CS_UNUSED(ts);

  int ii;
  size_t name_len;
  char *var_name = NULL;
  cs_multigrid_t *mg = mgh;
  const char *base_name = NULL;

  /* Return if necessary structures inconsistent or have been destroyed */

  if (mg == NULL)
    return;

  if (mg->post_row_num == NULL || cs_post_mesh_exists(-1) != true)
    return;

  int
 *s_num = NULL;
  const cs_range_set_t *rs = NULL;

  if (mg->post_location == CS_MESH_LOCATION_VERTICES) {
    BFT_MALLOC(s_num, cs_glob_mesh->n_vertices, int);
    rs = cs_glob_mesh->vtx_range_set;
  }

  /* Allocate name buffer */

  base_name = mg->post_name;
  name_len = 3 + strlen(base_name) + 1 + 3 + 1 + 4 + 1;
  BFT_MALLOC(var_name, name_len, char);

  /* Loop on grid levels */

  for (ii = 0; ii < mg->n_levels_post; ii++) {

    sprintf(var_name, "mg %s %2d", base_name, (ii+1));

    if (mg->post_location == CS_MESH_LOCATION_CELLS)
      cs_post_write_var(CS_POST_MESH_VOLUME,
                        CS_POST_WRITER_ALL_ASSOCIATED,
                        var_name,
                        1,
                        false,
                        true,
                        CS_POST_TYPE_int,
                        mg->post_row_num[ii],
                        NULL,
                        NULL,
                        cs_glob_time_step);

    else if (mg->post_location == CS_MESH_LOCATION_VERTICES) {
      cs_range_set_scatter(rs,
                           CS_INT_TYPE,
                           1,
                           mg->post_row_num[ii],
                           s_num);
      cs_post_write_vertex_var(CS_POST_MESH_VOLUME,
                               CS_POST_WRITER_ALL_ASSOCIATED,
                               var_name,
                               1,
                               false,
                               true,
                               CS_POST_TYPE_int,
                               s_num,
                               cs_glob_time_step);
    }

    else
      bft_error(__FILE__, __LINE__, 0,
                "%s: Invalid location for post-processing.\n",
                __func__);

    BFT_FREE(mg->post_row_num[ii]);

    if (mg->post_row_rank != NULL) {

      sprintf(var_name, "rk %s %2d", base_name, (ii+1));

      if (mg->post_location == CS_MESH_LOCATION_CELLS)
        cs_post_write_var(CS_POST_MESH_VOLUME,
                          CS_POST_WRITER_ALL_ASSOCIATED,
                          var_name,
                          1,
                          false,
                          true,
                          CS_POST_TYPE_int,
                          mg->post_row_rank[ii],
                          NULL,
                          NULL,
                          cs_glob_time_step);

      else if (mg->post_location == CS_MESH_LOCATION_VERTICES) {
        cs_range_set_scatter(rs,
                             CS_INT_TYPE,
                             1,
                             mg->post_row_rank[ii],
                             s_num);
        cs_post_write_vertex_var(CS_POST_MESH_VOLUME,
                                 CS_POST_WRITER_ALL_ASSOCIATED,
                                 var_name,
                                 1,
                                 false,
                                 true,
                                 CS_POST_TYPE_int,
                                 s_num,
                                 cs_glob_time_step);
      }

      BFT_FREE(mg->post_row_rank[ii]);

    }

  }

  mg->n_levels_post = 0;

  BFT_FREE(s_num);
  BFT_FREE(var_name);
}

/*----------------------------------------------------------------------------
 * Function returning the type name of multigrid preconditioner context.
 *
 * parameters:
 *   context <-- pointer to preconditioner context
 *   logging <-- if true, logging description; if false, canonical name
 *----------------------------------------------------------------------------*/

static const char *
_multigrid_pc_get_type(const void  *context,
                       bool         logging)
{
  CS_UNUSED(context);

  if (logging == false) {
    static const char t[] = "multigrid";
    return t;
  }
  else {
    static const char t[] = N_("Multigrid");
    return _(t);
  }
}

/*----------------------------------------------------------------------------
 * Function for setup of a multigrid preconditioner.
 *
 * parameters:
 *   context   <-> pointer to preconditioner context
 *   name      <-- pointer to name of associated linear system
 *   a         <-- matrix
 *   verbosity <-- associated verbosity
 *----------------------------------------------------------------------------*/

static void
_multigrid_pc_setup(void               *context,
                    const char         *name,
                    const cs_matrix_t  *a,
                    int                 verbosity)
{
  cs_multigrid_setup(context, name, a, verbosity);

  cs_multigrid_t *mg = context;
  cs_multigrid_setup_data_t *mgd = mg->setup_data;

  BFT_REALLOC(mgd->pc_name, strlen(name) + 1, char);
  strcpy(mgd->pc_name, name);
}

/*----------------------------------------------------------------------------
 * Function for setup of a multigrid preconditioner.
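 *
 * Variant for the K-cycle HPC sub-solver: the number of active ranks is
 * determined from the parent hierarchy's coarsest-grid communicator before
 * the local hierarchy is built.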
 *
 * parameters:
 *   context   <-> pointer to preconditioner context
 *   name      <-- pointer to name of associated linear system
 *   a         <-- matrix
 *   verbosity <-- associated verbosity
 *----------------------------------------------------------------------------*/

static void
_multigrid_pc_setup_k_sub(void               *context,
                          const char         *name,
                          const cs_matrix_t  *a,
                          int                 verbosity)
{
  cs_multigrid_t *mg = context;
  cs_multigrid_t *parent = mg->p_mg;
  cs_multigrid_setup_data_t *p_mgd = parent->setup_data;

  int n_ranks = 1;

#if defined(HAVE_MPI)
  {
    int p_lv = p_mgd->n_levels-1;
    MPI_Comm lv_comm = cs_grid_get_comm(p_mgd->grid_hierarchy[p_lv]);
    if (lv_comm != MPI_COMM_NULL)
      MPI_Comm_size(lv_comm, &n_ranks);
  }
#endif

  _setup_k_cycle_hpc_sub(context, name, a, n_ranks, verbosity);

  cs_multigrid_setup_data_t *mgd = mg->setup_data;

  BFT_REALLOC(mgd->pc_name, strlen(name) + 1, char);
  strcpy(mgd->pc_name, name);
}

/*----------------------------------------------------------------------------
 * Function for setup of a multigrid (local smoother)
 *
 * parameters:
 *   context   <-> pointer to preconditioner context
 *   name      <-- pointer to name of associated linear system
 *   a         <-- matrix
 *   verbosity <-- associated verbosity
 *----------------------------------------------------------------------------*/

static void
_multigrid_setup_k_local_smoothe(void               *context,
                                 const char         *name,
                                 const cs_matrix_t  *a,
                                 int                 verbosity)
{
  _setup_k_cycle_hpc_sub(context, name, a, 1, verbosity);

  cs_multigrid_t *mg = context;
  cs_multigrid_setup_data_t *mgd = mg->setup_data;

  BFT_REALLOC(mgd->pc_name, strlen(name) + 1, char);
  strcpy(mgd->pc_name, name);
}

/*----------------------------------------------------------------------------
 * Function for setting of the required tolerance for multigrid used
 * as a preconditioner.
 *
 * The preconditioner is considered to have converged when
 * residue/r_norm <= precision, residue being the L2 norm of a.vx-rhs.
 *
 * parameters:
 *   context   <-> pointer to multigrid context
 *   precision <-- preconditioner precision
 *   r_norm    <-- residue normalization
 *----------------------------------------------------------------------------*/

static void
_multigrid_pc_tolerance_t(void    *context,
                          double   precision,
                          double   r_norm)
{
  cs_multigrid_t *mg = context;

  mg->pc_precision = precision;
  mg->pc_r_norm = r_norm;
}

/*----------------------------------------------------------------------------
 * Function for application of a multigrid preconditioner.
 *
 * In cases where it is desired that the preconditioner modify a vector
 * "in place", x_in should be set to NULL, and x_out contain the vector to
 * be modified (\f$x_{out} \leftarrow M^{-1}x_{out}\f$).
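 *
 * A minimal calling sketch of the in-place convention (illustrative only;
 * "mg" is assumed to be a set-up multigrid context and "x" a vector sized
 * for the system's matrix):
 *
 *   cs_sles_pc_state_t state = _multigrid_pc_apply(mg, NULL, x);
 *
 * after which x holds the result of applying the preconditioner to the
 * original x.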
 *
 * parameters:
 *   context <-> pointer to preconditioner context
 *   x_in    <-- input vector
 *   x_out   <-> input/output vector
 *
 * returns:
 *   preconditioner application status
 *----------------------------------------------------------------------------*/

static cs_sles_pc_state_t
_multigrid_pc_apply(void             *context,
                    const cs_real_t  *x_in,
                    cs_real_t        *x_out)
{
  int     n_iter;
  double  residue;

  cs_multigrid_t *mg = context;
  cs_multigrid_setup_data_t *mgd = mg->setup_data;

  const cs_matrix_t *a = cs_grid_get_matrix(mgd->grid_hierarchy[0]);

  const cs_real_t *rhs = x_in;

  const cs_lnum_t *db_size = cs_matrix_get_diag_block_size(a);
  const cs_lnum_t n_rows = cs_matrix_get_n_rows(a) * db_size[1];

  /* If preconditioner is "in-place", use additional buffer */

  if (x_in == NULL) {

    if (mgd->pc_aux == NULL) {
      const cs_lnum_t n_cols = cs_matrix_get_n_columns(a) * db_size[1];
      BFT_MALLOC(mgd->pc_aux, n_cols, cs_real_t);
    }

    cs_real_t *restrict _rhs = mgd->pc_aux;

#   pragma omp parallel for if(n_rows > CS_THR_MIN)
    for (cs_lnum_t ii = 0; ii < n_rows; ii++)
      _rhs[ii] = x_out[ii];

    rhs = _rhs;
  }

# pragma omp parallel for if(n_rows > CS_THR_MIN)
  for (cs_lnum_t ii = 0; ii < n_rows; ii++)
    x_out[ii] = 0.;

  cs_sles_convergence_state_t  cvg = cs_multigrid_solve(context,
                                                        mgd->pc_name,
                                                        a,
                                                        mgd->pc_verbosity,
                                                        mg->pc_precision,
                                                        mg->pc_r_norm,
                                                        &n_iter,
                                                        &residue,
                                                        rhs,
                                                        x_out,
                                                        0,
                                                        NULL);

  cs_sles_pc_state_t state;

  switch(cvg) {
  case CS_SLES_DIVERGED:
    state = CS_SLES_PC_DIVERGED;
    break;
  case CS_SLES_BREAKDOWN:
    state = CS_SLES_PC_BREAKDOWN;
    break;
  case CS_SLES_CONVERGED:
    state = CS_SLES_PC_CONVERGED;
    break;
  default:
    state = CS_SLES_PC_MAX_ITERATION;
  }

  return state;
}

/*----------------------------------------------------------------------------
 * Create a preconditioner structure using multigrid.
 *
 * parameters:
 *   mg_type <-- type of multigrid algorithm to use
 *
 * returns:
 *   pointer to newly created preconditioner object.
 *----------------------------------------------------------------------------*/

static cs_multigrid_t *
_multigrid_pc_create(cs_multigrid_type_t  mg_type)
{
  cs_multigrid_t *mg = cs_multigrid_create(mg_type);

  switch(mg_type) {
  case CS_MULTIGRID_V_CYCLE:
    cs_multigrid_set_solver_options
      (mg,
       CS_SLES_P_SYM_GAUSS_SEIDEL,  /* descent smoothe */
       CS_SLES_P_SYM_GAUSS_SEIDEL,  /* ascent smoothe */
       CS_SLES_PCG,                 /* coarse solver */
       1,    /* n_max_cycles */
       1,    /* n_max_iter_descent */
       1,    /* n_max_iter_ascent */
       500,  /* n_max_iter_coarse */
       0, 0, -1,    /* precond poly_degree */
       -1, -1, 1);  /* precision_multiplier */
    break;

  case CS_MULTIGRID_K_CYCLE:
    cs_multigrid_set_solver_options
      (mg,
       CS_SLES_TS_F_GAUSS_SEIDEL,
       CS_SLES_TS_B_GAUSS_SEIDEL,
       CS_SLES_PCG,  /* coarse solver */
       1,    /* n max cycles */
       1,    /* n max iter for descent */
       1,    /* n max iter for ascent */
       500,  /* n max iter for coarse solve */
       0, 0, 0,      /* precond degree */
       -1, -1, 10);  /* precision multiplier */
    break;

  case CS_MULTIGRID_K_CYCLE_HPC:
    cs_multigrid_set_solver_options
      (mg,
       CS_SLES_TS_F_GAUSS_SEIDEL,
       CS_SLES_TS_B_GAUSS_SEIDEL,
       CS_SLES_FCG,  /* coarse solver */
       1,    /* n max cycles */
       1,    /* n max iter for descent */
       1,    /* n max iter for ascent */
       500,  /* n max iter for coarse solve */
       0, 0, 0,     /* precond degree */
       -1, -1, 1);  /* precision multiplier */
    break;

  default:
    assert(0);
  }

  return mg;
}

/*----------------------------------------------------------------------------*/
/*!
 * \brief Create a multigrid smoother using a local K cycle for coarse-level
 *        solvers used by high-performance system variant of K-cycle.
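 *
 * This smoother is built to run rank-locally: its own communicator is set
 * to MPI_COMM_NULL, while reductions fall back to the parent's communicator
 * (see the body below).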
 *
 * \param[in]  parent  parent-level multigrid, or NULL
 *
 * \return  pointer to newly created multigrid object.
 */
/*----------------------------------------------------------------------------*/

static cs_multigrid_t *
_multigrid_create_k_cycle_bottom_smoother(cs_multigrid_t  *parent)
{
  cs_multigrid_t *mg = _multigrid_pc_create(CS_MULTIGRID_K_CYCLE);

  mg->subtype = CS_MULTIGRID_BOTTOM_SMOOTHE;
  mg->info.is_pc = true;
  mg->aggregation_limit = 8;
  mg->k_cycle_threshold = -1;

#if defined(HAVE_MPI)
  mg->comm = MPI_COMM_NULL;
  if (parent != NULL)
    mg->caller_comm = parent->comm;
  else
    mg->caller_comm = cs_glob_mpi_comm;
#endif

  return mg;
}

/*----------------------------------------------------------------------------*/
/*!
 * \brief Destructor for preconditioner using persistent multigrid context.
 *
 * Using this destructor, the multigrid context is only freed, not destroyed.
 *
 * \param[in, out]  context  pointer to multigrid linear solver info
 *                           (actual type: cs_multigrid_t **)
 */
/*----------------------------------------------------------------------------*/

static void
_pc_from_mg_destroy(void **context)
{
  cs_multigrid_t *mg = (cs_multigrid_t *)(*context);

  if (mg != NULL)
    cs_multigrid_free(mg);
}

/*----------------------------------------------------------------------------*/
/*!
 * \brief Create a multigrid preconditioner using a K cycle for coarse-level
 *        solvers used by the high-performance system variant of the K-cycle.
 *
 * \param[in]  mg  associated coarse level multigrid structure
 *
 * \return  pointer to newly created preconditioner object.
 */
/*----------------------------------------------------------------------------*/

static cs_sles_pc_t *
_pc_create_from_mg_sub(cs_multigrid_t  *mg)
{
  cs_sles_pc_t *pc = cs_sles_pc_define(mg,
                                       _multigrid_pc_get_type,
                                       _multigrid_pc_setup_k_sub,
                                       _multigrid_pc_tolerance_t,
                                       _multigrid_pc_apply,
                                       cs_multigrid_free,
                                       cs_multigrid_log,
                                       cs_multigrid_copy,
                                       _pc_from_mg_destroy);

  return pc;
}

/*----------------------------------------------------------------------------*/
/*!
 * \brief Create recursive high-performance system variant of K-cycle
 *        for coarse-solver bottom-level.
 *
 * Returns NULL if the number of associated ranks is small enough that
 * no multigrid solver is required.
 *
 * \param[in]  parent  parent-level multigrid, or NULL
 *
 * \return  pointer to newly created preconditioner object.
 */
/*----------------------------------------------------------------------------*/

static cs_multigrid_t *
_multigrid_create_k_cycle_bottom_coarsest(cs_multigrid_t  *parent)
{
  cs_multigrid_t *mg = NULL;

#if defined(HAVE_MPI)
  if (parent != NULL) {
    int size = 1;
    if (parent->comm != MPI_COMM_NULL)
      MPI_Comm_size(parent->comm, &size);
    if (size >= _k_cycle_hpc_merge_stride) {
      if (size > _k_cycle_hpc_merge_stride)
        mg = _multigrid_pc_create(CS_MULTIGRID_K_CYCLE_HPC);  /* recursive */
      else {
        if (size > _k_cycle_hpc_recurse_threshold) {
          mg = _multigrid_pc_create(CS_MULTIGRID_K_CYCLE);
          mg->aggregation_limit = 8;
          mg->k_cycle_threshold = -1;
        }
      }
      if (mg != NULL) {
        mg->caller_comm = parent->comm;
        mg->comm = cs_grid_get_comm_merge(parent->comm,
                                          _k_cycle_hpc_merge_stride);
        mg->subtype = CS_MULTIGRID_COARSE;
      }
    }
  }
#endif /* defined(HAVE_MPI) */

  return mg;
}

/*----------------------------------------------------------------------------*/
/*!
 * \brief Create a 2-level coarse-level solver for use
 *        by the high-performance system variant of the K-cycle.
 *
 * \param[in]  parent  parent-level multigrid, or NULL
 *
 * \return  pointer to newly created preconditioner object.
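 *
 * The resulting 2-level solver uses a rank-local K-cycle multigrid as the
 * level-0 smoother and an FCG solver on level 1; when enough ranks are
 * available, that level-1 solver is itself preconditioned by a K-cycle
 * built on a merged (smaller) communicator.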
 */
/*----------------------------------------------------------------------------*/

static cs_multigrid_t *
_multigrid_create_k_cycle_bottom(cs_multigrid_t  *parent)
{
  cs_multigrid_t *mg = cs_multigrid_create(CS_MULTIGRID_V_CYCLE);

  mg->subtype = CS_MULTIGRID_BOTTOM;
  mg->p_mg = parent;

  mg->aggregation_limit = 0;
  mg->coarsening_type = CS_GRID_COARSENING_DEFAULT;  /* Not used here */
  mg->n_levels_max = 2;
  mg->n_g_rows_min = 1;

  mg->pc_precision = 0.0;
  mg->pc_r_norm = 0.0;

  mg->n_levels_post = 0;
  mg->setup_data = NULL;

  BFT_REALLOC(mg->lv_info, mg->n_levels_max, cs_multigrid_level_info_t);

  for (int ii = 0; ii < mg->n_levels_max; ii++)
    _multigrid_level_info_init(mg->lv_info + ii);

  mg->caller_n_ranks = cs_glob_n_ranks;

#if defined(HAVE_MPI)
  mg->merge_mean_threshold = 0;
  mg->merge_glob_threshold = 1;
  mg->merge_stride = _k_cycle_hpc_merge_stride;
#endif

#if defined(HAVE_MPI)
  if (parent != NULL) {
    mg->comm = parent->comm;
    mg->caller_comm = parent->comm;
  }
#endif /* defined(HAVE_MPI) */

  /* local k-cycle for level 0 smoother,
     k-cycle preconditioning for level 1 solver (such as FCG) */

  mg->lv_mg[0] = _multigrid_create_k_cycle_bottom_smoother(mg);
  mg->lv_mg[2] = _multigrid_create_k_cycle_bottom_coarsest(mg);

  mg->lv_mg[0]->p_mg = parent;
  if (mg->lv_mg[2] != NULL)
    mg->lv_mg[2]->p_mg = mg;

  int pc_degree_bottom = (mg->lv_mg[2] != NULL) ? -1 : 1;

  cs_multigrid_set_solver_options
    (mg,
     CS_SLES_N_SMOOTHER_TYPES,
     CS_SLES_N_SMOOTHER_TYPES,
     CS_SLES_FCG,
     1,    /* n max cycles */
     1,    /* n max iter for descent */
     1,    /* n max iter for ascent */
     500,  /* n max iter coarse */
     -1, -1, pc_degree_bottom,  /* precond degree */
     1, 1, 1);  /* precision multiplier */

  return mg;
}

/*----------------------------------------------------------------------------
 * Allocate working array for coarse right hand sides and corrections.
 *
 * parameters:
 *   mg     <-> pointer to multigrid solver info and context
 *   stride <-- matrix fill stride
 *----------------------------------------------------------------------------*/

static void
_multigrid_setup_sles_work_arrays(cs_multigrid_t  *mg,
                                  cs_lnum_t        stride)
{
  cs_multigrid_setup_data_t *mgd = mg->setup_data;

  unsigned n0 = 0, n1 = 2;

  if (   mg->type >= CS_MULTIGRID_K_CYCLE
      && mg->type <= CS_MULTIGRID_K_CYCLE_HPC) {
    n0 = 4;
    n1 = n0 + 6;
  }

  BFT_MALLOC(mgd->rhs_vx, mgd->n_levels*n1, cs_real_t *);

  for (unsigned i = 0; i < n1; i++)
    mgd->rhs_vx[i] = NULL;

  if (mgd->n_levels > 1) {

    size_t wr_size0 = 0, wr_size1 = 0;

    for (unsigned i = 0; i < mgd->n_levels; i++) {
      size_t block_size
        = cs_grid_get_n_cols_max(mgd->grid_hierarchy[i])*stride;
      block_size = CS_SIMD_SIZE(block_size);
      if (i == 0)
        wr_size0 += block_size;
      else
        wr_size1 += block_size;
    }

    BFT_MALLOC(mgd->rhs_vx_buf, wr_size0*n0 + wr_size1*n1, cs_real_t);

    size_t block_size_shift = 0;

    for (unsigned i = 0; i < mgd->n_levels; i++) {
      size_t block_size
        = cs_grid_get_n_cols_max(mgd->grid_hierarchy[i])*stride;
      block_size = CS_SIMD_SIZE(block_size);  /* keep padding consistent
                                                 with the allocation above */
      cs_lnum_t n = (i == 0) ? n0 : n1;
      for (int j = 0; j < n; j++) {
        mgd->rhs_vx[i*n1 + j] = mgd->rhs_vx_buf + block_size_shift;
        block_size_shift += block_size;
      }
    }

  }
}

/*----------------------------------------------------------------------------
 * Setup multigrid sparse linear equation solvers on existing hierarchy.
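 *
 * Work arrays come from _multigrid_setup_sles_work_arrays above: each
 * level's right-hand side and correction vectors are sub-arrays of a single
 * buffer, with per-level block sizes rounded up by CS_SIMD_SIZE (V-cycles
 * use 2 arrays per coarse level, K-cycles 4 on the finest level and 10 per
 * coarse level).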
* * parameters: * mg <-> pointer to multigrid solver info and context * name <-- linear system name * verbosity <-- associated verbosity *----------------------------------------------------------------------------*/ static void _multigrid_setup_sles_k_cycle_bottom(cs_multigrid_t *mg, const char *name, int verbosity) { cs_timer_t t0, t1; cs_multigrid_level_info_t *mg_lv_info; const cs_grid_t *g; const cs_matrix_t *m; size_t l = strlen(name) + 32; char *_name; BFT_MALLOC(_name, l, char); /* Initialization */ t0 = cs_timer_time(); cs_multigrid_setup_data_t *mgd = mg->setup_data; cs_lnum_t stride = 1; /* For diagonal blocks */ /* Prepare solver context */ assert(mgd->n_levels == 2); unsigned i = 0; assert(mg->subtype == CS_MULTIGRID_BOTTOM); assert(mg->type == CS_MULTIGRID_V_CYCLE); g = mgd->grid_hierarchy[i]; m = cs_grid_get_matrix(g); mg_lv_info = mg->lv_info + i; t1 = cs_timer_time(); cs_timer_counter_add_diff(&(mg_lv_info->t_tot[0]), &t0, &t1); /* Intermediate grids */ t0 = t1; g = mgd->grid_hierarchy[0]; m = cs_grid_get_matrix(g); mg_lv_info = mg->lv_info; cs_mg_sles_t *mg_sles = &(mgd->sles_hierarchy[0]); if (mg->info.type[0] < CS_SLES_N_SMOOTHER_TYPES) { mg_sles->context = cs_multigrid_smoother_create(mg->info.type[0], mg->info.poly_degree[0], mg->info.n_max_iter[0]); mg_sles->setup_func = cs_multigrid_smoother_setup; mg_sles->solve_func = cs_multigrid_smoother_solve; mg_sles->destroy_func = cs_sles_it_destroy; } else if (i == 0 && mg->lv_mg[0] != NULL) { assert(mg_sles->context == NULL); mg_sles->context = mg->lv_mg[0]; mg_sles->setup_func = _multigrid_setup_k_local_smoothe; mg_sles->solve_func = cs_multigrid_solve; mg_sles->destroy_func = NULL; } snprintf(_name, l-1, "%s:smoother:%d", name, i); _name[l-1] = '\0'; mg_sles->setup_func(mg_sles->context, _name, m, verbosity - 2); #if defined(HAVE_MPI) if (mg_sles->solve_func == cs_sles_it_solve) { cs_sles_it_t *context = mg_sles->context; cs_sles_it_set_mpi_reduce_comm(context, MPI_COMM_NULL, MPI_COMM_NULL); } #endif t1 = cs_timer_time(); cs_timer_counter_add_diff(&(mg_lv_info->t_tot[0]), &t0, &t1); /* Coarsest grid */ t0 = t1; g = mgd->grid_hierarchy[1]; m = cs_grid_get_matrix(g); mg_lv_info = mg->lv_info + 1; mg_sles = &(mgd->sles_hierarchy[2]); mg_sles->context = cs_sles_it_create(mg->info.type[2], mg->info.poly_degree[2], mg->info.n_max_iter[2], false); /* stats not updated here */ mg_sles->setup_func = cs_sles_it_setup; mg_sles->solve_func = cs_sles_it_solve; mg_sles->destroy_func = cs_sles_it_destroy; if (mg->lv_mg[2] != NULL) { cs_sles_pc_t *pc = _pc_create_from_mg_sub(mg->lv_mg[2]); cs_sles_it_transfer_pc(mg_sles->context, &pc); } #if defined(HAVE_MPI) { cs_sles_it_t *context = mg_sles->context; cs_sles_it_set_mpi_reduce_comm(context, cs_grid_get_comm(mgd->grid_hierarchy[1]), mg->comm); } #endif snprintf(_name, l-1, "%s:coarse:%d", name, i); _name[l-1] = '\0'; mg_sles->setup_func(mg_sles->context, _name, m, verbosity - 2); /* Diagonal block size is the same for all levels */ const cs_lnum_t *db_size = cs_matrix_get_diag_block_size(m); stride = db_size[1]; _multigrid_setup_sles_work_arrays(mg, stride); BFT_FREE(_name); /* Timing */ t1 = cs_timer_time(); cs_timer_counter_add_diff(&(mg_lv_info->t_tot[0]), &t0, &t1); } /*---------------------------------------------------------------------------- * Setup multigrid sparse linear equation solvers on existing hierarchy. 
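 *
 * Each intermediate level receives descent and ascent smoothers (sharing
 * a context when both are of cs_sles_it type), while the coarsest level
 * receives a true iterative solver, possibly preconditioned by a nested
 * multigrid.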
* * parameters: * mg <-> pointer to multigrid solver info and context * name <-- linear system name * verbosity <-- associated verbosity *----------------------------------------------------------------------------*/ static void _multigrid_setup_sles(cs_multigrid_t *mg, const char *name, int verbosity) { cs_timer_t t0, t1; cs_multigrid_level_info_t *mg_lv_info; const cs_grid_t *g; const cs_matrix_t *m; size_t l = strlen(name) + 32; char *_name; BFT_MALLOC(_name, l, char); /* Initialization */ t0 = cs_timer_time(); cs_multigrid_setup_data_t *mgd = mg->setup_data; cs_lnum_t stride = 1; /* For diagonal blocks */ /* Prepare solver context */ unsigned n_levels = mgd->n_levels; unsigned i = 0; g = mgd->grid_hierarchy[i]; m = cs_grid_get_matrix(g); mg_lv_info = mg->lv_info + i; t1 = cs_timer_time(); cs_timer_counter_add_diff(&(mg_lv_info->t_tot[0]), &t0, &t1); /* Intermediate grids */ for (i = 0; i < n_levels - 1; i++) { t0 = t1; g = mgd->grid_hierarchy[i]; m = cs_grid_get_matrix(g); mg_lv_info = mg->lv_info + i; int n_ops = 2; if (i == 0) { if (mg->type == CS_MULTIGRID_V_CYCLE) n_ops = 1; } for (int j = 0; j < n_ops; j++) { if ( mg->info.type[j] != CS_SLES_N_IT_TYPES && mg->info.type[j] < CS_SLES_N_SMOOTHER_TYPES) { cs_mg_sles_t *mg_sles = &(mgd->sles_hierarchy[i*2 + j]); mg_sles->context = cs_multigrid_smoother_create(mg->info.type[j], mg->info.poly_degree[j], mg->info.n_max_iter[j]); mg_sles->setup_func = cs_multigrid_smoother_setup; mg_sles->solve_func = cs_multigrid_smoother_solve; mg_sles->destroy_func = cs_sles_it_destroy; /* Share context between descent and ascent smoothers if both are of the cs_sles_it type */ if (j == 1) { if ( mg->info.type[0] != CS_SLES_N_IT_TYPES && mg->info.type[0] < CS_SLES_N_SMOOTHER_TYPES) cs_sles_it_set_shareable(mgd->sles_hierarchy[i*2 + 1].context, mgd->sles_hierarchy[i*2].context); } } } if (i == 0 && mg->lv_mg[0] != NULL) { cs_mg_sles_t *mg_sles = &(mgd->sles_hierarchy[0]); assert(mg->subtype == CS_MULTIGRID_BOTTOM); assert(mg->type == CS_MULTIGRID_V_CYCLE); assert(n_ops == 1); assert(mg_sles->context == NULL); mg_sles->context = mg->lv_mg[0]; mg_sles->setup_func = _multigrid_setup_k_local_smoothe; mg_sles->solve_func = cs_multigrid_solve; mg_sles->destroy_func = NULL; } for (int j = 0; j < n_ops; j++) { if (j == 0) snprintf(_name, l-1, "%s:descent:%d", name, i); else snprintf(_name, l-1, "%s:ascent:%d", name, i); _name[l-1] = '\0'; cs_mg_sles_t *mg_sles = &(mgd->sles_hierarchy[i*2 + j]); mg_sles->setup_func(mg_sles->context, _name, m, verbosity - 2); #if defined(HAVE_MPI) if (mg_sles->solve_func == cs_sles_it_solve) { cs_sles_it_t *context = mg_sles->context; MPI_Comm lv_comm = cs_grid_get_comm(mgd->grid_hierarchy[i]); cs_sles_it_set_mpi_reduce_comm(context, lv_comm, mg->comm); } #endif } t1 = cs_timer_time(); cs_timer_counter_add_diff(&(mg_lv_info->t_tot[0]), &t0, &t1); } /* Coarsest grid */ if (n_levels > 1) { t0 = t1; i = n_levels - 1; g = mgd->grid_hierarchy[i]; m = cs_grid_get_matrix(g); mg_lv_info = mg->lv_info + i; cs_mg_sles_t *mg_sles = &(mgd->sles_hierarchy[i*2]); mg_sles->context = cs_sles_it_create(mg->info.type[2], mg->info.poly_degree[2], mg->info.n_max_iter[2], false); /* stats not updated here */ mg_sles->setup_func = cs_sles_it_setup; mg_sles->solve_func = cs_sles_it_solve; mg_sles->destroy_func = cs_sles_it_destroy; if (mg->lv_mg[2] != NULL) { cs_sles_pc_t *pc = _pc_create_from_mg_sub(mg->lv_mg[2]); cs_sles_it_transfer_pc(mg_sles->context, &pc); } #if defined(HAVE_MPI) { cs_sles_it_t *context = mg_sles->context; 
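      /* The coarsest grid may use a reduced communicator after rank
         merging, so both the grid's own communicator and the base
         communicator are passed for MPI reductions. */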
cs_sles_it_set_mpi_reduce_comm(context, cs_grid_get_comm(mgd->grid_hierarchy[i]), mg->comm); } #endif snprintf(_name, l-1, "%s:coarse:%d", name, i); _name[l-1] = '\0'; mg_sles->setup_func(mg_sles->context, _name, m, verbosity - 2); /* Diagonal block size is the same for all levels */ const cs_lnum_t *db_size = cs_matrix_get_diag_block_size(m); stride = db_size[1]; } _multigrid_setup_sles_work_arrays(mg, stride); BFT_FREE(_name); /* Timing */ t1 = cs_timer_time(); cs_timer_counter_add_diff(&(mg_lv_info->t_tot[0]), &t0, &t1); } /*----------------------------------------------------------------------------*/ /*! * \brief Setup multigrid sparse linear equation solver. * * \param[in, out] context pointer to multigrid solver info and context * (actual type: cs_multigrid_t *) * \param[in] name pointer to name of linear system * \param[in] mesh associated mesh (for visualization), or NULL * \param[in, out] f associated fine grid * \param[in] verbosity associated verbosity */ /*----------------------------------------------------------------------------*/ static void _setup_hierarchy(void *context, const char *name, const cs_mesh_t *mesh, cs_grid_t *f, int verbosity) { cs_multigrid_t *mg = context; cs_timer_t t0, t1, t2; int n_coarse_ranks = -2, n_coarse_ranks_prev = -2; /* for comparison only */ cs_lnum_t n_rows = 0, n_cols_ext = 0, n_entries = 0; cs_gnum_t n_g_rows = 0, n_g_rows_prev = 0; cs_multigrid_level_info_t *mg_lv_info = NULL; cs_grid_t *g = f; t0 = cs_timer_time(); /* Initialization */ mg->setup_data = _multigrid_setup_data_create(); _multigrid_add_level(mg, f); /* Assign to hierarchy */ /* Add info */ cs_grid_get_info(f, NULL, NULL, NULL, NULL, NULL, &n_rows, &n_cols_ext, &n_entries, &n_g_rows); mg_lv_info = mg->lv_info; t1 = cs_timer_time(); cs_timer_counter_add_diff(&(mg_lv_info->t_tot[0]), &t0, &t1); bool add_grid = true; while (add_grid) { n_g_rows_prev = n_g_rows; n_coarse_ranks_prev = n_coarse_ranks; /* Recursion test */ if ((int)(mg->setup_data->n_levels) >= mg->n_levels_max) break; /* Build coarser grid from previous grid */ if (verbosity > 2) bft_printf(_("\n building level %2u grid\n"), mg->setup_data->n_levels); if (mg->subtype == CS_MULTIGRID_BOTTOM) g = cs_grid_coarsen_to_single(g, mg->merge_stride, verbosity); else g = cs_grid_coarsen(g, mg->coarsening_type, mg->aggregation_limit, verbosity, mg->merge_stride, mg->merge_mean_threshold, mg->merge_glob_threshold, mg->p0p1_relax); bool symmetric = true; int grid_lv; cs_grid_get_info(g, &grid_lv, &symmetric, NULL, NULL, &n_coarse_ranks, &n_rows, &n_cols_ext, &n_entries, &n_g_rows); #if defined(HAVE_MPI) if ((n_coarse_ranks != mg->caller_n_ranks) && (mg->caller_n_ranks > 1)) { cs_gnum_t _n_g_rows = n_g_rows; MPI_Allreduce(&_n_g_rows, &n_g_rows, 1, CS_MPI_GNUM, MPI_MAX, mg->caller_comm); } #endif assert((unsigned)grid_lv == mg->setup_data->n_levels); /* If too few rows were grouped, we stop at this level */ if (mg->setup_data->n_levels > 1) { if ( (n_g_rows < mg->n_g_rows_min) || ( n_g_rows > (0.8 * n_g_rows_prev) && n_coarse_ranks == n_coarse_ranks_prev)) { add_grid = false; cs_grid_destroy(&g); /* too coarse, not added */ } } if (add_grid) { _multigrid_add_level(mg, g); /* Assign to hierarchy */ /* Print coarse mesh stats */ if (verbosity > 2) { #if defined(HAVE_MPI) if (mg->caller_n_ranks > 1) { int lcount[2], gcount[2]; int n_c_min, n_c_max, n_f_min, n_f_max; lcount[0] = n_rows; lcount[1] = n_entries; MPI_Allreduce(lcount, gcount, 2, MPI_INT, MPI_MAX, mg->caller_comm); n_c_max = gcount[0]; n_f_max = gcount[1]; lcount[0] = 
n_rows; lcount[1] = n_entries; MPI_Allreduce(lcount, gcount, 2, MPI_INT, MPI_MIN, mg->caller_comm); n_c_min = gcount[0]; n_f_min = gcount[1]; bft_printf (_(" total min max\n" " number of rows: %12llu %10d %10d\n" " number of entries: %10d %10d\n"), (unsigned long long)n_g_rows, n_c_min, n_c_max, n_f_min, n_f_max); } #endif if (mg->caller_n_ranks == 1) bft_printf(_(" number of rows: %10d\n" " number of entries: %10d\n"), (int)n_rows, (int)n_entries); } mg_lv_info = mg->lv_info + grid_lv; mg_lv_info->n_ranks[0] = n_coarse_ranks; mg_lv_info->n_elts[0][0] = n_rows; mg_lv_info->n_elts[1][0] = n_cols_ext; mg_lv_info->n_elts[2][0] = n_entries; } /* end adding grid */ t2 = cs_timer_time(); cs_timer_counter_add_diff(&(mg_lv_info->t_tot[0]), &t1, &t2); t1 = t2; } /* Print final info */ if (verbosity > 1) bft_printf (_(" number of grid levels: %u\n" " number of rows in coarsest grid: %llu\n\n"), mg->setup_data->n_levels, (unsigned long long)n_g_rows); /* Prepare preprocessing info if necessary */ if (mg->post_row_max > 0) { if (mg->info.n_calls[0] == 0) { int l_id = 0; const cs_matrix_t *a = cs_grid_get_matrix(f); const cs_lnum_t n_rows_a = cs_matrix_get_n_rows(a); if (n_rows_a == mesh->n_cells) l_id = CS_MESH_LOCATION_CELLS; else if (n_rows_a <= mesh->n_vertices) { l_id = CS_MESH_LOCATION_VERTICES; if (mesh->vtx_range_set != NULL) { if (mesh->vtx_range_set->n_elts[0] != n_rows_a) l_id = CS_MESH_LOCATION_NONE; } } #if defined(HAVE_MPI) if (mg->caller_n_ranks > 1) { int _l_id = l_id; MPI_Allreduce(&_l_id, &l_id, 1, MPI_INT, MPI_MAX, mg->caller_comm); if (l_id != _l_id) _l_id = CS_MESH_LOCATION_NONE; MPI_Allreduce(&_l_id, &l_id, 1, MPI_INT, MPI_MIN, mg->caller_comm); } #endif if (l_id != CS_MESH_LOCATION_NONE) { cs_post_add_time_dep_output(_cs_multigrid_post_function, (void *)mg); _multigrid_add_post(mg, name, l_id, n_rows_a); } } } /* Update info */ #if defined(HAVE_MPI) /* In parallel, get global (average) values from local values */ if (mg->caller_n_ranks > 1) { int i, j; cs_gnum_t *_n_elts_l = NULL, *_n_elts_s = NULL, *_n_elts_m = NULL; int grid_lv = mg->setup_data->n_levels; BFT_MALLOC(_n_elts_l, 3*grid_lv, cs_gnum_t); BFT_MALLOC(_n_elts_s, 3*grid_lv, cs_gnum_t); BFT_MALLOC(_n_elts_m, 3*grid_lv, cs_gnum_t); for (i = 0; i < grid_lv; i++) { cs_multigrid_level_info_t *mg_inf = mg->lv_info + i; for (j = 0; j < 3; j++) _n_elts_l[i*3 + j] = mg_inf->n_elts[j][0]; } MPI_Allreduce(_n_elts_l, _n_elts_s, 3*grid_lv, CS_MPI_GNUM, MPI_SUM, mg->caller_comm); MPI_Allreduce(_n_elts_l, _n_elts_m, 3*grid_lv, CS_MPI_GNUM, MPI_MAX, mg->caller_comm); for (i = 0; i < grid_lv; i++) { cs_multigrid_level_info_t *mg_inf = mg->lv_info + i; cs_gnum_t n_g_ranks = mg_inf->n_ranks[0]; for (j = 0; j < 3; j++) { cs_gnum_t tmp_max = n_g_ranks * _n_elts_m[i*3+j]; mg_inf->n_elts[j][0] = (_n_elts_s[i*3+j] + n_g_ranks/2) / n_g_ranks; mg_inf->imbalance[j][0] = (float)(tmp_max*1.0/_n_elts_s[i*3+j]); } } BFT_FREE(_n_elts_m); BFT_FREE(_n_elts_s); BFT_FREE(_n_elts_l); } #endif mg->info.n_levels_tot += mg->setup_data->n_levels; mg->info.n_levels[0] = mg->setup_data->n_levels; if (mg->info.n_calls[0] > 0) { if (mg->info.n_levels[0] < mg->info.n_levels[1]) mg->info.n_levels[1] = mg->info.n_levels[0]; if (mg->info.n_levels[0] > mg->info.n_levels[2]) mg->info.n_levels[2] = mg->info.n_levels[0]; } else { mg->info.n_levels[1] = mg->info.n_levels[0]; mg->info.n_levels[2] = mg->info.n_levels[0]; } mg->info.n_calls[0] += 1; /* Cleanup temporary interpolation arrays */ for (unsigned i = 0; i < mg->setup_data->n_levels; i++) 
cs_grid_free_quantities(mg->setup_data->grid_hierarchy[i]); /* Setup solvers */ if (mg->subtype == CS_MULTIGRID_BOTTOM) _multigrid_setup_sles_k_cycle_bottom(mg, name, verbosity); else _multigrid_setup_sles(mg, name, verbosity); /* Update timers */ t2 = cs_timer_time(); cs_timer_counter_add_diff(&(mg->info.t_tot[0]), &t0, &t2); } /*----------------------------------------------------------------------------*/ /*! * \brief Setup coarse multigrid for K-cycle HPC variant. * * \param[in, out] mg pointer to multigrid solver info and context * \param[in] name pointer to name of linear system * \param[in] a associated matrix * \param[in] n_ranks associated number of MPI ranks * \param[in] verbosity associated verbosity */ /*----------------------------------------------------------------------------*/ static void _setup_k_cycle_hpc_sub(cs_multigrid_t *mg, const char *name, const cs_matrix_t *a, int n_ranks, int verbosity) { /* Destroy previous hierarchy if necessary */ if (mg->setup_data != NULL) cs_multigrid_free(mg); /* Initialization */ cs_timer_t t0 = cs_timer_time(); if (verbosity > 1) bft_printf(_("\n Construction of grid hierarchy for \"%s\"\n"), name); /* Build coarse grids hierarchy */ /*------------------------------*/ cs_grid_t *f = cs_grid_create_from_parent(a, n_ranks); cs_multigrid_level_info_t *mg_lv_info = mg->lv_info; cs_timer_t t1 = cs_timer_time(); cs_timer_counter_add_diff(&(mg_lv_info->t_tot[0]), &t0, &t1); _setup_hierarchy(mg, name, NULL, f, verbosity); /* Assign to and build hierarchy */ /* Update timers */ t1 = cs_timer_time(); cs_timer_counter_add_diff(&(mg->info.t_tot[0]), &t0, &t1); } /*---------------------------------------------------------------------------- * Compute dot product, summing result over all ranks. * * parameters: * mg <-- pointer to solver context info * n <-- local number of elements * x <-- vector in s = x.x * * returns: * result of s = x.x *----------------------------------------------------------------------------*/ inline static double _dot_xx(const cs_multigrid_t *mg, cs_lnum_t n, const cs_real_t *x) { double s = cs_dot_xx(n, x); #if defined(HAVE_MPI) if (mg->comm != MPI_COMM_NULL) { double _sum; MPI_Allreduce(&s, &_sum, 1, MPI_DOUBLE, MPI_SUM, mg->comm); s = _sum; } #endif /* defined(HAVE_MPI) */ return s; } /*---------------------------------------------------------------------------- * Compute 2 dot products x.x and y.y, summing result over all ranks. * * parameters: * mg <-- pointer to solver context info * n <-- number of associated values * x <-- vector in s1 = x.x * y <-- vector in s2 = y.y * s1 --> result of s1 = x.x * s2 --> result of s2 = y.y *----------------------------------------------------------------------------*/ inline static void _dot_xx_yy(const cs_multigrid_t *mg, cs_lnum_t n, const cs_real_t *x, const cs_real_t *y, double *s1, double *s2) { double s[2]; s[0] = cs_dot_xx(n, x); s[1] = cs_dot_xx(n, y); #if defined(HAVE_MPI) if (mg->comm != MPI_COMM_NULL) { double _sum[2]; MPI_Allreduce(s, _sum, 2, MPI_DOUBLE, MPI_SUM, mg->comm); s[0] = _sum[0]; s[1] = _sum[1]; } #endif /* defined(HAVE_MPI) */ *s1 = s[0]; *s2 = s[1]; } /*---------------------------------------------------------------------------- * Compute 2 dot products x.y and y.z, summing result over all ranks.
* * parameters: * mg <-- pointer to solver context info * n <-- number of associated values * x <-- vector in s1 = x.y * y <-- vector in s1 = x.y and s2 = y.z * z <-- vector in s2 = y.z * s1 --> result of s1 = x.y * s2 --> result of s2 = y.z *----------------------------------------------------------------------------*/ inline static void _dot_xy_yz(const cs_multigrid_t *mg, cs_lnum_t n, const cs_real_t *x, const cs_real_t *y, const cs_real_t *z, double *s1, double *s2) { double s[2]; cs_dot_xy_yz(n, x, y, z, s, s+1); #if defined(HAVE_MPI) if (mg->comm != MPI_COMM_NULL) { double _sum[2]; MPI_Allreduce(s, _sum, 2, MPI_DOUBLE, MPI_SUM, mg->comm); s[0] = _sum[0]; s[1] = _sum[1]; } #endif /* defined(HAVE_MPI) */ *s1 = s[0]; *s2 = s[1]; } /*---------------------------------------------------------------------------- * Compute 3 dot products x.u, x.v, and x.w, summing result over all ranks. * * parameters: * mg <-- pointer to solver context info * n <-- number of associated values * x <-- vector in s1 = x.u, s2 = x.v, s3 = x.w * u <-- vector in s1 = x.u * v <-- vector in s2 = x.v * w <-- vector in s3 = x.w * s1 --> result of s1 = x.u * s2 --> result of s2 = x.v * s3 --> result of s3 = x.w *----------------------------------------------------------------------------*/ inline static void _dot_xu_xv_xw(const cs_multigrid_t *mg, cs_lnum_t n, const cs_real_t *x, const cs_real_t *u, const cs_real_t *v, const cs_real_t *w, double *s1, double *s2, double *s3) { double s[3]; /* Use two separate calls, as cs_blas.c does not yet have a matching call */ cs_dot_xy_yz(n, u, x, v, s, s+1); s[2] = cs_dot(n, x, w); #if defined(HAVE_MPI) if (mg->comm != MPI_COMM_NULL) { double _sum[3]; MPI_Allreduce(s, _sum, 3, MPI_DOUBLE, MPI_SUM, mg->comm); s[0] = _sum[0]; s[1] = _sum[1]; s[2] = _sum[2]; } #endif /* defined(HAVE_MPI) */ *s1 = s[0]; *s2 = s[1]; *s3 = s[2]; } /*---------------------------------------------------------------------------- * Test if convergence is attained. * * parameters: * mg <-- associated multigrid structure * var_name <-- variable name * n_f_rows <-- number of rows on fine mesh * n_max_cycles <-- maximum number of cycles * cycle_id <-- number of current cycle * verbosity <-- verbosity level * n_iters <-- number of iterations * precision <-- precision limit * r_norm <-- residue normalization * initial_residue <-- initial residue * residue <-> residue * rhs <-- right-hand side * * returns: * convergence status *----------------------------------------------------------------------------*/ static cs_sles_convergence_state_t _convergence_test(cs_multigrid_t *mg, const char *var_name, cs_lnum_t n_f_rows, int n_max_cycles, int cycle_id, int verbosity, int n_iters, double precision, double r_norm, double initial_residue, double *residue, const cs_real_t rhs[]) { const char cycle_h_fmt[] = N_(" ---------------------------------------------------\n" " n. | Cumulative iterations | Norm. residual\n" " cycles | on fine mesh | on fine mesh\n" " ---------------------------------------------------\n"); const char cycle_t_fmt[] = N_(" ---------------------------------------------------\n"); const char cycle_cv_fmt[] = N_(" %4d | %6d | %12.4e\n"); const char cycle_fmt[] = N_(" N. cycles: %4d; Fine mesh cumulative iter: %5d; " "Norm. 
residual %12.4e\n"); /* Compute residue */ *residue = sqrt(_dot_xx(mg, n_f_rows, rhs)); if (cycle_id == 1) initial_residue = *residue; /* Plot convergence if requested */ if (mg->cycle_plot != NULL) { double vals = *residue; double wall_time = cs_timer_wtime(); mg->plot_time_stamp += 1; cs_time_plot_vals_write(mg->cycle_plot, mg->plot_time_stamp, wall_time, 1, &vals); } if (*residue < precision*r_norm) { if (verbosity == 2) bft_printf(_(cycle_fmt), cycle_id, n_iters, *residue/r_norm); else if (verbosity > 2) { bft_printf(_(cycle_h_fmt)); bft_printf(_(cycle_cv_fmt), cycle_id, n_iters, *residue/r_norm); bft_printf(_(cycle_t_fmt)); } return CS_SLES_CONVERGED; } else if (cycle_id > n_max_cycles) { if ( (verbosity > -1 && !(mg->info.is_pc || mg->subtype != CS_MULTIGRID_MAIN)) || verbosity > 0) { if (verbosity == 1) bft_printf(_(cycle_fmt), cycle_id, n_iters, *residue/r_norm); else if (verbosity > 1) { bft_printf(_(cycle_fmt), cycle_id, n_iters, *residue/r_norm); bft_printf(_(cycle_t_fmt)); } bft_printf(_(" @@ Warning: algebraic multigrid for [%s]\n" " ********\n" " Maximum number of cycles (%d) reached.\n"), var_name, n_max_cycles); } return CS_SLES_MAX_ITERATION; } else { if (*residue > initial_residue * 10000.0 && *residue > 100.) { if (verbosity > 2) bft_printf(_(cycle_fmt), cycle_id, n_iters, *residue/r_norm); return CS_SLES_DIVERGED; } else if (verbosity > 2) { if (cycle_id == 1) bft_printf(_(cycle_h_fmt)); bft_printf(_(cycle_cv_fmt), cycle_id, n_iters, *residue/r_norm); } #if (__STDC_VERSION__ >= 199901L) if (isnan(*residue) || isinf(*residue)) return CS_SLES_DIVERGED; #endif } return CS_SLES_ITERATING; } /*---------------------------------------------------------------------------- * Log residue A.vx - Rhs * * parameters: * mg <-- pointer to multigrid context info * cycle_id <-- cycle id * var_name <-- variable name * a <-- matrix * rhs <-- right hand side * vx <-> system solution *----------------------------------------------------------------------------*/ static void _log_residual(const cs_multigrid_t *mg, int cycle_id, const char *var_name, const cs_matrix_t *a, const cs_real_t *rhs, cs_real_t *restrict vx) { const cs_lnum_t *diag_block_size = cs_matrix_get_diag_block_size(a); const cs_lnum_t n_rows = cs_matrix_get_n_rows(a) * diag_block_size[0]; const cs_lnum_t n_cols = cs_matrix_get_n_columns(a) * diag_block_size[0]; cs_real_t *r; BFT_MALLOC(r, n_cols, cs_real_t); cs_matrix_vector_multiply(a, vx, r); for (cs_lnum_t i = 0; i < n_rows; i++) r[i] -= rhs[i]; double s = cs_dot_xx(n_rows, r); BFT_FREE(r); #if defined(HAVE_MPI) if (mg->comm != MPI_COMM_NULL) { double _sum; MPI_Allreduce(&s, &_sum, 1, MPI_DOUBLE, MPI_SUM, mg->comm); s = _sum; } #endif /* defined(HAVE_MPI) */ cs_log_printf(CS_LOG_DEFAULT, " mg cycle %d: %s residual: %.3g\n", cycle_id, var_name, s); } /*---------------------------------------------------------------------------- * Update level information iteration counts * * parameters: * lv_info_it <-> logged number of iterations (last, min, max, total) * n_iter <-- current number of iterations *----------------------------------------------------------------------------*/ static inline void _lv_info_update_stage_iter(unsigned long long lv_info_it[], unsigned n_iter) { lv_info_it[0] = n_iter; if (n_iter < lv_info_it[1]) lv_info_it[1] = n_iter; else if (n_iter > lv_info_it[2]) lv_info_it[2] = n_iter; if (lv_info_it[1] == 0) lv_info_it[1] = n_iter; lv_info_it[3] += n_iter; } 
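/* Example (illustrative only): starting from counters {0, 0, 0, 0},
   successive calls with n_iter = 5, 3, then 8 leave
   lv_info_it = {8, 3, 8, 16}: last = 8, min = 3, max = 8, total = 16. */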
/*---------------------------------------------------------------------------- * Compute buffer size required for level names * * parameters: * name <-- linear system name * n_levels <-- number of multigrid levels * * returns: * buffer size needed for level names *----------------------------------------------------------------------------*/ static size_t _level_names_size(const char *name, int n_levels) { /* Format name width */ int w = 1; for (int i = n_levels/10; i > 0; i /= 10) w += 1; /* First part: pointers */ size_t retval = n_levels*sizeof(char *)*2; retval = CS_SIMD_SIZE(retval); /* Second part: buffers */ size_t buf_size = 0; if (n_levels > 1) buf_size = (strlen(name) + strlen(":descent:") + w + 1) * (n_levels)*2; retval += CS_SIMD_SIZE(buf_size); return retval; } /*---------------------------------------------------------------------------- * Initialize level names * * parameters: * name <-- linear system name * n_levels <-- number of multigrid levels * buffer <-> buffer to fill with level names *----------------------------------------------------------------------------*/ static void _level_names_init(const char *name, int n_levels, void *buffer) { /* Format name width */ int w = 1; for (int i = n_levels/10; i > 0; i /= 10) w += 1; /* First part: pointers */ size_t ptr_size = n_levels*sizeof(char *)*2; ptr_size = CS_SIMD_SIZE(ptr_size); char *_buffer = buffer; char **_lv_names = buffer; const char **lv_names = (const char **)_lv_names; const size_t name_len = strlen(name) + strlen(":descent:") + w + 1; /* Second part: buffers */ for (int i = 0; i < n_levels - 1; i++) { lv_names[i*2] = _buffer + ptr_size + i*2*name_len; lv_names[i*2+1] = lv_names[i*2] + name_len; sprintf(_lv_names[i*2], "%s:descent:%0*d", name, w, i); sprintf(_lv_names[i*2+1], "%s:ascent:%0*d", name, w, i); } if (n_levels > 1) { int i = n_levels - 1; lv_names[i*2] = _buffer + ptr_size + i*2*name_len; lv_names[i*2+1] = NULL; sprintf(_lv_names[i*2], "%s:coarse:%0*d", name, w, i); } } /*---------------------------------------------------------------------------- * Sparse linear system resolution using multigrid.
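 *
 * Each cycle consists of a descent phase (smoothing, then restriction of
 * the smoothed residual, on each level), a solve on the coarsest level,
 * and an ascent phase (prolongation of the coarse correction, then
 * smoothing, on all levels except the finest). The convergence test is
 * run on the finest level only, at the start of each cycle.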
* * parameters: * mg <-- multigrid system * lv_names <-- names of linear systems * (indexed as mg->setup_data->sles_hierarchy) * verbosity <-- verbosity level * cycle_id <-- id of current cycle * n_equiv_iter <-> equivalent number of iterations * precision <-- solver precision * r_norm <-- residue normalization * initial_residue <-> initial residue * residue <-> residue * rhs <-- right hand side * vx --> system solution * aux_size <-- size of aux_vectors (in bytes) * aux_vectors --- optional working area (allocation otherwise) * * returns: * convergence status *----------------------------------------------------------------------------*/ static cs_sles_convergence_state_t _multigrid_v_cycle(cs_multigrid_t *mg, const char **lv_names, int verbosity, int cycle_id, int *n_equiv_iter, double precision, double r_norm, double *initial_residue, double *residue, const cs_real_t *rhs, cs_real_t *vx, size_t aux_size, void *aux_vectors) { int level, coarsest_level; cs_lnum_t ii; cs_timer_t t0, t1; cs_lnum_t db_size[4] = {1, 1, 1, 1}; cs_lnum_t eb_size[4] = {1, 1, 1, 1}; cs_sles_convergence_state_t cvg = CS_SLES_ITERATING, c_cvg = CS_SLES_ITERATING; int n_iter = 0; double _residue = -1.; double _initial_residue = 0.; size_t _aux_r_size = aux_size / sizeof(cs_real_t); cs_lnum_t n_rows = 0, n_cols_ext = 0; cs_lnum_t _n_rows = 0; cs_gnum_t n_g_rows = 0; cs_real_t r_norm_l = r_norm; double denom_n_g_rows_0 = 1.0; cs_multigrid_setup_data_t *mgd = mg->setup_data; cs_multigrid_level_info_t *lv_info = NULL; cs_real_t *_aux_vectors = aux_vectors; cs_real_t *restrict wr = NULL; cs_real_t *restrict vx_lv = NULL; const cs_real_t *restrict rhs_lv = NULL; const cs_matrix_t *_matrix = NULL; const cs_grid_t *f = NULL, *c = NULL; bool end_cycle = false; /* Initialization */ coarsest_level = mgd->n_levels - 1; f = mgd->grid_hierarchy[0]; cs_grid_get_info(f, NULL, NULL, db_size, eb_size, NULL, &n_rows, &n_cols_ext, NULL, &n_g_rows); denom_n_g_rows_0 = 1.0 / n_g_rows; /* Allocate wr or use working area (note the finest grid could have fewer elements than a coarser grid to which rank merging has been applied, hence the test below) */ size_t wr_size = n_cols_ext*db_size[1]; for (level = 1; level < (int)(mgd->n_levels); level++) { cs_lnum_t n_cols_max = cs_grid_get_n_cols_max(mgd->grid_hierarchy[level]); wr_size = CS_MAX(wr_size, (size_t)(n_cols_max*db_size[1])); wr_size = CS_SIMD_SIZE(wr_size); } if (_aux_r_size >= wr_size) { wr = aux_vectors; _aux_vectors = wr + wr_size; _aux_r_size -= wr_size; } else BFT_MALLOC(wr, wr_size, cs_real_t); /* map arrays for rhs and vx; for the finest level, simply point to input and output arrays */ mgd->rhs_vx[0] = NULL; /* Use _rhs_level when necessary to avoid const warning */ mgd->rhs_vx[1] = vx; /* Descent */ /*---------*/ for (level = 0; level < coarsest_level; level++) { lv_info = mg->lv_info + level; t0 = cs_timer_time(); rhs_lv = (level == 0) ? 
rhs : mgd->rhs_vx[level*2]; vx_lv = mgd->rhs_vx[level*2 + 1]; c = mgd->grid_hierarchy[level+1]; /* Smoother pass */ _matrix = cs_grid_get_matrix(f); cs_mg_sles_t *mg_sles = &(mgd->sles_hierarchy[level*2]); c_cvg = mg_sles->solve_func(mg_sles->context, lv_names[level*2], _matrix, verbosity - 4, /* verbosity */ precision*mg->info.precision_mult[0], r_norm_l, &n_iter, &_residue, rhs_lv, vx_lv, _aux_r_size*sizeof(cs_real_t), _aux_vectors); if (mg->plot_time_stamp > -1) mg->plot_time_stamp += n_iter+1; if (mg_sles->solve_func == cs_sles_it_solve) _initial_residue = cs_sles_it_get_last_initial_residue(mg_sles->context); else _initial_residue = HUGE_VAL; if (level == 0 && cycle_id == 1) *initial_residue = _initial_residue; if (verbosity > 1) _log_residual(mg, cycle_id, lv_names[level*2], _matrix, rhs_lv, vx_lv); if (c_cvg < CS_SLES_BREAKDOWN) { end_cycle = true; break; } /* Restrict residue TODO: get residue from cs_sles_solve(). This optimisation would require adding an argument and exercising caution to ensure the correct sign and meaning of the residue (regarding timing, this stage is part of the descent smoother) */ cs_matrix_vector_multiply(_matrix, vx_lv, wr); _n_rows = n_rows*db_size[1]; # pragma omp parallel for if(_n_rows > CS_THR_MIN) for (ii = 0; ii < _n_rows; ii++) wr[ii] = rhs_lv[ii] - wr[ii]; /* Convergence test in beginning of cycle (fine mesh) */ if (level == 0) { cvg = _convergence_test(mg, lv_names[0], n_rows*db_size[1], mg->info.n_max_cycles, cycle_id, verbosity, lv_info->n_it_ds_smoothe[3], precision, r_norm, *initial_residue, residue, wr); /* If converged or cycle limit reached, break from descent loop */ if (cvg != 0) { c_cvg = cvg; end_cycle = true; t1 = cs_timer_time(); cs_timer_counter_add_diff(&(lv_info->t_tot[2]), &t0, &t1); lv_info->n_calls[2] += 1; _lv_info_update_stage_iter(lv_info->n_it_ds_smoothe, n_iter); *n_equiv_iter += n_iter * n_g_rows * denom_n_g_rows_0; break; } } t1 = cs_timer_time(); cs_timer_counter_add_diff(&(lv_info->t_tot[2]), &t0, &t1); lv_info->n_calls[2] += 1; _lv_info_update_stage_iter(lv_info->n_it_ds_smoothe, n_iter); *n_equiv_iter += n_iter * n_g_rows * denom_n_g_rows_0; /* Prepare for next level */ cs_grid_restrict_row_var(f, c, wr, mgd->rhs_vx[(level+1)*2]); cs_grid_get_info(c, NULL, NULL, NULL, NULL, NULL, &n_rows, &n_cols_ext, NULL, &n_g_rows); f = c; /* Initialize correction */ cs_real_t *restrict vx_lv1 = mgd->rhs_vx[(level+1)*2 + 1]; _n_rows = n_rows*db_size[1]; # pragma omp parallel for if(n_rows > CS_THR_MIN) for (ii = 0; ii < _n_rows; ii++) vx_lv1[ii] = 0.0; t0 = cs_timer_time(); cs_timer_counter_add_diff(&(lv_info->t_tot[4]), &t1, &t0); lv_info->n_calls[4] += 1; } /* End of loop on levels (descent) */ if (end_cycle == false) { /* Resolve coarsest level to convergence */ /*---------------------------------------*/ assert(level == coarsest_level); assert(c == mgd->grid_hierarchy[coarsest_level]); /* coarsest level == 0 should never happen, but we play it safe */ rhs_lv = (level == 0) ? 
rhs : mgd->rhs_vx[coarsest_level*2]; vx_lv = mgd->rhs_vx[level*2 + 1]; _matrix = cs_grid_get_matrix(c); cs_mg_sles_t *mg_sles = &(mgd->sles_hierarchy[level*2]); _initial_residue = _residue; lv_info = mg->lv_info + level; t0 = cs_timer_time(); c_cvg = mg_sles->solve_func(mg_sles->context, lv_names[level*2], _matrix, verbosity - 3, precision*mg->info.precision_mult[2], r_norm_l, &n_iter, &_residue, rhs_lv, vx_lv, _aux_r_size*sizeof(cs_real_t), _aux_vectors); t1 = cs_timer_time(); cs_timer_counter_add_diff(&(lv_info->t_tot[1]), &t0, &t1); lv_info->n_calls[1] += 1; _lv_info_update_stage_iter(lv_info->n_it_solve, n_iter); if (mg_sles->solve_func == cs_sles_it_solve) _initial_residue = cs_sles_it_get_last_initial_residue(mg_sles->context); else _initial_residue = HUGE_VAL; *n_equiv_iter += n_iter * n_g_rows * denom_n_g_rows_0; if (verbosity > 1) _log_residual(mg, cycle_id, lv_names[level*2], _matrix, rhs_lv, vx_lv); if (c_cvg < CS_SLES_BREAKDOWN) end_cycle = true; } if (end_cycle == false) { /* Ascent */ /*--------*/ for (level = coarsest_level - 1; level > -1; level--) { vx_lv = mgd->rhs_vx[level*2 + 1]; lv_info = mg->lv_info + level; c = mgd->grid_hierarchy[level+1]; f = mgd->grid_hierarchy[level]; cs_grid_get_info(f, NULL, NULL, NULL, NULL, NULL, &n_rows, &n_cols_ext, NULL, &n_g_rows); /* Prolong correction */ t0 = cs_timer_time(); cs_real_t *restrict vx_lv1 = mgd->rhs_vx[(level+1)*2 + 1]; cs_grid_prolong_row_var(c, f, vx_lv1, wr); _n_rows = n_rows*db_size[1]; # pragma omp parallel for if(n_rows > CS_THR_MIN) for (ii = 0; ii < _n_rows; ii++) vx_lv[ii] += wr[ii]; t1 = cs_timer_time(); cs_timer_counter_add_diff(&(lv_info->t_tot[5]), &t0, &t1); lv_info->n_calls[5] += 1; /* Smoother pass if level > 0 (smoother not called for finest mesh, as it will be called in descent phase of the next cycle, before the convergence test). */ if (level > 0) { rhs_lv = mgd->rhs_vx[level*2]; _matrix = cs_grid_get_matrix(f); cs_mg_sles_t *mg_sles = &(mgd->sles_hierarchy[level*2 + 1]); c_cvg = mg_sles->solve_func(mg_sles->context, lv_names[level*2+1], _matrix, verbosity - 4, /* verbosity */ precision*mg->info.precision_mult[1], r_norm_l, &n_iter, &_residue, rhs_lv, vx_lv, _aux_r_size*sizeof(cs_real_t), _aux_vectors); t0 = cs_timer_time(); cs_timer_counter_add_diff(&(lv_info->t_tot[3]), &t1, &t0); lv_info->n_calls[3] += 1; _lv_info_update_stage_iter(lv_info->n_it_as_smoothe, n_iter); if (mg_sles->solve_func == cs_sles_it_solve) _initial_residue = cs_sles_it_get_last_initial_residue(mg_sles->context); else _initial_residue = HUGE_VAL; *n_equiv_iter += n_iter * n_g_rows * denom_n_g_rows_0; if (verbosity > 1) _log_residual(mg, cycle_id, lv_names[level*2+1], _matrix, rhs_lv, vx_lv); if (c_cvg < CS_SLES_BREAKDOWN) break; } } /* End loop on levels (ascent) */ } /* End of tests on end_cycle */ mgd->exit_level = level; mgd->exit_residue = _residue; if (level == 0) mgd->exit_initial_residue = *initial_residue; else mgd->exit_initial_residue = _initial_residue; mgd->exit_cycle_id = cycle_id; /* Free memory */ if (wr != aux_vectors) BFT_FREE(wr); return cvg; } /*---------------------------------------------------------------------------- * Apply a multigrid K-cycle on the current level. * * This version of the multigrid cycle is a K-cycle, described by Y. Notay in * "An aggregation-based algebraic multigrid method", Electronic Transactions on * Numerical Analysis, 2010, vol 37, pages 123-146. 
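 *
 * Summary of the acceleration steps coded below (illustrative notation):
 * a first coarse correction x1 is scaled by ar1 = (x1.b) / (x1.A.x1),
 * giving residual rt1 = b - ar1.A.x1. If ||rt1||^2 < t^2.||b||^2 (t being
 * the k_cycle_threshold), x = ar1.x1 is kept; otherwise a second
 * correction x2 is computed from rt1, and with rho1 = x1.A.x1,
 * gamma = x2.A.x1, rho2 = x2.A.x2 - gamma^2/rho1, and ar2 = (x2.rt1)/rho2,
 * the combined correction is x = (ar1 - (gamma/rho1).ar2).x1 + ar2.x2.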
* * Used as a preconditioner, it is not constant (sometimes there is only one * visit of the coarse level, sometimes 2) and therefore, it has to be used with * IPCG (or FCG) instead of CG. Descent and ascent smoothers still have to be * symmetric for the IPCG to converge. * * parameters: * mg <-- multigrid system * level <-- level on which we apply the cycle * lv_names <-- names of linear systems * (indexed as mg->setup_data->sles_hierarchy) * verbosity <-- verbosity level * cycle_id <-- id of current cycle * n_equiv_iter <-> equivalent number of iterations * precision <-- solver precision * r_norm <-- residue normalization * initial_residue <-> initial residue * residue <-> residue * rhs <-- right hand side * vx --> system solution * aux_size <-- size of aux_vectors (in bytes) * aux_vectors --- optional working area (allocation otherwise) * * returns: * convergence status *----------------------------------------------------------------------------*/ static cs_sles_convergence_state_t _multigrid_k_cycle(cs_multigrid_t *mg, int level, const char **lv_names, int verbosity, int cycle_id, int *n_equiv_iter, double precision, double r_norm, double *initial_residue, double *residue, const cs_real_t *rhs, cs_real_t *vx, size_t aux_size, void *aux_vectors) { cs_timer_t t0, t1; bool end_cycle = false; cs_sles_convergence_state_t c_cvg = CS_SLES_ITERATING, cvg = CS_SLES_ITERATING; int n_iter = 0; double _residue = -1.; cs_real_t r_norm_l = r_norm; size_t _aux_r_size = aux_size / sizeof(cs_real_t); cs_lnum_t f_n_rows = 0, c_n_rows = 0; cs_lnum_t f_n_cols_ext = 0, c_n_cols_ext = 0; cs_gnum_t f_n_g_rows = 0, c_n_g_rows = 0; cs_multigrid_setup_data_t *mgd = mg->setup_data; int coarsest_level = mgd->n_levels-1; cs_lnum_t db_size[4] = {1, 1, 1, 1}; cs_lnum_t eb_size[4] = {1, 1, 1, 1}; /* Recursion threshold */ const cs_real_t trsh = mg->k_cycle_threshold; /* RHS and unknowns arrays */ cs_real_t *restrict vx_lv = NULL; const cs_real_t *restrict rhs_lv = NULL; const cs_matrix_t *f_matrix = NULL, *c_matrix = NULL; /* Initialization of the computing tools */ cs_grid_t *f = mgd->grid_hierarchy[level]; cs_grid_t *c = mgd->grid_hierarchy[level+1]; f_matrix = cs_grid_get_matrix(f); c_matrix = cs_grid_get_matrix(c); cs_grid_get_info(f, NULL, NULL, db_size, eb_size, NULL, &f_n_rows, &f_n_cols_ext, NULL, &f_n_g_rows); cs_grid_get_info(c, NULL, NULL, db_size, eb_size, NULL, &c_n_rows, &c_n_cols_ext, NULL, &c_n_g_rows); cs_lnum_t _f_n_rows = f_n_rows*db_size[1]; cs_lnum_t _c_n_rows = c_n_rows*db_size[1]; static double denom_n_g_rows_0 = 1; if (level == 0) denom_n_g_rows_0 = 1.0 / f_n_g_rows; rhs_lv = rhs; vx_lv = vx; /* Descent ------- */ const int na = 10; /* number of array slots in mgd->rhs_vx */ /* Arrays that are needed for both descent and ascent phases */ cs_real_t *restrict rt_lv = mgd->rhs_vx[level*na]; assert(rt_lv != NULL); # pragma omp parallel for if(_f_n_rows > CS_THR_MIN) for (cs_lnum_t ii = 0; ii < _f_n_rows; ii++) vx_lv[ii] = 0.0; cs_multigrid_level_info_t *lv_info = mg->lv_info + level; t0 = cs_timer_time(); /* Pre-smoothing */ cs_mg_sles_t *mg_sles = &(mgd->sles_hierarchy[level*2]); c_cvg = mg_sles->solve_func(mg_sles->context, lv_names[level*2], f_matrix, 0, /* verbosity */ precision*mg->info.precision_mult[0], r_norm_l, &n_iter, &_residue, rhs_lv, vx_lv, _aux_r_size*sizeof(cs_real_t), aux_vectors); if (initial_residue != NULL && cycle_id == 1) *initial_residue = HUGE_VAL; t1 = cs_timer_time(); cs_timer_counter_add_diff(&(lv_info->t_tot[2]), &t0, &t1); lv_info->n_calls[2] += 1; 
_lv_info_update_stage_iter(lv_info->n_it_ds_smoothe, n_iter); *n_equiv_iter += n_iter * f_n_g_rows * denom_n_g_rows_0; if (verbosity > 0) _log_residual(mg, cycle_id, lv_names[level*2], f_matrix, rhs_lv, vx_lv); /* Compute new residual */ cs_matrix_vector_multiply(f_matrix, vx_lv, rt_lv); # pragma omp parallel for if(_f_n_rows > CS_THR_MIN) for (cs_lnum_t ii = 0; ii < _f_n_rows; ii++) rt_lv[ii] = rhs_lv[ii] - rt_lv[ii]; /* Convergence test in beginning of cycle (fine mesh) */ if ((level == 0 && mg->info.n_max_cycles > 1) || c_cvg < CS_SLES_BREAKDOWN) { if (c_cvg >= CS_SLES_BREAKDOWN) cvg = _convergence_test(mg, lv_names[0], f_n_rows*db_size[1], mg->info.n_max_cycles, cycle_id, verbosity, lv_info->n_it_ds_smoothe[3], precision, r_norm, *initial_residue, residue, rt_lv); else cvg = c_cvg; /* If converged or cycle limit reached, break from descent loop */ if (cvg != CS_SLES_ITERATING) end_cycle = true; } t0 = cs_timer_time(); cs_timer_counter_add_diff(&(lv_info->t_tot[6]), &t1, &t0); lv_info->n_calls[6] += 1; /* Single update for this cycle */ if (end_cycle) { return cvg; } /* Arrays needed for the descent phase (mapped to preallocated mgd->rhs_vx to avoid extra allocations) */ cs_real_t *restrict vx_lv1 = mgd->rhs_vx[(level+1)*na + 4]; cs_real_t *restrict rhs_lv1 = mgd->rhs_vx[(level+1)*na + 5]; # pragma omp parallel for if(_c_n_rows > CS_THR_MIN) for (cs_lnum_t i = 0; i < _c_n_rows; i++) vx_lv1[i] = 0.0; /* Restriction of rt_lv into the next level rhs */ cs_grid_restrict_row_var(f, c, rt_lv, rhs_lv1); t1 = cs_timer_time(); cs_timer_counter_add_diff(&(lv_info->t_tot[4]), &t0, &t1); lv_info->n_calls[4] += 1; /* Approximate solution to the coarser problem ------------------------------------------- */ /* If the next level is the coarsest one, then we solve it directly */ if (level == coarsest_level-1) { lv_info = mg->lv_info + coarsest_level; mg_sles = &(mgd->sles_hierarchy[coarsest_level*2]); cvg = mg_sles->solve_func(mg_sles->context, lv_names[coarsest_level*2], c_matrix, verbosity - 2, precision*mg->info.precision_mult[2], r_norm_l, &n_iter, residue, rhs_lv1, vx_lv1, _aux_r_size*sizeof(cs_real_t), aux_vectors); t0 = cs_timer_time(); cs_timer_counter_add_diff(&(lv_info->t_tot[1]), &t1, &t0); lv_info->n_calls[1] += 1; _lv_info_update_stage_iter(lv_info->n_it_solve, n_iter); if (verbosity > 0) _log_residual(mg, cycle_id, lv_names[coarsest_level*2], c_matrix, rhs_lv1, vx_lv1); *n_equiv_iter += n_iter * c_n_g_rows * denom_n_g_rows_0; } /* Otherwise, we apply the K-cycle recursively */ else { c_cvg = _multigrid_k_cycle(mg, level + 1, lv_names, verbosity, cycle_id, &n_iter, precision, r_norm, NULL, residue, rhs_lv1, vx_lv1, aux_size, aux_vectors); t1 = cs_timer_time(); /* Arrays needed for the first Krylov iteration inside the cycle */ cs_real_t *restrict v_lv1 = mgd->rhs_vx[(level+1)*na + 6]; cs_real_t *restrict rt_lv1 = mgd->rhs_vx[(level+1)*na + 7]; cs_matrix_vector_multiply(c_matrix, vx_lv1, v_lv1); /* Coefficients for the Krylov iteration */ cs_real_t rho1 = 0, alpha1 = 0; _dot_xy_yz(mg, _c_n_rows, v_lv1, vx_lv1, rhs_lv1, &rho1, &alpha1); cs_real_t ar1 = alpha1 / rho1; /* New residual */ for (cs_lnum_t i = 0; i < _c_n_rows; i++) rt_lv1[i] = rhs_lv1[i] - ar1 * v_lv1[i]; cs_real_t rt_lv1_norm = 1.0, r_lv1_norm = 0.0; if (trsh > 0) _dot_xx_yy(mg, _c_n_rows, rt_lv1, rhs_lv1, &rt_lv1_norm, &r_lv1_norm); /* Free (unmap) arrays that were needed only for the descent phase */ rhs_lv1 = NULL; /* Test for the second coarse resolution */ if (rt_lv1_norm < trsh * trsh * r_lv1_norm) { # pragma omp parallel 
for if(_c_n_rows > CS_THR_MIN) for (cs_lnum_t i = 0; i < _c_n_rows; i++) vx_lv1[i] = ar1 * vx_lv1[i]; } else { /* Arrays for the (optional) second Krylov iteration */ cs_real_t *restrict vx2_lv1 = mgd->rhs_vx[(level+1)*na + 8]; cs_real_t *restrict w_lv1 = mgd->rhs_vx[(level+1)*na + 9]; # pragma omp parallel for if(_c_n_rows > CS_THR_MIN) for (cs_lnum_t i = 0; i < _c_n_rows; i++) { vx2_lv1[i] = 0.0; w_lv1[i] = 0.0; } t0 = cs_timer_time(); cs_timer_counter_add_diff(&(lv_info->t_tot[6]), &t1, &t0); c_cvg = _multigrid_k_cycle(mg, level + 1, lv_names, verbosity, cycle_id, &n_iter, precision, r_norm, initial_residue, residue, rt_lv1, vx2_lv1, aux_size, aux_vectors); t1 = cs_timer_time(); cs_matrix_vector_multiply(c_matrix, vx2_lv1, w_lv1); /* Krylov iteration */ cs_real_t gamma = 0, beta = 0, alpha2 = 0; _dot_xu_xv_xw(mg, _c_n_rows, vx2_lv1, v_lv1, w_lv1, rt_lv1, &gamma, &beta, &alpha2); cs_real_t rho2 = beta - (gamma * gamma) / rho1; cs_real_t ar2 = alpha2 / rho2; cs_real_t ar1_ar2 = ar1 - (gamma / rho1) * ar2; # pragma omp parallel for if(_c_n_rows > CS_THR_MIN) for (cs_lnum_t i = 0; i < _c_n_rows; i++) vx_lv1[i] = ar1_ar2 * vx_lv1[i] + ar2 * vx2_lv1[i]; vx2_lv1 = NULL; w_lv1 = NULL; } t0 = cs_timer_time(); cs_timer_counter_add_diff(&(lv_info->t_tot[6]), &t1, &t0); v_lv1 = NULL; rt_lv1 = NULL; } /* Ascent ------ */ lv_info = mg->lv_info + level; t0 = cs_timer_time(); /* Arrays that are needed for the ascent phase */ cs_real_t *restrict rb_lv = mgd->rhs_vx[level*na + 1]; cs_real_t *restrict z1_lv = mgd->rhs_vx[level*na + 2]; cs_real_t *restrict z2_lv = mgd->rhs_vx[level*na + 3]; # pragma omp parallel for if(_f_n_rows > CS_THR_MIN) for (cs_lnum_t ii = 0; ii < _f_n_rows; ii++) z2_lv[ii] = 0.0; /* Prolongation */ cs_grid_prolong_row_var(c, f, vx_lv1, z1_lv); t1 = cs_timer_time(); cs_timer_counter_add_diff(&(lv_info->t_tot[5]), &t0, &t1); lv_info->n_calls[5] += 1; /* New residual */ cs_matrix_vector_multiply(f_matrix, z1_lv, rb_lv); # pragma omp parallel for if(_f_n_rows > CS_THR_MIN) for (cs_lnum_t ii = 0; ii < _f_n_rows; ii++) rb_lv[ii] = rt_lv[ii] - rb_lv[ii]; t0 = cs_timer_time(); cs_timer_counter_add_diff(&(lv_info->t_tot[5]), &t1, &t0); t1 = t0; /* Post-smoothing */ mg_sles = &(mgd->sles_hierarchy[level*2+1]); cvg = mg_sles->solve_func(mg_sles->context, lv_names[level*2+1], f_matrix, 0, /* verbosity */ precision*mg->info.precision_mult[1], r_norm_l, &n_iter, &_residue, rb_lv, z2_lv, _aux_r_size*sizeof(cs_real_t), aux_vectors); t0 = cs_timer_time(); cs_timer_counter_add_diff(&(lv_info->t_tot[3]), &t1, &t0); lv_info->n_calls[3] += 1; _lv_info_update_stage_iter(lv_info->n_it_as_smoothe, n_iter); *n_equiv_iter += n_iter * f_n_g_rows * denom_n_g_rows_0; if (verbosity > 0) _log_residual(mg, cycle_id, lv_names[level*2 + 1], f_matrix, rb_lv, z2_lv); # pragma omp parallel for if(_f_n_rows > CS_THR_MIN) for (cs_lnum_t ii = 0; ii < _f_n_rows; ii++) vx_lv[ii] += z1_lv[ii] + z2_lv[ii]; t1 = cs_timer_time(); cs_timer_counter_add_diff(&(lv_info->t_tot[6]), &t0, &t1); /* Free/unmap arrays of the ascent phase */ rb_lv = NULL; z1_lv = NULL; z2_lv = NULL; /* Free/unmap arrays that were needed for both phases */ rt_lv = NULL; vx_lv1 = NULL; return cvg; } /*! (DOXYGEN_SHOULD_SKIP_THIS) \endcond */ /*============================================================================ * Public function definitions *============================================================================*/ /*----------------------------------------------------------------------------*/ /*! * \brief Initialize multigrid solver API. 
*/ /*----------------------------------------------------------------------------*/ void cs_multigrid_initialize(void) { } /*----------------------------------------------------------------------------*/ /*! * \brief Finalize multigrid solver API. */ /*----------------------------------------------------------------------------*/ void cs_multigrid_finalize(void) { cs_grid_finalize(); } /*----------------------------------------------------------------------------*/ /*! * \brief Define and associate a multigrid sparse linear system solver * for a given field or equation name. * * If this system did not previously exist, it is added to the list of * "known" systems. Otherwise, its definition is replaced by the one * defined here. * * This is a utility function: if finer control is needed, see * \ref cs_sles_define and \ref cs_multigrid_create. * * Note that this function returns a pointer directly to the multigrid solver * management structure. This may be used to set further options, for * example calling \ref cs_multigrid_set_coarsening_options and * \ref cs_multigrid_set_solver_options. * If needed, \ref cs_sles_find may be used to obtain a pointer to the * matching \ref cs_sles_t container. * * \param[in] f_id associated field id, or < 0 * \param[in] name associated name if f_id < 0, or NULL * \param[in] mg_type type of multigrid algorithm to use * * \return pointer to new multigrid info and context */ /*----------------------------------------------------------------------------*/ cs_multigrid_t * cs_multigrid_define(int f_id, const char *name, cs_multigrid_type_t mg_type) { cs_multigrid_t * mg = cs_multigrid_create(mg_type); cs_sles_t *sc = cs_sles_define(f_id, name, mg, "cs_multigrid_t", cs_multigrid_setup, cs_multigrid_solve, cs_multigrid_free, cs_multigrid_log, cs_multigrid_copy, cs_multigrid_destroy); cs_sles_set_error_handler(sc, cs_multigrid_error_post_and_abort); return mg; } /*----------------------------------------------------------------------------*/ /*! * \brief Create multigrid linear system solver info and context. * * The multigrid variant is an ACM (Additive Corrective Multigrid) method. 
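 *
 * An illustrative calling sequence when using this function directly
 * (the system name "p" and the variables a, rhs, vx, n_iter, residue,
 * verbosity, precision, and r_norm are placeholders) is:
 *
 *   cs_multigrid_t *mg = cs_multigrid_create(CS_MULTIGRID_V_CYCLE);
 *   cs_multigrid_setup(mg, "p", a, verbosity);
 *   (void)cs_multigrid_solve(mg, "p", a, verbosity, precision, r_norm,
 *                            &n_iter, &residue, rhs, vx, 0, NULL);
 *   cs_multigrid_free(mg);
 *   cs_multigrid_destroy((void **)&mg);
 *
 * In most cases, using \ref cs_multigrid_define is simpler, as it also
 * registers these functions with the \ref cs_sles_t mechanism.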
* * \param[in] mg_type type of multigrid algorithm to use * * \return pointer to new multigrid info and context */ /*----------------------------------------------------------------------------*/ cs_multigrid_t * cs_multigrid_create(cs_multigrid_type_t mg_type) { int ii; cs_multigrid_t *mg; /* Allocate and initialize multigrid structure */ BFT_MALLOC(mg, 1, cs_multigrid_t); mg->type = mg_type; mg->subtype = CS_MULTIGRID_MAIN; #if defined(HAVE_MPI) mg->comm = cs_glob_mpi_comm; mg->caller_comm = cs_glob_mpi_comm; mg->caller_n_ranks = cs_glob_n_ranks; if (mg->caller_n_ranks < 2) { mg->comm = MPI_COMM_NULL; mg->caller_comm = cs_glob_mpi_comm; } mg->merge_mean_threshold = 300; mg->merge_glob_threshold = 500; mg->merge_stride = 1; #endif mg->aggregation_limit = 3; mg->coarsening_type = CS_GRID_COARSENING_DEFAULT; mg->n_levels_max = 25; mg->n_g_rows_min = 30; mg->post_row_max = 0; mg->p0p1_relax = 0.; mg->k_cycle_threshold = 0; _multigrid_info_init(&(mg->info)); for (int i = 0; i < 3; i++) mg->lv_mg[i] = NULL; mg->p_mg = NULL; if (mg->type == CS_MULTIGRID_V_CYCLE) { mg->p0p1_relax = 0.95; } else if (mg->type == CS_MULTIGRID_K_CYCLE) { mg->coarsening_type = CS_GRID_COARSENING_SPD_PW; mg->aggregation_limit = 4; mg->n_levels_max = 10; mg->n_g_rows_min = 256; mg->k_cycle_threshold = 0.25; } else if (mg->type == CS_MULTIGRID_K_CYCLE_HPC) { mg->coarsening_type = CS_GRID_COARSENING_SPD_PW; mg->aggregation_limit = 8; mg->n_levels_max = 4; mg->k_cycle_threshold = -1; mg->lv_mg[2] = _multigrid_create_k_cycle_bottom(mg); } mg->pc_precision = 0.0; mg->pc_r_norm = 0.0; mg->n_levels_post = 0; mg->setup_data = NULL; BFT_MALLOC(mg->lv_info, mg->n_levels_max, cs_multigrid_level_info_t); for (ii = 0; ii < mg->n_levels_max; ii++) _multigrid_level_info_init(mg->lv_info + ii); mg->post_row_num = NULL; mg->post_row_rank = NULL; mg->post_name = NULL; mg->cycle_plot = NULL; mg->plot_time_stamp = -1; if (mg_type == CS_MULTIGRID_V_CYCLE) cs_multigrid_set_solver_options (mg, CS_SLES_PCG, CS_SLES_PCG, CS_SLES_PCG, 100, /* n max cycles */ 2, /* n max iter for descent */ 10, /* n max iter for ascent */ 500, /* n max iter for coarse solve */ 0, 0, 0, /* precond degree */ 1, 1, 1); /* precision multiplier */ else if ( mg->type >= CS_MULTIGRID_K_CYCLE && mg->type <= CS_MULTIGRID_K_CYCLE_HPC) cs_multigrid_set_solver_options (mg, CS_SLES_P_SYM_GAUSS_SEIDEL, CS_SLES_P_SYM_GAUSS_SEIDEL, CS_SLES_PCG, 100, /* n max cycles */ 1, /* n max iter for descent */ 1, /* n max iter for ascent */ 50, /* n max iter for coarse solve */ -1, -1, 0, /* precond degree */ -1, -1, 1); /* precision multiplier */ return mg; } /*----------------------------------------------------------------------------*/ /*! * \brief Destroy multigrid linear system solver info and context. 
* * \param[in, out] context pointer to multigrid linear solver info * (actual type: cs_multigrid_t **) */ /*----------------------------------------------------------------------------*/ void cs_multigrid_destroy(void **context) { cs_multigrid_t *mg = (cs_multigrid_t *)(*context); if (mg == NULL) return; BFT_FREE(mg->lv_info); if (mg->post_row_num != NULL) { int n_max_post_levels = (int)(mg->info.n_levels[2]) - 1; for (int i = 0; i < n_max_post_levels; i++) if (mg->post_row_num[i] != NULL) BFT_FREE(mg->post_row_num[i]); BFT_FREE(mg->post_row_num); } if (mg->post_row_rank != NULL) { int n_max_post_levels = (int)(mg->info.n_levels[2]) - 1; for (int i = 0; i < n_max_post_levels; i++) if (mg->post_row_rank[i] != NULL) BFT_FREE(mg->post_row_rank[i]); BFT_FREE(mg->post_row_rank); } BFT_FREE(mg->post_name); if (mg->cycle_plot != NULL) cs_time_plot_finalize(&(mg->cycle_plot)); for (int i = 0; i < 3; i++) { if (mg->lv_mg[i] != NULL) cs_multigrid_destroy((void **)(&(mg->lv_mg[i]))); } BFT_FREE(mg); *context = (void *)mg; } /*----------------------------------------------------------------------------*/ /*! * \brief Create multigrid sparse linear system solver info and context * based on existing info and context. * * \param[in] context pointer to reference info and context * (actual type: cs_multigrid_t *) * * \return pointer to newly created solver info object * (actual type: cs_multigrid_t *) */ /*----------------------------------------------------------------------------*/ void * cs_multigrid_copy(const void *context) { cs_multigrid_t *d = NULL; if (context != NULL) { const cs_multigrid_t *c = context; d = cs_multigrid_create(c->type); /* Beginning of cs_multigrid_info_t contains settings, the rest logging */ memcpy(&(d->info), &(c->info), offsetof(cs_multigrid_info_t, n_calls)); /* Same here: settings at beginning of structure */ memcpy(d, c, offsetof(cs_multigrid_t, n_levels_post)); } return d; } /*----------------------------------------------------------------------------*/ /*! * \brief Log multigrid solver info. * * \param[in] context pointer to iterative solver info and context * (actual type: cs_multigrid_t *) * \param[in] log_type log type */ /*----------------------------------------------------------------------------*/ void cs_multigrid_log(const void *context, cs_log_t log_type) { const cs_multigrid_t *mg = context; if (log_type == CS_LOG_SETUP) _multigrid_setup_log(mg); else if (log_type == CS_LOG_PERFORMANCE) _multigrid_performance_log(mg); } /*----------------------------------------------------------------------------*/ /*! * \brief Set multigrid coarsening parameters. 
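 *
 * For example (illustrative values, not defaults), to allow at most 8 fine
 * rows per coarse row, limit the hierarchy to 10 levels, and stop
 * coarsening below 30 global rows:
 *
 *   cs_multigrid_set_coarsening_options(mg, 8, CS_GRID_COARSENING_DEFAULT,
 *                                       10, 30, 0.95, 0);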
* * \param[in, out] mg pointer to multigrid info and context * \param[in] aggregation_limit maximum allowed fine rows * per coarse row * \param[in] coarsening_type coarsening type; * see \ref cs_grid_coarsening_t * \param[in] n_max_levels maximum number of grid levels * \param[in] min_g_rows global number of rows on coarse grids * under which no coarsening occurs * \param[in] p0p1_relax p0/p1 relaxation_parameter * \param[in] postprocess if > 0, postprocess coarsening * (uses coarse row numbers * modulo this value) */ /*----------------------------------------------------------------------------*/ void cs_multigrid_set_coarsening_options(cs_multigrid_t *mg, int aggregation_limit, int coarsening_type, int n_max_levels, cs_gnum_t min_g_rows, double p0p1_relax, int postprocess) { if (mg == NULL) return; mg->aggregation_limit = aggregation_limit; mg->coarsening_type = coarsening_type; mg->n_levels_max = n_max_levels; mg->n_g_rows_min = min_g_rows; mg->post_row_max = postprocess; mg->p0p1_relax = p0p1_relax; } /*----------------------------------------------------------------------------*/ /*! * \brief Set multigrid parameters for associated iterative solvers. * * \param[in, out] mg pointer to multigrid info * and context * \param[in] descent_smoother_type type of smoother for descent * \param[in] ascent_smoother_type type of smoother for ascent * \param[in] coarse_solver_type type of solver for coarsest grid * \param[in] n_max_cycles maximum number of cycles * \param[in] n_max_iter_descent maximum iterations * per descent smoothing * \param[in] n_max_iter_ascent maximum iterations * per ascent smoothing * \param[in] n_max_iter_coarse maximum iterations * per coarsest solution * \param[in] poly_degree_descent preconditioning polynomial degree * for descent phases (0: diagonal) * \param[in] poly_degree_ascent preconditioning polynomial degree * for ascent phases (0: diagonal) * \param[in] poly_degree_coarse preconditioning polynomial degree * for coarse solver (0: diagonal) * \param[in] precision_mult_descent precision multiplier * for descent smoothers (levels >= 1) * \param[in] precision_mult_ascent precision multiplier * for ascent smoothers * \param[in] precision_mult_coarse precision multiplier * for coarsest grid */ /*----------------------------------------------------------------------------*/ void cs_multigrid_set_solver_options(cs_multigrid_t *mg, cs_sles_it_type_t descent_smoother_type, cs_sles_it_type_t ascent_smoother_type, cs_sles_it_type_t coarse_solver_type, int n_max_cycles, int n_max_iter_descent, int n_max_iter_ascent, int n_max_iter_coarse, int poly_degree_descent, int poly_degree_ascent, int poly_degree_coarse, double precision_mult_descent, double precision_mult_ascent, double precision_mult_coarse) { if (mg == NULL) return; cs_multigrid_info_t *info = &(mg->info); info->type[0] = descent_smoother_type; info->type[1] = ascent_smoother_type; info->type[2] = coarse_solver_type; info->n_max_cycles = n_max_cycles; info->n_max_iter[0] = n_max_iter_descent; info->n_max_iter[1] = n_max_iter_ascent; info->n_max_iter[2] = n_max_iter_coarse; info->poly_degree[0] = poly_degree_descent; info->poly_degree[1] = poly_degree_ascent; info->poly_degree[2] = poly_degree_coarse; info->precision_mult[0] = precision_mult_descent; info->precision_mult[1] = precision_mult_ascent; info->precision_mult[2] = precision_mult_coarse; for (int i = 0; i < 3; i++) { switch(info->type[i]) { case CS_SLES_JACOBI: case CS_SLES_P_GAUSS_SEIDEL: case CS_SLES_P_SYM_GAUSS_SEIDEL: info->poly_degree[i] = -1; break; 
default: break; } } } /*----------------------------------------------------------------------------*/ /*! * \brief Return solver type used on fine mesh. * * \param[in] mg pointer to multigrid info and context * * \return type of smoother for descent (used for fine mesh) */ /*----------------------------------------------------------------------------*/ cs_sles_it_type_t cs_multigrid_get_fine_solver_type(const cs_multigrid_t *mg) { if (mg == NULL) return CS_SLES_N_IT_TYPES; const cs_multigrid_info_t *info = &(mg->info); return info->type[0]; } /*----------------------------------------------------------------------------*/ /*! * \brief Setup multigrid sparse linear equation solver. * * \param[in, out] context pointer to multigrid solver info and context * (actual type: cs_multigrid_t *) * \param[in] name pointer to name of linear system * \param[in] a associated matrix * \param[in] verbosity associated verbosity */ /*----------------------------------------------------------------------------*/ void cs_multigrid_setup(void *context, const char *name, const cs_matrix_t *a, int verbosity) { cs_multigrid_setup_conv_diff(context, name, a, NULL, NULL, verbosity); } /*----------------------------------------------------------------------------*/ /*! * \brief Setup multigrid sparse linear equation solver. * * \param[in, out] context pointer to multigrid solver info and context * (actual type: cs_multigrid_t *) * \param[in] name pointer to name of linear system * \param[in] a associated matrix * \param[in] a_conv associated matrix (convection) * \param[in] a_diff associated matrix (diffusion) * \param[in] verbosity associated verbosity */ /*----------------------------------------------------------------------------*/ void cs_multigrid_setup_conv_diff(void *context, const char *name, const cs_matrix_t *a, const cs_matrix_t *a_conv, const cs_matrix_t *a_diff, int verbosity) { cs_multigrid_t *mg = context; const cs_mesh_t *mesh = cs_glob_mesh; const cs_mesh_quantities_t *mq = cs_glob_mesh_quantities; /* Destroy previous hierarchy if necessary */ if (mg->setup_data != NULL) cs_multigrid_free(mg); /* Initialization */ cs_timer_t t0 = cs_timer_time(); if (verbosity > 1) { if (!(mg->info.is_pc)) { bft_printf(_("\n Setup of solver for linear system \"%s\"\n"), name); cs_matrix_log_info(a, verbosity); } bft_printf(_("\n Construction of grid hierarchy for \"%s\"\n"), name); } /* Build coarse grids hierarchy */ /*------------------------------*/ const cs_lnum_t *diag_block_size = cs_matrix_get_diag_block_size(a); const cs_lnum_t *extra_diag_block_size = cs_matrix_get_extra_diag_block_size(a); cs_grid_t *f = cs_grid_create_from_shared(mesh->n_i_faces, diag_block_size, extra_diag_block_size, (const cs_lnum_2_t *)(mesh->i_face_cells), mq->cell_cen, mq->cell_vol, mq->i_face_normal, a, a_conv, a_diff); cs_multigrid_level_info_t *mg_lv_info = mg->lv_info; cs_timer_t t1 = cs_timer_time(); cs_timer_counter_add_diff(&(mg_lv_info->t_tot[0]), &t0, &t1); _setup_hierarchy(mg, name, mesh, f, verbosity); /* Assign to and build hierarchy */ /* Update timers */ t1 = cs_timer_time(); cs_timer_counter_add_diff(&(mg->info.t_tot[0]), &t0, &t1); } /*----------------------------------------------------------------------------*/ /*! * \brief Call multigrid sparse linear equation solver. 
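 *
 * If the solver has not been set up yet for this system, the grid
 * hierarchy is built automatically before cycling. V or K cycles are then
 * applied until convergence, divergence, or the maximum cycle count is
 * reached.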
* * \param[in, out] context pointer to multigrid solver info and context * (actual type: cs_multigrid_t *) * \param[in] name pointer to name of linear system * \param[in] a matrix * \param[in] verbosity associated verbosity * \param[in] precision solver precision * \param[in] r_norm residue normalization * \param[out] n_iter number of "equivalent" iterations * \param[out] residue residue * \param[in] rhs right hand side * \param[in, out] vx system solution * \param[in] aux_size size of aux_vectors (in bytes) * \param aux_vectors optional working area * (internal allocation if NULL) * * \return convergence state */ /*----------------------------------------------------------------------------*/ cs_sles_convergence_state_t cs_multigrid_solve(void *context, const char *name, const cs_matrix_t *a, int verbosity, double precision, double r_norm, int *n_iter, double *residue, const cs_real_t *rhs, cs_real_t *vx, size_t aux_size, void *aux_vectors) { cs_timer_t t0, t1; t0 = cs_timer_time(); cs_sles_convergence_state_t cvg = CS_SLES_ITERATING; cs_multigrid_t *mg = context; cs_multigrid_info_t *mg_info = &(mg->info); const cs_lnum_t *db_size = cs_matrix_get_diag_block_size(a); assert(db_size[0] == db_size[1]); cs_lnum_t n_rows = cs_matrix_get_n_rows(a); /* Initialize number of equivalent iterations and residue, check for immediate return, solve sparse linear system using multigrid algorithm. */ *n_iter = 0; unsigned n_cycles = 0; if (mg->setup_data == NULL) { /* Stop solve timer to switch to setup timer */ t1 = cs_timer_time(); cs_timer_counter_add_diff(&(mg->info.t_tot[1]), &t0, &t1); /* Setup grid hierarchy */ cs_multigrid_setup(context, name, a, verbosity); /* Restart solve timer */ t0 = cs_timer_time(); } if (mg_info->is_pc == false && verbosity > 1) cs_log_printf(CS_LOG_DEFAULT, _(" RHS norm: %11.4e\n\n"), r_norm); /* Buffer size sufficient to avoid local reallocation for most solvers */ size_t lv_names_size = _level_names_size(name, mg->setup_data->n_levels); size_t _aux_size = lv_names_size + n_rows * 6 * db_size[1] * sizeof(cs_real_t); unsigned char *_aux_buf = aux_vectors; if (_aux_size > aux_size) BFT_MALLOC(_aux_buf, _aux_size, unsigned char); else _aux_size = aux_size; _level_names_init(name, mg->setup_data->n_levels, _aux_buf); const char **lv_names = (const char **)_aux_buf; if (verbosity == 2) /* More detailed headers later if > 2 */ bft_printf(_("Multigrid [%s]:\n"), name); /* Initial residue should be improved, but this is consistent with the legacy (by increment) case */ double initial_residue = -1; *residue = initial_residue; /* not known yet, so be safe */ /* Cycle to solution */ if (mg->type == CS_MULTIGRID_V_CYCLE) { for (n_cycles = 0; cvg == CS_SLES_ITERATING; n_cycles++) { int cycle_id = n_cycles+1; cvg = _multigrid_v_cycle(mg, lv_names, verbosity, cycle_id, n_iter, precision, r_norm, &initial_residue, residue, rhs, vx, _aux_size - lv_names_size, _aux_buf + lv_names_size); } } else if ( mg->type >= CS_MULTIGRID_K_CYCLE && mg->type <= CS_MULTIGRID_K_CYCLE_HPC) { for (n_cycles = 0; n_cycles < (unsigned)mg->info.n_max_cycles && cvg == CS_SLES_ITERATING; n_cycles++) { int cycle_id = n_cycles+1; cvg = _multigrid_k_cycle(mg, 0, lv_names, verbosity, cycle_id, n_iter, precision, r_norm, &initial_residue, residue, rhs, vx, _aux_size - lv_names_size, _aux_buf + lv_names_size); } } if (_aux_buf != aux_vectors) BFT_FREE(_aux_buf); if (verbosity > 0) cs_log_printf(CS_LOG_DEFAULT, "\n"); /* Update statistics */ t1 = cs_timer_time(); /* Update stats on number of iterations (last, min, max, 
total) */ mg_info->n_cycles[2] += n_cycles; if (mg_info->n_calls[1] > 0) { if (mg_info->n_cycles[0] > n_cycles) mg_info->n_cycles[0] = n_cycles; if (mg_info->n_cycles[1] < n_cycles) mg_info->n_cycles[1] = n_cycles; } else { mg_info->n_cycles[0] = n_cycles; mg_info->n_cycles[1] = n_cycles; } /* Update number of resolutions and timing data */ mg_info->n_calls[1] += 1; cs_timer_counter_add_diff(&(mg->info.t_tot[1]), &t0, &t1); return cvg; } /*----------------------------------------------------------------------------*/ /*! * \brief Free multigrid sparse linear equation solver setup context. * * This function frees resolution-related data, including the current * grid hierarchy, but does not free the whole context, * as info used for logging (especially performance data) is maintained. * * \param[in, out] context pointer to multigrid solver info and context * (actual type: cs_multigrid_t *) */ /*----------------------------------------------------------------------------*/ void cs_multigrid_free(void *context) { cs_multigrid_t *mg = context; cs_timer_t t0, t1; /* Initialization */ t0 = cs_timer_time(); /* Free subgrid info first if needed */ for (int i = 0; i < 3; i++) { if (mg->lv_mg[i] != NULL) cs_multigrid_free(mg->lv_mg[i]); } if (mg->setup_data != NULL) { cs_multigrid_setup_data_t *mgd = mg->setup_data; /* Free coarse solution data */ BFT_FREE(mgd->rhs_vx); BFT_FREE(mgd->rhs_vx_buf); /* Destroy solver hierarchy */ for (int i = mgd->n_levels - 1; i > -1; i--) { for (int j = 0; j < 2; j++) { cs_mg_sles_t *mg_sles = &(mgd->sles_hierarchy[i*2+j]); if (mg_sles->context != NULL && mg_sles->destroy_func) mg_sles->destroy_func(&(mg_sles->context)); } } BFT_FREE(mgd->sles_hierarchy); /* Destroy grid hierarchy */ for (int i = mgd->n_levels - 1; i > -1; i--) cs_grid_destroy(mgd->grid_hierarchy + i); BFT_FREE(mgd->grid_hierarchy); /* Destroy preconditioning-only arrays */ BFT_FREE(mgd->pc_name); BFT_FREE(mgd->pc_aux); BFT_FREE(mg->setup_data); } /* Update timers */ t1 = cs_timer_time(); cs_timer_counter_add_diff(&(mg->info.t_tot[0]), &t0, &t1); } /*----------------------------------------------------------------------------*/ /*! * \brief Create a multigrid preconditioner. * * \param[in] mg_type type of multigrid algorithm to use * * \return pointer to newly created preconditioner object. */ /*----------------------------------------------------------------------------*/ cs_sles_pc_t * cs_multigrid_pc_create(cs_multigrid_type_t mg_type) { cs_multigrid_t *mg = _multigrid_pc_create(mg_type); mg->info.is_pc = true; cs_sles_pc_t *pc = cs_sles_pc_define(mg, _multigrid_pc_get_type, _multigrid_pc_setup, _multigrid_pc_tolerance_t, _multigrid_pc_apply, cs_multigrid_free, cs_multigrid_log, cs_multigrid_copy, cs_multigrid_destroy); return pc; } /*----------------------------------------------------------------------------*/ /*! * \brief Error handler for multigrid sparse linear equation solver. * * In case of divergence or breakdown, this error handler outputs * postprocessing data to assist debugging, then aborts the run. * It does nothing in case the maximum iteration count is reached. 
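 *
 * Postprocessing output includes system information on the main level,
 * the diagonal and diagonal dominance of all coarse level matrices, and,
 * when the exit level is a coarse level, the right-hand side, solution,
 * and residual on that level.
 *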
* \param[in, out] sles pointer to solver object * \param[in] state convergence state * \param[in] a matrix * \param[in] rhs right hand side * \param[in, out] vx system solution * * \return false (do not attempt new solve) */ /*----------------------------------------------------------------------------*/ bool cs_multigrid_error_post_and_abort(cs_sles_t *sles, cs_sles_convergence_state_t state, const cs_matrix_t *a, const cs_real_t rhs[], cs_real_t vx[]) { if (state >= CS_SLES_MAX_ITERATION) return false; const cs_multigrid_t *mg = cs_sles_get_context(sles); const char *name = cs_sles_get_name(sles); cs_multigrid_setup_data_t *mgd = mg->setup_data; if (mgd == NULL) return false; int level = mgd->exit_level; int mesh_id = cs_post_init_error_writer_cells(); int location_id = CS_MESH_LOCATION_CELLS; const cs_range_set_t *rs = NULL; if (mesh_id != 0) { char var_name[32]; int lv_id = 0; cs_real_t *var = NULL, *da = NULL; cs_lnum_t db_size[4] = {1, 1, 1, 1}; cs_lnum_t eb_size[4] = {1, 1, 1, 1}; const cs_grid_t *g = mgd->grid_hierarchy[0]; const cs_lnum_t n_base_rows = cs_grid_get_n_rows(g); const cs_matrix_t *_matrix = NULL; BFT_MALLOC(var, cs_grid_get_n_cols_ext(g), cs_real_t); BFT_MALLOC(da, cs_grid_get_n_cols_ext(g), cs_real_t); /* Output info on main level */ cs_sles_post_error_output_def(name, mesh_id, a, rhs, vx); /* Output diagonal and diagonal dominance for all coarse levels */ for (lv_id = 1; lv_id < (int)(mgd->n_levels); lv_id++) { g = mgd->grid_hierarchy[lv_id]; cs_grid_get_info(g, NULL, NULL, db_size, eb_size, NULL, NULL, NULL, NULL, NULL); _matrix = cs_grid_get_matrix(g); cs_matrix_copy_diagonal(_matrix, da); cs_grid_project_var(g, n_base_rows, da, var); cs_range_set_scatter(rs, CS_REAL_TYPE, db_size[1], var, var); sprintf(var_name, "Diag_%04d", lv_id); cs_sles_post_output_var(var_name, mesh_id, location_id, CS_POST_WRITER_ERRORS, db_size[1], var); cs_grid_project_diag_dom(g, n_base_rows, var); cs_range_set_scatter(rs, CS_REAL_TYPE, db_size[1], var, var); sprintf(var_name, "Diag_Dom_%04d", lv_id); cs_sles_post_output_var(var_name, mesh_id, location_id, CS_POST_WRITER_ERRORS, db_size[1], var); } /* Output info on current level if > 0 */ if (level > 0) { cs_lnum_t ii; cs_lnum_t n_cells = 0; cs_lnum_t n_cols_ext = 0; cs_real_t *c_res = NULL; g = mgd->grid_hierarchy[level]; cs_grid_get_info(g, NULL, NULL, db_size, eb_size, NULL, &n_cells, &n_cols_ext, NULL, NULL); cs_grid_project_var(g, n_base_rows, mgd->rhs_vx[level*2], var); cs_range_set_scatter(rs, CS_REAL_TYPE, db_size[1], var, var); sprintf(var_name, "RHS_%04d", level); cs_sles_post_output_var(var_name, mesh_id, location_id, CS_POST_WRITER_ERRORS, db_size[1], var); cs_grid_project_var(g, n_base_rows, mgd->rhs_vx[level*2+1], var); cs_range_set_scatter(rs, CS_REAL_TYPE, db_size[1], var, var); sprintf(var_name, "X_%04d", level); cs_sles_post_output_var(var_name, mesh_id, location_id, CS_POST_WRITER_ERRORS, db_size[1], var); /* Compute residual */ BFT_MALLOC(c_res, n_cols_ext*db_size[1], cs_real_t); _matrix = cs_grid_get_matrix(g); cs_matrix_vector_multiply(_matrix, mgd->rhs_vx[level*2+1], c_res); const cs_real_t *c_rhs_lv = mgd->rhs_vx[level*2]; for (ii = 0; ii < n_cells; ii++) { for (cs_lnum_t i = 0; i < db_size[0]; i++) c_res[ii*db_size[1] + i] = fabs(c_res[ii*db_size[1] + i] - c_rhs_lv[ii*db_size[1] + i]); } cs_grid_project_var(g, n_base_rows, c_res, var); cs_range_set_scatter(rs, CS_REAL_TYPE, db_size[1], var, var); BFT_FREE(c_res); sprintf(var_name, "Residual_%04d", level); cs_sles_post_output_var(var_name, mesh_id, location_id, 
CS_POST_WRITER_ERRORS, db_size[1], var); } cs_post_finalize(); BFT_FREE(da); BFT_FREE(var); } /* Now abort */ const char *error_type[] = {N_("divergence"), N_("breakdown")}; int err_id = (state == CS_SLES_BREAKDOWN) ? 1 : 0; if (level == 0) bft_error(__FILE__, __LINE__, 0, _("algebraic multigrid [%s]: %s after %d cycles:\n" " initial residual: %11.4e; current residual: %11.4e"), name, _(error_type[err_id]), mgd->exit_cycle_id, mgd->exit_initial_residue, mgd->exit_residue); else bft_error(__FILE__, __LINE__, 0, _("algebraic multigrid [%s]: %s after %d cycles\n" " during resolution at level %d:\n" " initial residual: %11.4e; current residual: %11.4e"), name, _(error_type[err_id]), mgd->exit_cycle_id, level, mgd->exit_initial_residue, mgd->exit_residue); return false; } /*----------------------------------------------------------------------------*/ /*! * \brief Set plotting options for multigrid. * * \param[in, out] mg pointer to multigrid info and context * \param[in] base_name base plot name to activate, NULL otherwise * \param[in] use_iteration if true, use iteration as time stamp * otherwise, use wall clock time */ /*----------------------------------------------------------------------------*/ void cs_multigrid_set_plot_options(cs_multigrid_t *mg, const char *base_name, bool use_iteration) { if (mg != NULL) { if (cs_glob_rank_id < 1 && base_name != NULL) { /* Destroy previous plot if options reset */ if (mg->cycle_plot != NULL) cs_time_plot_finalize(&(mg->cycle_plot)); /* Create new plot */ cs_file_mkdir_default("monitoring"); const char *probe_names[] = {base_name}; mg->cycle_plot = cs_time_plot_init_probe(base_name, "monitoring/residue_", CS_TIME_PLOT_CSV, use_iteration, -1., /* force flush */ 0, /* no buffer */ 1, /* n_probes */ NULL, /* probe_list */ NULL, /* probe_coords */ probe_names); if (use_iteration) mg->plot_time_stamp = 0; } } } /*----------------------------------------------------------------------------*/ /*! * \brief Query the global multigrid parameters for parallel grid merging. * * \param[in] mg pointer to multigrid info and context * \param[out] rank_stride number of ranks over which merging * takes place, or NULL * \param[out] rows_mean_threshold mean number of rows under which merging * should be applied, or NULL * \param[out] rows_glob_threshold global number of rows under which * merging should be applied, or NULL */ /*----------------------------------------------------------------------------*/ void cs_multigrid_get_merge_options(const cs_multigrid_t *mg, int *rank_stride, int *rows_mean_threshold, cs_gnum_t *rows_glob_threshold) { #if defined(HAVE_MPI) if (rank_stride != NULL) *rank_stride = mg->merge_stride; if (rows_mean_threshold != NULL) *rows_mean_threshold = mg->merge_mean_threshold; if (rows_glob_threshold != NULL) *rows_glob_threshold = mg->merge_glob_threshold; #else if (rank_stride != NULL) *rank_stride = 0; if (rows_mean_threshold != NULL) *rows_mean_threshold = 0; if (rows_glob_threshold != NULL) *rows_glob_threshold = 0; #endif } /*----------------------------------------------------------------------------*/ /*! * \brief Set global multigrid parameters for parallel grid merging behavior. 
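 *
 * For example (illustrative values, not defaults), to merge grids across
 * groups of 4 ranks when the mean local row count drops below 300 or the
 * global row count drops below 500:
 *
 *   cs_multigrid_set_merge_options(mg, 4, 300, 500);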
* * \param[in, out] mg pointer to multigrid info and context * \param[in] rank_stride number of ranks over which merging * takes place * \param[in] rows_mean_threshold mean number of rows under which * merging should be applied * \param[in] rows_glob_threshold global number of rows under which * merging should be applied */ /*----------------------------------------------------------------------------*/ void cs_multigrid_set_merge_options(cs_multigrid_t *mg, int rank_stride, int rows_mean_threshold, cs_gnum_t rows_glob_threshold) { #if defined(HAVE_MPI) mg->merge_stride = rank_stride; if (mg->merge_stride > cs_glob_n_ranks) mg->merge_stride = cs_glob_n_ranks; mg->merge_mean_threshold = rows_mean_threshold; mg->merge_glob_threshold = rows_glob_threshold; #endif } /*----------------------------------------------------------------------------*/ END_C_DECLS