1 /*============================================================================
2  * Grid connectivity and data used for multigrid coarsening
3  * and associated matrix construction.
4  *============================================================================*/
5 
6 /*
7   This file is part of Code_Saturne, a general-purpose CFD tool.
8 
9   Copyright (C) 1998-2021 EDF S.A.
10 
11   This program is free software; you can redistribute it and/or modify it under
12   the terms of the GNU General Public License as published by the Free Software
13   Foundation; either version 2 of the License, or (at your option) any later
14   version.
15 
16   This program is distributed in the hope that it will be useful, but WITHOUT
17   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
19   details.
20 
21   You should have received a copy of the GNU General Public License along with
22   this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
23   Street, Fifth Floor, Boston, MA 02110-1301, USA.
24 */
25 
26 /*----------------------------------------------------------------------------*/
27 
28 #include "cs_defs.h"
29 
30 /*----------------------------------------------------------------------------
31  * Standard C library headers
32  *----------------------------------------------------------------------------*/
33 
34 #include <stdarg.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <assert.h>
39 #include <math.h>
40 
41 #if defined(__STDC_VERSION__)      /* size_t */
42 #  if (__STDC_VERSION__ == 199901L)
43 #    include <stddef.h>
44 #  else
45 #    include <stdlib.h>
46 #  endif
47 #else
48 #include <stdlib.h>
49 #endif
50 
51 #if defined(HAVE_MPI)
52 #include <mpi.h>
53 #endif
54 
55 /*----------------------------------------------------------------------------
56  * Local headers
57  *----------------------------------------------------------------------------*/
58 
59 #include "bft_mem.h"
60 #include "bft_error.h"
61 #include "bft_printf.h"
62 
63 #include "cs_base.h"
64 #include "cs_halo.h"
65 #include "cs_halo_perio.h"
66 #include "cs_log.h"
67 #include "cs_matrix.h"
68 #include "cs_matrix_default.h"
69 #include "cs_matrix_tuning.h"
70 #include "cs_matrix_util.h"
71 #include "cs_order.h"
72 #include "cs_prototypes.h"
73 #include "cs_sles.h"
74 #include "cs_sort.h"
75 
76 #include "fvm_defs.h"
77 
78 /*----------------------------------------------------------------------------
79  *  Header for the current file
80  *----------------------------------------------------------------------------*/
81 
82 #include "cs_grid.h"
83 
84 /*----------------------------------------------------------------------------*/
85 
86 BEGIN_C_DECLS
87 
88 /*! \cond DOXYGEN_SHOULD_SKIP_THIS */
89 
90 /*=============================================================================
91  * Local Macro Definitions
92  *============================================================================*/
93 
94 #define EPZERO  1.E-12
95 
96 #if !defined(HUGE_VAL)
97 #define HUGE_VAL  1.E+12
98 #endif
99 
100 /*=============================================================================
101  * Local Type Definitions
102  *============================================================================*/
103 
104 /*----------------------------------------------------------------------------
105  * Local Structure Definitions
106  *----------------------------------------------------------------------------*/
107 
108 /* Structure associated with grid */
109 /*--------------------------------*/
110 
111 struct _cs_grid_t {
112 
113   int                 level;        /* Level in multigrid hierarchy */
114 
115   bool                conv_diff;    /* true if convection/diffusion case,
116                                        false otherwise */
117   bool                symmetric;    /* Symmetric matrix coefficients
118                                        indicator */
119 
120   cs_lnum_t           db_size[4];   /* Block sizes for diagonal */
121   cs_lnum_t           eb_size[4];   /* Block sizes for extra diagonal */
122 
123   cs_gnum_t           n_g_rows;     /* Global number of rows */
124 
125   cs_lnum_t           n_rows;       /* Local number of rows */
126   cs_lnum_t           n_cols_ext;   /* Local number of participating cells
127                                        (cells + ghost cells sharing a face) */
128 
129   cs_lnum_t           n_elts_r[2];  /* Size of array used for restriction
130                                        operations ({n_rows, n_cols_ext} when
131                                        no grid merging has taken place) */
132 
133   /* Grid hierarchy information */
134 
135   const struct _cs_grid_t  *parent; /* Pointer to parent (finer) grid */
136 
137   /* Connectivity information */
138 
139   cs_lnum_t           n_faces;      /* Local number of faces */
140   const cs_lnum_2_t  *face_cell;    /* Face -> cells connectivity (1 to n) */
141   cs_lnum_2_t        *_face_cell;   /* Face -> cells connectivity
142                                        (private array) */
143 
144   /* Restriction from parent to current level */
145 
146   cs_lnum_t          *coarse_row;   /* Fine -> coarse row connectivity;
147                                        size: parent n_cols_ext */
148   cs_lnum_t          *coarse_face;  /* Fine -> coarse face connectivity
149                                        (1 to n, signed:
150                                        = 0 fine face inside coarse cell
151                                        > 0 orientation same as parent
152                                        < 0 orientation opposite as parent);
153                                        size: parent n_faces */
154 
155   /* Geometric data */
156 
157   cs_real_t         relaxation;     /* P0/P1 relaxation parameter */
158   const cs_real_t  *cell_cen;       /* Cell center (shared) */
159   cs_real_t        *_cell_cen;      /* Cell center (private) */
160 
161   const cs_real_t  *cell_vol;       /* Cell volume (shared) */
162   cs_real_t        *_cell_vol;      /* Cell volume (private) */
163 
164   const cs_real_t  *face_normal;    /* Surface normal of internal faces.
165                                        (shared; L2 norm = face area) */
166   cs_real_t        *_face_normal;   /* Surface normal of internal faces.
167                                        (private; L2 norm = face area) */
168 
169   /* Parallel / periodic halo */
170 
171   const cs_halo_t  *halo;           /* Halo for this connectivity (shared) */
172   cs_halo_t        *_halo;          /* Halo for this connectivity (private) */
173 
174   /* Matrix-related data */
175 
176   const cs_real_t  *da;             /* Diagonal (shared) */
177   cs_real_t        *_da;            /* Diagonal (private) */
178   const cs_real_t  *da_conv;        /* Diagonal (shared) */
179   cs_real_t        *_da_conv;       /* Diagonal (private) */
180   const cs_real_t  *da_diff;        /* Diagonal (shared) */
181   cs_real_t        *_da_diff;       /* Diagonal (private) */
182 
183   const cs_real_t  *xa;             /* Extra-diagonal (shared) */
184   cs_real_t        *_xa;            /* Extra-diagonal (private) */
185   const cs_real_t  *xa_conv;        /* Extra-diagonal (shared) */
186   cs_real_t        *_xa_conv;       /* Extra-diagonal (private) */
187   const cs_real_t  *xa_diff;        /* Extra-diagonal (shared) */
188   cs_real_t        *_xa_diff;       /* Extra-diagonal (private) */
189 
190   const cs_real_t  *xa0;            /* Symmetrized extra-diagonal (shared) */
191   cs_real_t        *_xa0;           /* Symmetrized extra-diagonal (private) */
192   const cs_real_t  *xa0_diff;       /* Symmetrized extra-diagonal (shared) */
193   cs_real_t        *_xa0_diff;      /* Symmetrized extra-diagonal (private) */
194 
195   cs_real_t        *xa0ij;
196 
197   cs_matrix_structure_t   *matrix_struct;  /* Associated matrix structure */
198   const cs_matrix_t       *matrix;         /* Associated matrix (shared) */
199   cs_matrix_t             *_matrix;        /* Associated matrix (private) */
200 
201 #if defined(HAVE_MPI)
202 
203   /* Additional fields to allow merging grids */
204 
205   int               merge_sub_root;    /* sub-root when merging */
206   int               merge_sub_rank;    /* sub-rank when merging
207                                           (0 <= sub-rank < merge_sub_size) */
208   int               merge_sub_size;    /* current number of merged ranks
209                                           for this subset */
210   int               merge_stride;      /* total number of ranks over which
211                                           merging occurred at previous levels */
212   int               next_merge_stride; /* total number of ranks over which
213                                           merging occurred at current level */
214 
215   cs_lnum_t        *merge_cell_idx;    /* start cell_id for each sub-rank
216                                           when merge_sub_rank = 0
217                                           (size: merge_sub_size + 1) */
218 
219   int               n_ranks;           /* Number of active ranks */
220   MPI_Comm          comm;              /* Associated communicator */
221 
222 #endif
223 };
224 
225 /* Structure associated with traversal and update of graph */
226 /*---------------------------------------------------------*/
227 
228 typedef struct _cs_graph_m_ptr_t {
229 
230   cs_lnum_t           m_min;        /* Minimum local number of columns */
231   cs_lnum_t           m_max;        /* Maximum local number of columns */
232 
233   cs_lnum_t          *m_head;       /* pointer to head of rows list for each
234                                        value of m */
235 
236   cs_lnum_t          *next;         /* pointer to next row */
237   cs_lnum_t          *prev;         /* pointer to previous row */
238 
239 } cs_graph_m_ptr_t;
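
/*----------------------------------------------------------------------------
 * Illustrative sketch (not used by the library): one plausible way the
 * m-indexed row lists above may be updated when a row's column count drops
 * from m to m-1, as in pairwise-aggregation style bookkeeping. The helper
 * name is hypothetical, m_min/m_max maintenance is omitted, and the actual
 * update logic used later in this file may differ.
 *----------------------------------------------------------------------------*/

#if 0 /* illustrative sketch only */

static void
_sketch_graph_m_ptr_move(cs_graph_m_ptr_t  *s,
                         cs_lnum_t          row_id,
                         cs_lnum_t          m)
{
  /* Unlink row from the list of rows with m columns */

  if (s->prev[row_id] > -1)
    s->next[s->prev[row_id]] = s->next[row_id];
  else
    s->m_head[m] = s->next[row_id];
  if (s->next[row_id] > -1)
    s->prev[s->next[row_id]] = s->prev[row_id];

  /* Push row at the head of the list of rows with m-1 columns */

  s->prev[row_id] = -1;
  s->next[row_id] = s->m_head[m-1];
  if (s->m_head[m-1] > -1)
    s->prev[s->m_head[m-1]] = row_id;
  s->m_head[m-1] = row_id;
}

#endif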
240 
241 /*============================================================================
242  *  Global variables
243  *============================================================================*/
244 
245 cs_real_t _penalization_threshold = 1e4;
246 
247  /* Threshold under which diagonal dominant rows are ignored in the
248     aggregation scheme (assuming that dominance allows strong enough
249     convergence for those rows by itself). Y. Notay recommends using
250     a value of 5. */
251 
252 cs_real_t _dd_threshold_pw = 5;
253 
254 /* Names for coarsening options */
255 
256 const char *cs_grid_coarsening_type_name[]
257   = {N_("default"),
258      N_("SPD, diag/extra-diag ratio based"),
259      N_("SPD, max extra-diag ratio based"),
260      N_("SPD, (multiple) pairwise aggregation"),
261      N_("convection + diffusion")};
262 
263 /* Select tuning options */
264 
265 static int _grid_tune_max_level = 0;
266 static int *_grid_tune_max_fill_level = NULL;
267 static cs_matrix_variant_t **_grid_tune_variant = NULL;
268 
269 /*============================================================================
270  * Private function definitions
271  *============================================================================*/
272 
273 /*----------------------------------------------------------------------------*/
274 /*!
275  * \brief Binary search for a given local id in a given array of
276  *        ordered local ids, when the id might not be present
277  *
278  * The id may or may not be present in the array.
279  *
280  * \param[in]  l_id_array_size  array size
281  * \param[in]  l_id             local id to search for
282  * \param[in]  l_id_array       ordered unique local ids array
283  *
284  * \return  index of l_id in l_id_array, or -1 if not found
285  */
286 /*----------------------------------------------------------------------------*/
287 
288 static inline cs_lnum_t
289 _l_id_binary_search(cs_lnum_t        l_id_array_size,
290                     cs_lnum_t        l_id,
291                     const cs_lnum_t  l_id_array[])
292 {
293   if (l_id_array_size < 1)
294     return -1;
295 
296   cs_lnum_t start_id = 0;
297   cs_lnum_t end_id = l_id_array_size - 1;
298   cs_lnum_t mid_id = (end_id -start_id) / 2;
299   while (start_id < end_id) {
300     if (l_id_array[mid_id] < l_id)
301       start_id = mid_id + 1;
302     else if (l_id_array[mid_id] > l_id)
303       end_id = mid_id - 1;
304     else
305       break;
306     mid_id = start_id + ((end_id -start_id) / 2);
307   }
308   if (l_id_array[mid_id] != l_id)
309     mid_id = -1;
310 
311   return mid_id;
312 }
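
/*----------------------------------------------------------------------------
 * Illustrative usage sketch (not used by the library): calling
 * _l_id_binary_search on a small ordered id array; values are hypothetical.
 *----------------------------------------------------------------------------*/

#if 0 /* illustrative sketch only */

static void
_sketch_l_id_binary_search(void)
{
  const cs_lnum_t ids[] = {2, 5, 9, 14};   /* ordered unique local ids */

  cs_lnum_t found   = _l_id_binary_search(4, 9, ids);   /* returns 2 */
  cs_lnum_t missing = _l_id_binary_search(4, 7, ids);   /* returns -1 */

  assert(found == 2 && missing == -1);
}

#endif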
313 
314 /*----------------------------------------------------------------------------
315  * Reduce block values to a single equivalent value for computation
316  * of aggregation criteria.
317  *
318  * parameters:
319  *   n_blocks  <-- number of blocks
320  *   b_size    <-- block dimensions
321  *   b_vals    <-- block values
322  *   s_vals    --> scalar values
323  *----------------------------------------------------------------------------*/
324 
325 static void
326 _reduce_block(cs_lnum_t         n_blocks,
327               const cs_lnum_t   b_size[4],
328               const cs_real_t   b_vals[],
329               cs_real_t         s_vals[])
330 {
331   const cs_real_t b_div = 1.0 / b_size[0];
332 
333 # pragma omp parallel for if(n_blocks > CS_THR_MIN)
334   for (cs_lnum_t i = 0; i < n_blocks; i++) {
335     cs_real_t s = 0;
336     for (cs_lnum_t j = 0; j < b_size[0]; j++)
337       s += b_vals[i*b_size[3] + b_size[2]*j + j];
338     s_vals[i] = s * b_div;
339   }
340 }
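
/*----------------------------------------------------------------------------
 * Illustrative usage sketch (not used by the library): reducing a single
 * 3x3 diagonal block to its equivalent scalar value, i.e. the mean of its
 * diagonal entries. The block sizes are assumed here to follow the
 * {block dimension, block dimension, line stride, values per block} layout
 * used for db_size/eb_size (e.g. {3, 3, 3, 9} for 3x3 blocks,
 * {1, 1, 1, 1} for scalar systems); values are hypothetical.
 *----------------------------------------------------------------------------*/

#if 0 /* illustrative sketch only */

static void
_sketch_reduce_block(void)
{
  const cs_lnum_t b_size[4] = {3, 3, 3, 9};

  const cs_real_t b_vals[9] = {4., 0., 0.,
                               0., 2., 0.,
                               0., 0., 6.};

  cs_real_t s_val = 0.;

  _reduce_block(1, b_size, b_vals, &s_val);   /* s_val = (4+2+6)/3 = 4 */

  assert(fabs(s_val - 4.) < EPZERO);
}

#endif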
341 
342 /*----------------------------------------------------------------------------
343  * Allocate empty grid structure
344  *
345  * - Periodic faces are not handled yet
346  *
347  * returns:
348  *   empty grid structure
349  *----------------------------------------------------------------------------*/
350 
351 static cs_grid_t *
352 _create_grid(void)
353 {
354   cs_grid_t *g;
355 
356   BFT_MALLOC(g, 1, cs_grid_t);
357 
358   g->conv_diff = false;
359   g->symmetric = false;
360 
361   g->db_size[0] = 1;
362   g->db_size[1] = 1;
363   g->db_size[2] = 1;
364   g->db_size[3] = 1;
365 
366   g->eb_size[0] = 1;
367   g->eb_size[1] = 1;
368   g->eb_size[2] = 1;
369   g->eb_size[3] = 1;
370 
371   g->level = 0;
372 
373   g->n_g_rows = 0;
374   g->n_rows = 0;
375   g->n_cols_ext = 0;
376 
377   g->n_faces = 0;
378 
379   g->n_elts_r[0] = 0;
380   g->n_elts_r[1] = 0;
381 
382   g->parent = NULL;
383   g->conv_diff = false;
384 
385   g->relaxation = 0;
386 
387   g->face_cell = NULL;
388   g->_face_cell = NULL;
389 
390   g->coarse_row = NULL;
391   g->coarse_face = NULL;
392 
393   g->cell_cen = NULL;
394   g->_cell_cen = NULL;
395   g->cell_vol = NULL;
396   g->_cell_vol = NULL;
397   g->face_normal = NULL;
398   g->_face_normal = NULL;
399 
400   g->halo = NULL;
401   g->_halo = NULL;
402 
403   g->da = NULL;
404   g->_da = NULL;
405   g->da_conv = NULL;
406   g->_da_conv = NULL;
407   g->da_diff = NULL;
408   g->_da_diff = NULL;
409   g->xa = NULL;
410   g->_xa = NULL;
411   g->xa_conv = NULL;
412   g->_xa_conv = NULL;
413   g->xa_diff = NULL;
414   g->_xa_diff = NULL;
415   g->xa0 = NULL;
416   g->_xa0 = NULL;
417   g->xa0_diff = NULL;
418   g->_xa0_diff = NULL;
419 
420   g->xa0ij = NULL;
421 
422   g->matrix_struct = NULL;
423   g->matrix = NULL;
424   g->_matrix = NULL;
425 
426 #if defined(HAVE_MPI)
427 
428   g->merge_sub_root = 0;
429   g->merge_sub_rank = 0;
430   g->merge_sub_size = 1;
431   g->merge_stride = 0;
432   g->next_merge_stride = 1;
433 
434   g->merge_cell_idx = NULL;
435 
436   g->n_ranks = cs_glob_n_ranks;
437   g->comm = cs_glob_mpi_comm;
438 
439 #endif
440   return g;
441 }
442 
443 /*----------------------------------------------------------------------------
444  * Initialize coarse grid from fine grid
445  *
446  * This creates a quasi-empty grid structure, with only symmetry and
447  * level information initialized, and the coarsening array allocated and
448  * initialized to -1 (not yet assigned).
449  *
450  * After this function is called, the coarsening array must be determined,
451  * then _coarsen must be called to complete the coarse grid.
452  *
453  * parameters:
454  *   f <-- Pointer to fine (parent) grid structure
455  *
456  * returns:
457  *   coarse grid structure
458  *----------------------------------------------------------------------------*/
459 
460 static cs_grid_t *
461 _coarse_init(const cs_grid_t *f)
462 {
463   cs_lnum_t ii;
464   int i;
465   cs_grid_t *c = NULL;
466 
467   c = _create_grid();
468 
469   c->parent = f;
470 
471   c->level = f->level + 1;
472   c->symmetric = f->symmetric;
473   c->conv_diff = f->conv_diff;
474   for (i = 0; i < 4; i++)
475     c->db_size[i] = f->db_size[i];
476 
477   for (i = 0; i < 4; i++)
478     c->eb_size[i] = f->eb_size[i];
479 
480   BFT_MALLOC(c->coarse_row, f->n_cols_ext, cs_lnum_t);
481 
482 # pragma omp parallel for if(f->n_cols_ext > CS_THR_MIN)
483   for (ii = 0; ii < f->n_cols_ext; ii++)
484     c->coarse_row[ii] = -1;
485 
486 #if defined(HAVE_MPI)
487   c->merge_stride = f->merge_stride;
488   c->next_merge_stride = f->next_merge_stride;
489   c->n_ranks = f->n_ranks;
490   c->comm = f->comm;
491 #endif
492 
493   return c;
494 }
495 
496 /*----------------------------------------------------------------------------
497  * Log aggregation information using the face to cells adjacency.
498  *
499  * parameters:
500  *   f         <-- Fine grid structure
501  *   c         <-- Coarse grid structure
502  *   verbosity <-- verbosity level
503  *----------------------------------------------------------------------------*/
504 
505 static void
506 _aggregation_stats_log(const cs_grid_t  *f,
507                        const cs_grid_t  *c,
508                        int               verbosity)
509 {
510   const cs_lnum_t f_n_rows = f->n_rows;
511   const cs_lnum_t c_n_rows = c->n_rows;
512 
513   cs_gnum_t c_n_rows_g = c->n_rows;
514 
515   const cs_lnum_t *c_coarse_row = (const cs_lnum_t *)(c->coarse_row);
516 
517   cs_lnum_t *c_aggr_count;
518 
519   BFT_MALLOC(c_aggr_count, c_n_rows, cs_lnum_t);
520   for (cs_lnum_t i = 0; i < c_n_rows; i++)
521     c_aggr_count[i] = 0;
522 
523   for (cs_lnum_t i = 0; i < f_n_rows; i++) {
524     cs_lnum_t j = c_coarse_row[i];
525     if (j > -1)
526       c_aggr_count[j] += 1;
527   }
528 
529   cs_lnum_t aggr_min = f->n_rows, aggr_max = 0;
530   cs_gnum_t aggr_tot = 0;
531 
532   for (cs_lnum_t i = 0; i < c_n_rows; i++) {
533     aggr_min = CS_MIN(aggr_min, c_aggr_count[i]);
534     aggr_max = CS_MAX(aggr_max, c_aggr_count[i]);
535     aggr_tot += c_aggr_count[i];
536   }
537 
538 #if defined(HAVE_MPI)
539   if (f->comm != MPI_COMM_NULL) {
540     cs_lnum_t _aggr_min = aggr_min, _aggr_max = aggr_max;
541     cs_gnum_t _tot[2] = {aggr_tot, c_n_rows_g}, tot[2] = {0, 0};
542     MPI_Allreduce(&_aggr_min, &aggr_min, 1, CS_MPI_LNUM, MPI_MIN, f->comm);
543     MPI_Allreduce(&_aggr_max, &aggr_max, 1, CS_MPI_LNUM, MPI_MAX, f->comm);
544     MPI_Allreduce(&_tot, &tot, 2, CS_MPI_GNUM, MPI_SUM, f->comm);
545     aggr_tot = tot[0];
546     c_n_rows_g = tot[1];
547   }
548 #endif
549 
550   bft_printf("       aggregation min = %ld; max = %ld; mean = %8.2f\n",
551              (long)aggr_min, (long)aggr_max,
552              (double)aggr_tot/(double)c_n_rows_g);
553 
554   cs_lnum_t aggr_count = aggr_max - aggr_min + 1;
555 
556   if (verbosity > 4 && c_n_rows_g > 0 && aggr_count > 0) {
557 
558     /* Histogram */
559 
560     bft_printf("       histogram\n");
561 
562     cs_lnum_t *histogram;
563     BFT_MALLOC(histogram, aggr_count, cs_lnum_t);
564     for (cs_lnum_t i = 0; i < aggr_count; i++)
565       histogram[i] = 0;
566     for (cs_lnum_t ic = 0; ic < c_n_rows; ic++) {
567       for (cs_lnum_t i = 0; i < aggr_count; i++) {
568         if (c_aggr_count[ic] == aggr_min + i)
569           histogram[i] += 1;
570       }
571     }
572 #if defined(HAVE_MPI) && defined(HAVE_MPI_IN_PLACE)
573     if (f->comm != MPI_COMM_NULL)
574       MPI_Allreduce(MPI_IN_PLACE, histogram, aggr_count, CS_MPI_LNUM, MPI_SUM,
575                     f->comm);
576 #endif
577     for (cs_lnum_t i = 0; i < aggr_count; i++) {
578       double epsp = 100. * histogram[i] / c_n_rows_g;
579       bft_printf("         aggregation %ld = %8.2f %%\n",
580                  (long)(aggr_min + i), epsp);
581     }
582     BFT_FREE(histogram);
583   }
584 
585   BFT_FREE(c_aggr_count);
586 }
587 
588 /*----------------------------------------------------------------------------
589  * Build fine -> coarse face connectivity and coarse grid face->cell
590  * connectivity (also determining the number of coarse faces) from
591  * fine grid and cell coarsening array.
592  *
593  * It is the caller's responsibility to free the returned coarse_face[]
594  * and coarse_face_cell[] arrays when they are no longer used.
595  *
596  * parameters:
597  *   fine             <-- Pointer to fine (parent) grid structure
598  *   coarse_row       <-- Fine -> coarse row id
599  *                        size: fine->n_cols_ext
600  *   n_coarse_rows    <-- Number of coarse cells
601  *   n_coarse_faces   --> Number of coarse faces
602  *   coarse_face      --> Fine -> coarse face connectivity (1 to n, signed:
603  *                        = 0 fine face inside coarse cell
604  *                        > 0 orientation same as parent
605  *                        < 0 orientation opposite as parent);
606  *                        size: fine->n_faces
607  *   coarse_face_cell --> coarse face -> cell connectivity
608  *                        size: n_coarse_faces
609  *----------------------------------------------------------------------------*/
610 
611 static void
612 _coarsen_faces(const cs_grid_t    *fine,
613                const cs_lnum_t    *restrict coarse_row,
614                cs_lnum_t           n_coarse_rows,
615                cs_lnum_t          *n_coarse_faces,
616                cs_lnum_t         **coarse_face,
617                cs_lnum_2_t       **coarse_face_cell)
618 {
619   cs_lnum_t  ii, jj, face_id, connect_size;
620 
621   cs_lnum_t  *restrict c_cell_cell_cnt = NULL;
622   cs_lnum_t  *restrict c_cell_cell_idx = NULL;
623   cs_lnum_t  *restrict c_cell_cell_id = NULL;
624   cs_lnum_t  *restrict c_cell_cell_face = NULL;
625 
626   cs_lnum_t    *restrict _coarse_face = NULL;
627   cs_lnum_2_t  *restrict _c_face_cell = NULL;
628 
629   cs_lnum_t   c_n_faces = 0;
630 
631   const cs_lnum_t c_n_cells = n_coarse_rows;
632   const cs_lnum_t f_n_faces = fine->n_faces;
633   const cs_lnum_2_t *restrict f_face_cell = fine->face_cell;
634 
635   /* Pre-allocate return values
636      (coarse face->cell connectivity is over-allocated) */
637 
638   BFT_MALLOC(_coarse_face, f_n_faces, cs_lnum_t);
639   BFT_MALLOC(_c_face_cell, f_n_faces, cs_lnum_2_t);
640 
641 # pragma omp parallel for if(f_n_faces > CS_THR_MIN)
642   for (face_id = 0; face_id < f_n_faces; face_id++)
643     _coarse_face[face_id] = 0;
644 
645   /* Allocate index */
646 
647   BFT_MALLOC(c_cell_cell_idx, c_n_cells + 1, cs_lnum_t);
648 
649 # pragma omp parallel for if(c_n_cells > CS_THR_MIN)
650   for (ii = 0; ii <= c_n_cells; ii++)
651     c_cell_cell_idx[ii] = 0;
652 
653   for (face_id = 0; face_id < f_n_faces; face_id++) {
654 
655     ii = coarse_row[f_face_cell[face_id][0]];
656     jj = coarse_row[f_face_cell[face_id][1]];
657 
658     if (ii < jj)
659       c_cell_cell_idx[ii+1] += 1;
660     else if (ii > jj)
661       c_cell_cell_idx[jj+1] += 1;
662 
663   }
664 
665   /* Transform to index */
666 
667   for (ii = 0; ii < c_n_cells; ii++)
668     c_cell_cell_idx[ii+1] += c_cell_cell_idx[ii];
669 
670   BFT_MALLOC(c_cell_cell_id,
671              c_cell_cell_idx[c_n_cells],
672              cs_lnum_t);
673 
674   BFT_MALLOC(c_cell_cell_face,
675              c_cell_cell_idx[c_n_cells],
676              cs_lnum_t);
677 
678   connect_size = c_cell_cell_idx[c_n_cells];
679 
680 # pragma omp parallel for if(connect_size > CS_THR_MIN)
681   for (ii = 0; ii < connect_size; ii++)
682     c_cell_cell_face[ii] = 0;
683 
684   /* Use a counter for array population, as the array will usually
685      not be fully populated */
686 
687   BFT_MALLOC(c_cell_cell_cnt, c_n_cells, cs_lnum_t);
688 
689 # pragma omp parallel for if(c_n_cells > CS_THR_MIN)
690   for (ii = 0; ii < c_n_cells; ii++)
691     c_cell_cell_cnt[ii] = 0;
692 
693   /* Build connectivity */
694 
695   c_n_faces = 0;
696 
697   for (face_id = 0; face_id < f_n_faces; face_id++) {
698 
699     cs_lnum_t kk, start_id, end_id;
700     cs_lnum_t sign = 1;
701 
702     ii = coarse_row[f_face_cell[face_id][0]];
703     jj = coarse_row[f_face_cell[face_id][1]];
704 
705     if (ii != jj && ii > -1 && jj > -1) {
706 
707       if (ii > jj) {
708         sign = -1;
709         kk = ii;
710         ii = jj;
711         jj = kk;
712       }
713 
714       start_id = c_cell_cell_idx[ii];
715       end_id   = c_cell_cell_idx[ii] + c_cell_cell_cnt[ii];
716 
717       for (kk = start_id; kk < end_id; kk++) {
718         if (c_cell_cell_id[kk] == jj)
719           break;
720       }
721       if (kk == end_id) {
722         c_cell_cell_id[kk] = jj;
723         c_cell_cell_face[kk] = c_n_faces + 1;
724         _c_face_cell[c_n_faces][0] = ii;
725         _c_face_cell[c_n_faces][1] = jj;
726         c_n_faces++;
727         c_cell_cell_cnt[ii] += 1;
728       }
729       _coarse_face[face_id] = sign*c_cell_cell_face[kk];
730 
731     }
732 
733   }
734 
735   /* Free temporary arrays */
736 
737   BFT_FREE(c_cell_cell_cnt);
738   BFT_FREE(c_cell_cell_face);
739   BFT_FREE(c_cell_cell_id);
740   BFT_FREE(c_cell_cell_idx);
741 
742   /* Set return values */
743 
744   BFT_REALLOC(_c_face_cell, c_n_faces, cs_lnum_2_t);
745 
746   *n_coarse_faces = c_n_faces;
747   *coarse_face = _coarse_face;
748   *coarse_face_cell = _c_face_cell;
749 }
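
/*----------------------------------------------------------------------------
 * Illustrative sketch (not used by the library) of the fine -> coarse face
 * convention documented above, on a hypothetical 4-cell fine grid whose
 * cells {0, 1} aggregate to coarse row 0 and {2, 3} to coarse row 1.
 *----------------------------------------------------------------------------*/

#if 0 /* illustrative sketch only */

static void
_sketch_coarsen_faces_convention(void)
{
  /* Fine face -> cells (0-based): f0 = (0,1), f1 = (1,2),
                                   f2 = (2,3), f3 = (3,1) */

  const cs_lnum_t coarse_row[4] = {0, 0, 1, 1};

  /* Expected coarse_face[] output for this layout:
       coarse_face[0] =  0   (face interior to coarse cell 0)
       coarse_face[1] = +1   (creates coarse face 1 = (0,1), same orientation)
       coarse_face[2] =  0   (face interior to coarse cell 1)
       coarse_face[3] = -1   (maps to coarse face 1, opposite orientation) */

  (void)coarse_row;
}

#endif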
750 
751 /*----------------------------------------------------------------------------
752  * Send prepared coarsening ids (based on halo send lists) and receive
753  * similar values to a cell coarsening array halo.
754  *
755  * parameters:
756  *   halo          <->  pointer to halo structure
757  *   coarse_send   <->  values defined on halo send lists
758  *   coarse_row    <->  pointer to local row coarsening array
759  *                      whose halo values are to be received
760  *----------------------------------------------------------------------------*/
761 
762 static void
763 _exchange_halo_coarsening(const cs_halo_t  *halo,
764                           cs_lnum_t         coarse_send[],
765                           cs_lnum_t         coarse_row[])
766 {
767   cs_lnum_t  i, start, length;
768 
769   int local_rank_id = (cs_glob_n_ranks == 1) ? 0 : -1;
770 
771 #if defined(HAVE_MPI)
772 
773   if (cs_glob_n_ranks > 1) {
774 
775     int rank_id;
776     int  request_count = 0;
777     const int  local_rank = cs_glob_rank_id;
778 
779     MPI_Request _request[128];
780     MPI_Request *request = _request;
781     MPI_Status _status[128];
782     MPI_Status *status = _status;
783 
784     /* Allocate if necessary */
785 
786     if (halo->n_c_domains*2 > 128) {
787       BFT_MALLOC(request, halo->n_c_domains*2, MPI_Request);
788       BFT_MALLOC(status, halo->n_c_domains*2, MPI_Status);
789     }
790 
791     /* Receive data from distant ranks */
792 
793     for (rank_id = 0; rank_id < halo->n_c_domains; rank_id++) {
794 
795       start = halo->index[2*rank_id];
796       length = halo->index[2*rank_id + 2] - halo->index[2*rank_id];
797 
798       if (halo->c_domain_rank[rank_id] != local_rank) {
799 
800         MPI_Irecv(coarse_row + halo->n_local_elts + start,
801                   length,
802                   CS_MPI_LNUM,
803                   halo->c_domain_rank[rank_id],
804                   halo->c_domain_rank[rank_id],
805                   cs_glob_mpi_comm,
806                   &(request[request_count++]));
807 
808       }
809       else
810         local_rank_id = rank_id;
811 
812     }
813 
814     /* Waiting until all receives are posted before sending is often recommended */
815 
816     // MPI_Barrier(comm);
817 
818     /* Send data to distant ranks */
819 
820     for (rank_id = 0; rank_id < halo->n_c_domains; rank_id++) {
821 
822       /* If this is not the local rank */
823 
824       if (halo->c_domain_rank[rank_id] != local_rank) {
825 
826         start = halo->send_index[2*rank_id];
827         length =  halo->send_index[2*rank_id + 2] - halo->send_index[2*rank_id];
828 
829         MPI_Isend(coarse_send + start,
830                   length,
831                   CS_MPI_LNUM,
832                   halo->c_domain_rank[rank_id],
833                   local_rank,
834                   cs_glob_mpi_comm,
835                   &(request[request_count++]));
836 
837       }
838 
839     }
840 
841     /* Wait for all exchanges */
842 
843     MPI_Waitall(request_count, request, status);
844 
845     if (request != _request) {
846       BFT_FREE(request);
847       BFT_FREE(status);
848     }
849   }
850 
851 #endif /* defined(HAVE_MPI) */
852 
853   /* Copy local values in case of periodicity */
854 
855   if (halo->n_transforms > 0) {
856 
857     if (local_rank_id > -1) {
858 
859       cs_lnum_t *_coarse_row
860         = coarse_row + halo->n_local_elts + halo->index[2*local_rank_id];
861 
862       start = halo->send_index[2*local_rank_id];
863       length =   halo->send_index[2*local_rank_id + 2]
864                - halo->send_index[2*local_rank_id];
865 
866 #     pragma omp parallel for if(length > CS_THR_MIN)
867       for (i = 0; i < length; i++)
868         _coarse_row[i] = coarse_send[start + i];
869 
870     }
871 
872   }
873 }
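
/*----------------------------------------------------------------------------
 * Illustrative sketch (not used by the library) of a typical call to
 * _exchange_halo_coarsening: coarse_send is sized to the halo send list and
 * filled from the local coarsening array before the exchange. The actual
 * preparation done in _coarsen_halo below also renumbers the sent values
 * per destination rank and section, which is omitted here.
 *----------------------------------------------------------------------------*/

#if 0 /* illustrative sketch only */

static void
_sketch_exchange_halo_coarsening(const cs_halo_t  *halo,
                                 cs_lnum_t         coarse_row[])
{
  cs_lnum_t *coarse_send = NULL;

  BFT_MALLOC(coarse_send, halo->n_send_elts[1], cs_lnum_t);

  for (cs_lnum_t i = 0; i < halo->n_send_elts[1]; i++)
    coarse_send[i] = coarse_row[halo->send_list[i]];

  /* Receives fill coarse_row[halo->n_local_elts ...] with the matching
     values from neighboring ranks */

  _exchange_halo_coarsening(halo, coarse_send, coarse_row);

  BFT_FREE(coarse_send);
}

#endif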
874 
875 /*----------------------------------------------------------------------------
876  * Build a halo for a coarse grid from a fine grid and its halo.
877  *
878  * The coarse grid must previously have been initialized with _coarse_init()
879  * and its coarsening indicator determined for the local cells.
880  *
881  * parameters:
882  *   f <-- Pointer to fine (parent) grid structure
883  *   c <-> Pointer to coarse grid structure
884  *----------------------------------------------------------------------------*/
885 
886 static void
887 _coarsen_halo(const cs_grid_t   *f,
888               cs_grid_t         *c)
889 {
890   int domain_id, tr_id, section_id;
891   cs_lnum_t ii, jj;
892   cs_lnum_t start_id, end_id, sub_count;
893 
894   cs_lnum_t *start_end_id = NULL;
895   cs_lnum_t *sub_num = NULL;
896   cs_lnum_t  *coarse_send = NULL;
897 
898   cs_lnum_t *restrict coarse_row = c->coarse_row;
899 
900   cs_halo_t *c_halo = NULL;
901   const cs_halo_t *f_halo = f->halo;
902 
903   const cs_lnum_t c_n_rows = c->n_rows;
904 
905   const int stride = f_halo->n_c_domains*4;
906   const int n_sections = f_halo->n_transforms + 1;
907   const int n_f_send = f_halo->n_send_elts[1]; /* Size of full list */
908 
909   c_halo = cs_halo_create_from_ref(f_halo);
910 
911   c->halo = c_halo;
912   c->_halo = c_halo;
913 
914   /* Initialize coarse halo counters */
915 
916   c_halo->n_local_elts = c_n_rows;
917   c_halo->n_send_elts[0] = 0;
918   c_halo->n_send_elts[1] = 0;
919 
920 # pragma omp parallel for if(f->n_cols_ext > CS_THR_MIN)
921   for (ii = f->n_rows; ii < f->n_cols_ext; ii++)
922     coarse_row[ii] = -1;
923 
924   /* Allocate and initialize counters */
925 
926   BFT_MALLOC(start_end_id, n_sections*2, cs_lnum_t);
927   BFT_MALLOC(sub_num, c_n_rows + 1, cs_lnum_t);
928   BFT_MALLOC(coarse_send, n_f_send, cs_lnum_t);
929 
930   /* Sub_num values are shifted by 1, so 0 can be used for handling
931      of removed (penalized) rows */
932 
933   sub_num[0] = -2;
934 # pragma omp parallel for if(c_n_rows > CS_THR_MIN)
935   for (ii = 1; ii <= c_n_rows; ii++)
936     sub_num[ii] = -1;
937 
938 # pragma omp parallel for if(n_f_send > CS_THR_MIN)
939   for (ii = 0; ii < n_f_send; ii++)
940     coarse_send[ii] = -1;
941 
942   /* Counting and marking pass */
943   /*---------------------------*/
944 
945   /* Proceed halo section by halo section */
946 
947   for (domain_id = 0; domain_id < f_halo->n_c_domains; domain_id++) {
948 
949     /* Handle purely parallel cells */
950 
951     start_end_id[0] = f_halo->send_index[domain_id*2];
952 
953     if (f_halo->n_transforms == 0)
954       start_end_id[1] = f_halo->send_index[(domain_id+1)*2];
955     else
956       start_end_id[1] = f_halo->send_perio_lst[4*domain_id];
957 
958     /* Now handle periodic cells transformation by transformation */
959 
960     for (tr_id = 0; tr_id < f_halo->n_transforms; tr_id++) {
961       start_end_id[tr_id*2 + 2]
962         = f_halo->send_perio_lst[stride*tr_id + 4*domain_id];
963       start_end_id[tr_id*2 + 3]
964         =   start_end_id[tr_id*2 + 2]
965           + f_halo->send_perio_lst[stride*tr_id + 4*domain_id + 1];
966     }
967 
968     /* We can now loop on sections independently of the transform */
969 
970     for (section_id = 0; section_id < n_sections; section_id++) {
971 
972       start_id = start_end_id[section_id*2];
973       end_id = start_end_id[section_id*2 + 1];
974 
975       sub_count = 0;
976 
977       if (section_id > 0)
978         c_halo->send_perio_lst[stride*(section_id-1) + 4*domain_id]
979           = c_halo->n_send_elts[0];
980 
981       /* Build halo renumbering */
982 
983       for (ii = start_id; ii < end_id; ii++) {
984         jj = coarse_row[f_halo->send_list[ii]] + 1;
985         if (sub_num[jj] == -1) {
986           sub_num[jj] = sub_count;
987           sub_count += 1;
988         }
989         coarse_send[ii] = sub_num[jj];
990       }
991 
992       /* Update send_list indexes and counts */
993 
994       c_halo->n_send_elts[0] += sub_count;
995 
996       if (section_id > 0) {
997         c_halo->send_perio_lst[stride*(section_id-1) + 4*domain_id + 1]
998           = sub_count;
999         c_halo->send_perio_lst[stride*(section_id-1) + 4*domain_id + 2]
1000           = c_halo->n_send_elts[0];
1001         c_halo->send_perio_lst[stride*(section_id-1) + 4*domain_id + 3]
1002           = 0;
1003       }
1004 
1005       /* Reset sub_num for next section or domain */
1006 
1007       for (ii = start_id; ii < end_id; ii++)
1008         sub_num[coarse_row[f_halo->send_list[ii]] + 1] = -1;
1009       sub_num[0] = -2;
1010 
1011     }
1012 
1013     /* Update send index (initialized at halo creation) */
1014 
1015     c_halo->send_index[domain_id*2 + 1] = c_halo->n_send_elts[0];
1016     c_halo->send_index[domain_id*2 + 2] = c_halo->n_send_elts[0];
1017 
1018   }
1019 
1020   /* Exchange and update coarsening list halo */
1021   /*------------------------------------------*/
1022 
1023   _exchange_halo_coarsening(f_halo, coarse_send, coarse_row);
1024 
1025   BFT_FREE(coarse_send);
1026 
1027   /* Proceed halo section by halo section */
1028 
1029   c_halo->n_elts[0] = 0;
1030   c_halo->n_elts[1] = 0;
1031   c_halo->index[0] = 0;
1032 
1033   for (domain_id = 0; domain_id < f_halo->n_c_domains; domain_id++) {
1034 
1035     /* Handle purely parallel cells */
1036 
1037     start_end_id[0] = f_halo->index[domain_id*2];
1038 
1039     if (f_halo->n_transforms == 0)
1040       start_end_id[1] = f_halo->index[(domain_id+1)*2];
1041     else
1042       start_end_id[1] = f_halo->perio_lst[4*domain_id];
1043 
1044     /* Now handle periodic cells transformation by transformation */
1045 
1046     for (tr_id = 0; tr_id < f_halo->n_transforms; tr_id++) {
1047       start_end_id[tr_id*2 + 2]
1048         = f_halo->perio_lst[stride*tr_id + 4*domain_id];
1049       start_end_id[tr_id*2 + 3]
1050         =   start_end_id[tr_id*2 + 2]
1051           + f_halo->perio_lst[stride*tr_id + 4*domain_id + 1];
1052     }
1053 
1054     /* We can now loop on sections independently of the transform */
1055 
1056     for (section_id = 0; section_id < n_sections; section_id++) {
1057 
1058       start_id = f_halo->n_local_elts + start_end_id[section_id*2];
1059       end_id = f_halo->n_local_elts + start_end_id[section_id*2 + 1];
1060 
1061       sub_count = 0;
1062 
1063       /* Build halo renumbering */
1064 
1065       for (ii = start_id, jj = -1; ii < end_id; ii++) {
1066         if (coarse_row[ii] > -1) {
1067           if (coarse_row[ii] > jj)
1068             jj += 1;
1069           coarse_row[ii] += c_n_rows + c_halo->n_elts[0];
1070         }
1071       }
1072 
1073       if (section_id > 0) {
1074         c_halo->perio_lst[stride*(section_id-1) + 4*domain_id]
1075           = c_halo->n_elts[0];
1076         c_halo->perio_lst[stride*(section_id-1) + 4*domain_id + 1]
1077           = jj + 1;
1078       }
1079 
1080       c_halo->n_elts[0] += jj + 1;
1081 
1082     }
1083 
1084     for (tr_id = 0; tr_id < f_halo->n_transforms; tr_id++) {
1085       c_halo->perio_lst[stride*tr_id + 4*domain_id + 2]
1086         = c_halo->n_elts[0];
1087       c_halo->perio_lst[stride*tr_id + 4*domain_id + 3]
1088         = 0;
1089     }
1090 
1091     /* Update index */
1092 
1093     c_halo->n_elts[1] = c_halo->n_elts[0];
1094 
1095     c_halo->index[domain_id*2 + 1] = c_halo->n_elts[0];
1096     c_halo->index[domain_id*2 + 2] = c_halo->n_elts[0];
1097   }
1098 
1099   /* Pass to build send list */
1100   /*-------------------------*/
1101 
1102   CS_MALLOC_HD(c_halo->send_list, c_halo->n_send_elts[0], cs_lnum_t,
1103                CS_ALLOC_HOST);
1104 
1105   c_halo->n_send_elts[0] = 0;
1106 
1107   /* Proceed halo section by halo section */
1108 
1109   for (domain_id = 0; domain_id < f_halo->n_c_domains; domain_id++) {
1110 
1111     /* Handle purely parallel cells */
1112 
1113     start_end_id[0] = f_halo->send_index[domain_id*2];
1114 
1115     if (f_halo->n_transforms == 0)
1116       start_end_id[1] = f_halo->send_index[(domain_id+1)*2];
1117     else
1118       start_end_id[1] = f_halo->send_perio_lst[4*domain_id];
1119 
1120     /* Now handle periodic cells transformation by transformation */
1121 
1122     for (tr_id = 0; tr_id < f_halo->n_transforms; tr_id++) {
1123       start_end_id[tr_id*2 + 2]
1124         = f_halo->send_perio_lst[stride*tr_id + 4*domain_id];
1125       start_end_id[tr_id*2 + 3]
1126         =   start_end_id[tr_id*2 + 2]
1127           + f_halo->send_perio_lst[stride*tr_id + 4*domain_id + 1];
1128     }
1129 
1130     /* We can now loop on sections independently of the transform */
1131 
1132     for (section_id = 0; section_id < n_sections; section_id++) {
1133 
1134       start_id = start_end_id[section_id*2];
1135       end_id = start_end_id[section_id*2 + 1];
1136 
1137       sub_count = 0;
1138 
1139       if (section_id > 0)
1140         c_halo->send_perio_lst[stride*(section_id-1) + 4*domain_id]
1141           = c_halo->n_send_elts[0];
1142 
1143       /* Build halo renumbering */
1144 
1145       for (ii = start_id; ii < end_id; ii++) {
1146         jj = coarse_row[f_halo->send_list[ii]] + 1;
1147         if (sub_num[jj] == -1) {
1148           sub_num[jj] = sub_count;
1149           c_halo->send_list[c_halo->n_send_elts[0] + sub_count] = jj - 1;
1150           sub_count += 1;
1151         }
1152       }
1153 
1154       c_halo->n_send_elts[0] += sub_count;
1155 
1156       /* Reset sub_num for next section or domain */
1157 
1158       for (ii = start_id; ii < end_id; ii++)
1159         sub_num[coarse_row[f_halo->send_list[ii]] + 1] = -1;
1160       sub_num[0] = -2;
1161 
1162     }
1163 
1164   }
1165 
1166   c_halo->n_send_elts[1] = c_halo->n_send_elts[0];
1167 
1168   /* Free memory */
1169 
1170   BFT_FREE(coarse_send);
1171   BFT_FREE(sub_num);
1172   BFT_FREE(start_end_id);
1173 }
1174 
1175 /*----------------------------------------------------------------------------
1176  * Build coarse grid from fine grid
1177  *
1178  * The coarse grid must previously have been initialized with _coarse_init()
1179  * and its coarsening indicator determined (at least for the local cells;
1180  * it is extended to halo cells here if necessary).
1181  *
1182  * - Periodic faces are not handled yet
1183  *
1184  * parameters:
1185  *   f <-- Pointer to fine (parent) grid structure
1186  *   c <-> Pointer to coarse grid structure
1187  *----------------------------------------------------------------------------*/
1188 
1189 static void
1190 _coarsen(const cs_grid_t   *f,
1191          cs_grid_t         *c)
1192 {
1193   cs_lnum_t  c_n_rows = 0;
1194 
1195   const cs_lnum_t f_n_faces = f->n_faces;
1196   const cs_lnum_t f_n_rows = cs_matrix_get_n_rows(f->matrix);
1197   const cs_lnum_2_t *restrict f_face_cell = f->face_cell;
1198 
1199   /* Sanity check */
1200 
1201   if (f_face_cell != NULL) {
1202 #   pragma omp parallel for if(f_n_faces > CS_THR_MIN)
1203     for (cs_lnum_t face_id = 0; face_id < f_n_faces; face_id++) {
1204       cs_lnum_t ii = f_face_cell[face_id][0];
1205       cs_lnum_t jj = f_face_cell[face_id][1];
1206       if (ii == jj)
1207         bft_error(__FILE__, __LINE__, 0,
1208                   _("Connectivity error:\n"
1209                     "Face %d has same cell %d on both sides."),
1210                   (int)(face_id+1), (int)(ii+1));
1211     }
1212   }
1213 
1214   /* Compute number of coarse rows */
1215 
1216   for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
1217     if (c->coarse_row[ii] >= c_n_rows)
1218       c_n_rows = c->coarse_row[ii] + 1;
1219   }
1220 
1221   c->n_rows = c_n_rows;
1222   c->n_g_rows = c_n_rows;
1223 
1224 #if defined(HAVE_MPI)
1225   if (cs_glob_n_ranks > 1) {
1226     if (f->comm != MPI_COMM_NULL) {
1227       cs_gnum_t _c_n_rows = c_n_rows;
1228       MPI_Allreduce(&_c_n_rows, &(c->n_g_rows), 1, CS_MPI_GNUM, MPI_SUM,
1229                     f->comm);
1230     }
1231   }
1232 #endif
1233 
1234   /* Prolong mesh coarsening indicator to halo rows and build
1235      coarse mesh halos if necessary */
1236 
1237   if (f->halo != NULL) {
1238     _coarsen_halo(f, c);
1239     c->n_cols_ext = c->n_rows + c->halo->n_elts[0];
1240   }
1241   else
1242     c->n_cols_ext = c_n_rows;
1243 
1244   c->n_elts_r[0] = c->n_rows;
1245   c->n_elts_r[1] = c->n_cols_ext;
1246 
1247   /* Build face coarsening and coarse grid face -> cells connectivity */
1248 
1249   if (  f->face_cell != NULL
1250       && (   c->relaxation > 0
1251           || cs_matrix_get_type(f->matrix) == CS_MATRIX_NATIVE)) {
1252     _coarsen_faces(f,
1253                    c->coarse_row,
1254                    c->n_rows,
1255                    &(c->n_faces),
1256                    &(c->coarse_face),
1257                    &(c->_face_cell));
1258 
1259     c->face_cell = (const cs_lnum_2_t  *)(c->_face_cell);
1260   }
1261 
1262 }
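
/*----------------------------------------------------------------------------
 * Illustrative sketch (not used by the library) of the coarsening sequence
 * described for _coarse_init above: initialize the coarse grid, fill its
 * coarse_row array for local rows (here with a trivial, purely hypothetical
 * pairing), then call _coarsen to complete the coarse grid.
 *----------------------------------------------------------------------------*/

#if 0 /* illustrative sketch only */

static cs_grid_t *
_sketch_coarsening_sequence(const cs_grid_t  *f)
{
  cs_grid_t *c = _coarse_init(f);

  /* Assign each pair of consecutive fine rows to one coarse row
     (a real scheme would use matrix coefficients and connectivity) */

  for (cs_lnum_t ii = 0; ii < f->n_rows; ii++)
    c->coarse_row[ii] = ii / 2;

  _coarsen(f, c);   /* builds row counts, halo, and face connectivity */

  return c;
}

#endif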
1263 
1264 #if defined(HAVE_MPI)
1265 
1266 /*----------------------------------------------------------------------------
1267  * Rebuild halo send lists after appending arrays.
1268  *
1269  * parameters:
1270  *   h                 <-> Pointer to halo structure
1271  *   new_src_cell_id   <-> new list of cells the senders should provide
1272  *----------------------------------------------------------------------------*/
1273 
1274 static void
1275 _rebuild_halo_send_lists(cs_halo_t  *h,
1276                          cs_lnum_t   new_src_cell_id[])
1277 {
1278   /* As halos are based on interior faces, every domain is both
1279      sender and receiver */
1280 
1281   cs_lnum_t start, length;
1282 
1283   int rank_id, tr_id;
1284   int n_sections = 1 + h->n_transforms;
1285   int request_count = 0;
1286   cs_lnum_t *send_buf = NULL, *recv_buf = NULL;
1287   MPI_Status *status = NULL;
1288   MPI_Request *request = NULL;
1289 
1290   BFT_MALLOC(status, h->n_c_domains*2, MPI_Status);
1291   BFT_MALLOC(request, h->n_c_domains*2, MPI_Request);
1292   BFT_MALLOC(send_buf, h->n_c_domains*n_sections, cs_lnum_t);
1293   BFT_MALLOC(recv_buf, h->n_c_domains*n_sections, cs_lnum_t);
1294 
1295   /* Exchange sizes */
1296 
1297   for (rank_id = 0; rank_id < h->n_c_domains; rank_id++)
1298     MPI_Irecv(recv_buf + rank_id*n_sections,
1299               n_sections,
1300               CS_MPI_LNUM,
1301               h->c_domain_rank[rank_id],
1302               h->c_domain_rank[rank_id],
1303               cs_glob_mpi_comm,
1304               &(request[request_count++]));
1305 
1306   /* Send data to distant ranks */
1307 
1308   for (rank_id = 0; rank_id < h->n_c_domains; rank_id++) {
1309     send_buf[rank_id*n_sections]
1310       = h->index[2*(rank_id+1)] - h->index[2*rank_id];
1311     for (tr_id = 0; tr_id < h->n_transforms; tr_id++) {
1312       send_buf[rank_id*n_sections + tr_id + 1]
1313         = h->perio_lst[h->n_c_domains*4*tr_id + 4*rank_id + 1];
1314     }
1315     MPI_Isend(send_buf + rank_id*n_sections,
1316               n_sections,
1317               CS_MPI_LNUM,
1318               h->c_domain_rank[rank_id],
1319               cs_glob_rank_id,
1320               cs_glob_mpi_comm,
1321               &(request[request_count++]));
1322   }
1323 
1324   /* Wait for all exchanges */
1325 
1326   MPI_Waitall(request_count, request, status);
1327   request_count = 0;
1328 
1329   /* Update sizes */
1330 
1331   CS_MALLOC_HD(h->send_index, h->n_c_domains*2 + 1, cs_lnum_t, CS_ALLOC_HOST);
1332   h->send_index[0] = 0;
1333   for (rank_id = 0; rank_id < h->n_c_domains; rank_id++) {
1334     h->send_index[rank_id*2 + 1]
1335       = h->send_index[rank_id*2] + recv_buf[rank_id*n_sections];
1336     h->send_index[rank_id*2 + 2] = h->send_index[rank_id*2 + 1];
1337   }
1338 
1339   /* Update send_perio_lst in case of transforms */
1340 
1341   if (h->n_transforms > 0) {
1342     BFT_MALLOC(h->send_perio_lst, h->n_c_domains*h->n_transforms*4, cs_lnum_t);
1343     for (rank_id = 0; rank_id < h->n_c_domains; rank_id++) {
1344       cs_lnum_t n_cur_vals = recv_buf[rank_id*n_sections];
1345       for (tr_id = 0; tr_id < h->n_transforms; tr_id++)
1346         n_cur_vals -= recv_buf[rank_id*n_sections + 1 + tr_id];
1347       n_cur_vals += h->send_index[rank_id*2];
1348       for (tr_id = 0; tr_id < h->n_transforms; tr_id++) {
1349         cs_lnum_t n_tr_vals = recv_buf[rank_id*n_sections + 1 + tr_id];
1350         h->send_perio_lst[h->n_c_domains*4*tr_id + 4*rank_id] = n_cur_vals;
1351         h->send_perio_lst[h->n_c_domains*4*tr_id + 4*rank_id + 1] = n_tr_vals;
1352         n_cur_vals += n_tr_vals;
1353         h->send_perio_lst[h->n_c_domains*4*tr_id + 4*rank_id + 2] = n_cur_vals;
1354         h->send_perio_lst[h->n_c_domains*4*tr_id + 4*rank_id + 3] = 0;
1355       }
1356     }
1357   }
1358 
1359   BFT_FREE(send_buf);
1360   BFT_FREE(recv_buf);
1361 
1362   h->n_send_elts[0] = h->send_index[h->n_c_domains*2];
1363   h->n_send_elts[1] = h->n_send_elts[0];
1364 
1365   CS_MALLOC_HD(h->send_list, h->n_send_elts[0], cs_lnum_t, CS_ALLOC_HOST);
1366 
1367   /* Receive data from distant ranks */
1368 
1369   for (rank_id = 0; rank_id < h->n_c_domains; rank_id++) {
1370     start = h->send_index[2*rank_id];
1371     length = h->send_index[2*rank_id + 1] - h->send_index[2*rank_id];
1372     MPI_Irecv(h->send_list + start,
1373               length,
1374               CS_MPI_LNUM,
1375               h->c_domain_rank[rank_id],
1376               h->c_domain_rank[rank_id],
1377               cs_glob_mpi_comm,
1378               &(request[request_count++]));
1379   }
1380 
1381   /* Send data to distant ranks */
1382 
1383   for (rank_id = 0; rank_id < h->n_c_domains; rank_id++) {
1384     start = h->index[2*rank_id];
1385     length = h->index[2*rank_id + 1] - h->index[2*rank_id];
1386     MPI_Isend(new_src_cell_id + start,
1387               length,
1388               CS_MPI_LNUM,
1389               h->c_domain_rank[rank_id],
1390               cs_glob_rank_id,
1391               cs_glob_mpi_comm,
1392               &(request[request_count++]));
1393   }
1394 
1395   /* Wait for all exchanges */
1396 
1397   MPI_Waitall(request_count, request, status);
1398 
1399   BFT_FREE(request);
1400   BFT_FREE(status);
1401 }
1402 
1403 /*----------------------------------------------------------------------------
1404  * Empty a halo that is only useful for global synchronization
1405  *
1406  * parameters:
1407  *   h <-> pointer to halo structure being emptied
1408  *----------------------------------------------------------------------------*/
1409 
1410 static void
1411 _empty_halo(cs_halo_t  *h)
1412 {
1413   if (h == NULL)
1414     return;
1415 
1416   h->n_c_domains = 0;
1417   BFT_FREE(h->c_domain_rank);
1418 
1419   h->n_send_elts[0] = 0;
1420   h->n_send_elts[1] = 0;
1421   h->n_elts[0] = 0;
1422   h->n_elts[1] = 0;
1423 
1424   CS_FREE_HD(h->send_list);
1425   CS_FREE_HD(h->send_index);
1426   BFT_FREE(h->send_perio_lst);
1427   BFT_FREE(h->index);
1428   BFT_FREE(h->perio_lst);
1429 }
1430 
1431 /*----------------------------------------------------------------------------
1432  * Merge halo info after appending arrays.
1433  *
1434  * parameters:
1435  *   h                 <-> pointer to halo structure
1436  *   loc_rank_id       <-- local rank id
1437  *   n_new_cells       <-- number of new local cells
1438  *   new_src_cell_id   <-> in: new halo sender cell id matching halo cell;
1439  *                         out: new list of cells the senders should provide
1440  *                              (same numbers, merged and possibly reordered)
1441  *   new_halo_cell_id --> new cell id for each halo cell
1442  *                         (< n_new_cells for cells that have become local,
1443  *                         >= n_new_cells for cells still in halo)
1444  *----------------------------------------------------------------------------*/
1445 
1446 static void
1447 _merge_halo_data(cs_halo_t   *h,
1448                  int          loc_rank_id,
1449                  cs_lnum_t    n_new_cells,
1450                  cs_lnum_t    new_src_cell_id[],
1451                  cs_lnum_t    new_halo_cell_id[])
1452 {
1453   int  rank_id, prev_rank_id, tr_id, prev_section_id;
1454   cs_lnum_t  ii, ii_0, ii_1, cur_id, section_id, src_id, prev_src_id;
1455 
1456   int   stride = (h->n_transforms > 0) ? 3 : 2;
1457   int   n_c_domains_ini = h->n_c_domains;
1458 
1459   cs_lnum_t   n_elts_ini = h->n_elts[0];
1460   cs_lnum_t  *order = NULL, *section_idx = NULL;
1461   cs_gnum_t  *tmp_num = NULL;
1462 
1463   const int  n_sections = h->n_transforms + 1;
1464 
1465   if (h->n_elts[0] < 1) {
1466     _empty_halo(h);
1467     return;
1468   }
1469 
1470   /* Order list by rank, transform, and new element number */
1471 
1472   BFT_MALLOC(tmp_num, n_elts_ini*stride, cs_gnum_t);
1473 
1474   for (rank_id = 0; rank_id < n_c_domains_ini; rank_id++) {
1475     for (ii = h->index[rank_id*2];
1476          ii < h->index[(rank_id+1)*2];
1477          ii++)
1478       tmp_num[ii*stride] = h->c_domain_rank[rank_id];
1479   }
1480 
1481   if (stride == 2) {
1482     for (ii = 0; ii < n_elts_ini; ii++)
1483       tmp_num[ii*2 + 1] = new_src_cell_id[ii];
1484   }
1485   else { /* if (stride == 3) */
1486 
1487     for (ii = 0; ii < n_elts_ini; ii++) {
1488       tmp_num[ii*3 + 1] = 0;
1489       tmp_num[ii*3 + 2] = new_src_cell_id[ii];
1490     }
1491 
1492     for (rank_id = 0; rank_id < n_c_domains_ini; rank_id++) {
1493       for (tr_id = 0; tr_id < h->n_transforms; tr_id++) {
1494         ii_0 = h->perio_lst[h->n_c_domains*4*tr_id + 4*rank_id];
1495         ii_1 = ii_0 + h->perio_lst[h->n_c_domains*4*tr_id + 4*rank_id + 1];
1496         for (ii = ii_0; ii < ii_1; ii++)
1497           tmp_num[ii*3 + 1] = tr_id + 1;
1498       }
1499     }
1500   }
1501 
1502   order = cs_order_gnum_s(NULL, tmp_num, stride, n_elts_ini);
1503 
1504   /* Rebuild lists and build renumbering */
1505 
1506   cur_id = order[0];
1507   h->n_c_domains = 0;
1508   h->index[0] = 0;
1509   h->n_elts[0] = 0;
1510   h->n_elts[1] = 0;
1511 
1512   prev_rank_id = -1;
1513   prev_src_id = 0;
1514 
1515   if (stride == 2) {
1516 
1517     for (ii = 0; ii < n_elts_ini; ii++) {
1518 
1519       bool is_same = true;
1520       cur_id = order[ii];
1521 
1522       rank_id = tmp_num[cur_id*2];
1523       src_id = tmp_num[cur_id*2 + 1];
1524 
1525       if (rank_id != loc_rank_id) {
1526 
1527         if (rank_id != prev_rank_id) {
1528           h->c_domain_rank[h->n_c_domains] = rank_id;
1529           h->index[h->n_c_domains*2] = h->n_elts[0];
1530           h->n_c_domains += 1;
1531           is_same = false;
1532         }
1533         if (src_id != prev_src_id)
1534           is_same = false;
1535 
1536         if (is_same == false) {
1537           new_src_cell_id[h->n_elts[0]] = src_id;
1538           h->n_elts[0] += 1;
1539         }
1540 
1541         new_halo_cell_id[cur_id] = n_new_cells + h->n_elts[0] - 1;
1542 
1543         prev_rank_id = rank_id;
1544         prev_src_id = src_id;
1545 
1546       }
1547       else { /* if (rank_id == loc_rank_id) */
1548         new_halo_cell_id[cur_id] = tmp_num[cur_id*2 + 1];
1549         assert(new_halo_cell_id[cur_id] < n_new_cells);
1550       }
1551     }
1552 
1553   }
1554   else { /* if (stride == 3) */
1555 
1556     int   rank_idx = -1;
1557     cs_lnum_t section_idx_size = 0;
1558 
1559     prev_section_id = -1;
1560 
1561     /* Index will be initialized as count */
1562 
1563     for (ii = 0; ii < n_elts_ini; ii++) {
1564 
1565       bool is_same = true;
1566       cur_id = order[ii];
1567 
1568       rank_id = tmp_num[cur_id*3];
1569       section_id = tmp_num[cur_id*3 + 1];
1570       src_id = tmp_num[cur_id*3 + 2];
1571 
1572       if (rank_id != loc_rank_id || tmp_num[cur_id*3 + 1] != 0) {
1573 
1574         if (rank_id != prev_rank_id) {
1575           h->c_domain_rank[h->n_c_domains] = rank_id;
1576           h->index[h->n_c_domains*2] = h->n_elts[0];
1577           h->n_c_domains += 1;
1578           is_same = false;
1579           rank_idx += 1;
1580         }
1581         if (section_id != prev_section_id)
1582           is_same = false;
1583         if (src_id != prev_src_id)
1584           is_same = false;
1585 
1586         if (is_same == false) {
1587           new_src_cell_id[h->n_elts[0]] = src_id;
1588           h->n_elts[0] += 1;
1589           while (rank_idx*n_sections + section_id + 1 >= section_idx_size) {
1590             cs_lnum_t section_idx_size_prv = section_idx_size;
1591             section_idx_size
1592               = (section_idx_size_prv > 0) ? section_idx_size_prv*2 : 16;
1593             BFT_REALLOC(section_idx, section_idx_size, cs_lnum_t);
1594             for (cs_lnum_t sid = section_idx_size_prv;
1595                  sid < section_idx_size;
1596                  sid++)
1597               section_idx[sid] = 0;
1598           };
1599           section_idx[rank_idx*n_sections + section_id + 1] += 1;
1600         }
1601 
1602         new_halo_cell_id[cur_id] = n_new_cells + h->n_elts[0] - 1;
1603 
1604         prev_rank_id = rank_id;
1605         prev_section_id = section_id;
1606         prev_src_id = src_id;
1607 
1608       }
1609       else { /* if (rank_id == loc_rank_id && tmp_num[cur_id*3 + 1] == 0) */
1610         new_halo_cell_id[cur_id] = tmp_num[cur_id*3 + 2];
1611         assert(new_halo_cell_id[cur_id] < n_new_cells);
1612       }
1613 
1614     }
1615 
1616     /* Transform count to index */
1617 
1618     section_idx_size = n_sections * h->n_c_domains + 1;
1619     for (cs_lnum_t sid = 1; sid < section_idx_size; sid++)
1620       section_idx[sid] += section_idx[sid - 1];
1621   }
1622 
1623   if (h->n_transforms > 0) {
1624 
1625     for (tr_id = 0; tr_id < h->n_transforms; tr_id++) {
1626       for (rank_id = 0; rank_id < h->n_c_domains; rank_id++) {
1627         section_id = rank_id*n_sections + tr_id + 1;
1628         h->perio_lst[h->n_c_domains*4*tr_id + 4*rank_id]
1629           = section_idx[section_id];
1630         h->perio_lst[h->n_c_domains*4*tr_id + 4*rank_id + 1]
1631           = section_idx[section_id + 1] - section_idx[section_id];
1632         h->perio_lst[h->n_c_domains*4*tr_id + 4*rank_id + 2]
1633           = section_idx[section_id + 1];
1634         h->perio_lst[h->n_c_domains*4*tr_id + 4*rank_id + 3]
1635           = 0;
1636       }
1637     }
1638 
1639     BFT_FREE(section_idx);
1640 
1641   }
1642 
1643   /* Free and resize memory */
1644 
1645   BFT_FREE(order);
1646   BFT_FREE(tmp_num);
1647   BFT_REALLOC(h->c_domain_rank, h->n_c_domains, int);
1648   BFT_REALLOC(h->index, h->n_c_domains*2+1, cs_lnum_t);
1649   if (h->n_transforms > 0)
1650     BFT_REALLOC(h->perio_lst,
1651                 h->n_c_domains * h->n_transforms * 4,
1652                 cs_lnum_t);
1653 
1654   h->n_elts[1] = h->n_elts[0];
1655   h->index[h->n_c_domains*2] = h->n_elts[0];
1656   for (ii = 0; ii < h->n_c_domains; ii++)
1657     h->index[ii*2+1] = h->index[ii*2+2];
1658 }
1659 
1660 /*----------------------------------------------------------------------------
1661  * Append halo info for grid grouping and merging.
1662  *
1663  * parameters:
1664  *   g           <-> pointer to grid structure being merged
1665  *   new_cell_id <-> new cell ids for local cells
1666  *                   in: defined for local cells
1667  *                   out: updated also for halo cells
1668  *----------------------------------------------------------------------------*/
1669 
1670 static void
1671 _append_halos(cs_grid_t   *g,
1672               cs_lnum_t   *new_cell_id)
1673 {
1674   cs_lnum_t ii, jj;
1675   int rank_id;
1676   int counts[3];
1677 
1678   int *recv_count = NULL;
1679   cs_lnum_t *new_src_cell_id = NULL, *new_halo_cell_id = NULL;
1680 
1681   cs_halo_t *h = g->_halo;
1682 
1683   MPI_Status status;
1684   MPI_Comm  comm = cs_glob_mpi_comm;
1685 
1686   const int n_transforms = h->n_transforms;
1687   static const int tag = 'a'+'p'+'p'+'e'+'n'+'d'+'_'+'h';
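  /* (Illustrative note, not in the original source: the tag is simply the
      sum of the character codes of "append_h", i.e. 831; summing the
      function-name characters gives a cheap, deterministic MPI tag per
      exchange type.) */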
1688 
1689   /* Remove send indexes, which will be rebuilt */
1690 
1691   h->n_send_elts[0] = 0;
1692   h->n_send_elts[1] = 0;
1693   CS_FREE_HD(h->send_list);
1694   CS_FREE_HD(h->send_index);
1695   BFT_FREE(h->send_perio_lst);
1696 
1697   if (g->merge_sub_size == 0) {
1698     _empty_halo(h);
1699     return;
1700   }
1701 
1702   /* Adjust numbering */
1703 
1704   for (rank_id = 0; rank_id < h->n_c_domains; rank_id++) {
1705     int init_rank_id = h->c_domain_rank[rank_id];
1706     h->c_domain_rank[rank_id]
1707       = init_rank_id - (init_rank_id % g->next_merge_stride);
1708   }
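
  /* Worked example (illustrative, not from the original source): with
     g->next_merge_stride = 4, neighbor ranks 4, 5, 6 and 7 all map to 4,
     the root rank of their merge set, since
     init_rank_id - (init_rank_id % 4) drops the sub-rank offset. */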
1709 
1710   counts[0] = h->n_c_domains;
1711   counts[1] = h->n_local_elts;
1712   counts[2] = h->n_elts[0];
1713 
1714   /* Exchange counters needed for concatenation */
1715 
1716   if (g->merge_sub_rank == 0) {
1717     BFT_MALLOC(recv_count, g->merge_sub_size*3, int);
1718     for (ii = 0; ii < 3; ii++)
1719       recv_count[ii] = counts[ii];
1720     for (rank_id = 1; rank_id < g->merge_sub_size; rank_id++) {
1721       int dist_rank = g->merge_sub_root + g->merge_stride*rank_id;
1722       MPI_Recv(recv_count + 3*rank_id, 3, MPI_INT, dist_rank,
1723                tag, comm, &status);
1724       for (ii = 0; ii < 3; ii++)
1725         counts[ii] += recv_count[rank_id*3 + ii];
1726     }
1727   }
1728   else
1729     MPI_Send(counts, 3, MPI_INT, g->merge_sub_root, tag, comm);
1730 
1731   /* In case of periodic transforms, transpose perio list
1732      so as to have blocks of fixed n_transforms size, easier
1733      to work with for append + merge operations */
1734 
1735   if (n_transforms > 0) {
1736     cs_lnum_t *perio_list_tr;
1737     BFT_MALLOC(perio_list_tr, h->n_c_domains*n_transforms*4, cs_lnum_t);
1738     for (rank_id = 0; rank_id < h->n_c_domains; rank_id++) {
1739       for (int tr_id = 0; tr_id < n_transforms; tr_id++) {
1740         for (int k = 0; k < 4; k++)
1741           perio_list_tr[n_transforms*4*rank_id + 4*tr_id + k]
1742             = h->perio_lst[h->n_c_domains*4*tr_id + 4*rank_id + k];
1743       }
1744     }
1745     memcpy(h->perio_lst,
1746            perio_list_tr,
1747            h->n_c_domains*n_transforms*4*sizeof(cs_lnum_t));
1748     BFT_FREE(perio_list_tr);
1749   }
1750 
1751   /* Reallocate arrays for receiving rank and append data */
1752 
1753   if (g->merge_sub_rank == 0) {
1754 
1755     BFT_MALLOC(new_src_cell_id, counts[2], cs_lnum_t);
1756     for (ii = g->n_rows, jj = 0; ii < g->n_cols_ext; ii++, jj++)
1757       new_src_cell_id[jj] = new_cell_id[ii];
1758 
1759     BFT_REALLOC(h->c_domain_rank, counts[0], int);
1760     BFT_REALLOC(h->index, counts[0]*2 + 1, cs_lnum_t);
1761     BFT_REALLOC(h->perio_lst, counts[0]*n_transforms*4, cs_lnum_t);
1762 
1763     for (rank_id = 1; rank_id < g->merge_sub_size; rank_id++) {
1764 
1765       int n_c_domains_r = recv_count[rank_id*3 + 0];
1766       cs_lnum_t n_recv = recv_count[rank_id*3 + 2];
1767 
1768       cs_lnum_t index_shift = h->index[2*h->n_c_domains];
1769       int dist_rank = g->merge_sub_root + g->merge_stride*rank_id;
1770 
1771       MPI_Recv(h->c_domain_rank + h->n_c_domains, n_c_domains_r,
1772                MPI_INT, dist_rank, tag, comm, &status);
1773       MPI_Recv(new_src_cell_id + h->n_elts[0], n_recv,
1774                CS_MPI_LNUM, dist_rank, tag, comm, &status);
1775       MPI_Recv(h->index + h->n_c_domains*2, n_c_domains_r*2+1,
1776                CS_MPI_LNUM, dist_rank, tag, comm, &status);
1777 
1778       for (ii = 0, jj = h->n_c_domains*2;
1779            ii < n_c_domains_r*2+1;
1780            ii++, jj++)
1781         h->index[jj] += index_shift;
1782 
1783       if (n_transforms > 0) {
1784         MPI_Recv(h->perio_lst + h->n_c_domains*n_transforms*4,
1785                  n_c_domains_r*n_transforms*4,
1786                  CS_MPI_LNUM, dist_rank, tag, comm, &status);
1787         for (ii = 0, jj = h->n_c_domains*n_transforms*4;
1788              ii < n_c_domains_r*n_transforms*4;
1789              ii+=2, jj+=2)
1790           h->perio_lst[jj] += index_shift;
1791       }
1792 
1793       /* Update halo sizes */
1794 
1795       h->n_local_elts += recv_count[rank_id*3 + 1];
1796       h->n_c_domains += n_c_domains_r;
1797       h->n_elts[0] += n_recv;
1798       h->n_elts[1] = h->n_elts[0];
1799     }
1800 
1801   }
1802   else if (g->merge_sub_size > 1) {
1803 
1804     BFT_MALLOC(new_src_cell_id, h->n_elts[0], cs_lnum_t);
1805     for (ii = g->n_rows, jj = 0; ii < g->n_cols_ext; ii++, jj++)
1806       new_src_cell_id[jj] = new_cell_id[ii];
1807 
1808     MPI_Send(h->c_domain_rank, h->n_c_domains, MPI_INT,
1809              g->merge_sub_root, tag, comm);
1810     MPI_Send(new_src_cell_id, h->n_elts[0], CS_MPI_LNUM,
1811              g->merge_sub_root, tag, comm);
1812     MPI_Send(h->index, h->n_c_domains*2+1, CS_MPI_LNUM,
1813              g->merge_sub_root, tag, comm);
1814 
1815     if (n_transforms > 0)
1816       MPI_Send(h->perio_lst, h->n_c_domains*n_transforms*4,
1817                CS_MPI_LNUM, g->merge_sub_root, tag, comm);
1818 
1819     _empty_halo(h);
1820   }
1821 
1822   /* Clean up halo and set pointer for coarsening (sub_root) ranks */
1823 
1824   if (h != NULL) {
1825 
1826     /* In case of periodic transforms, transpose perio list back to its
1827        standard order */
1828 
1829     if (n_transforms > 0) {
1830       cs_lnum_t *perio_list_tr;
1831       BFT_MALLOC(perio_list_tr, h->n_c_domains*n_transforms*4, cs_lnum_t);
1832       for (int tr_id = 0; tr_id < n_transforms; tr_id++) {
1833         for (rank_id = 0; rank_id < h->n_c_domains; rank_id++) {
1834           for (int k = 0; k < 4; k++)
1835             perio_list_tr[h->n_c_domains*4*tr_id + 4*rank_id + k]
1836               = h->perio_lst[n_transforms*4*rank_id + 4*tr_id + k];
1837         }
1838       }
1839       memcpy(h->perio_lst,
1840              perio_list_tr,
1841              h->n_c_domains*n_transforms*4*sizeof(cs_lnum_t));
1842       BFT_FREE(perio_list_tr);
1843     }
1844 
1845     /* Now merge data */
1846 
1847     BFT_MALLOC(new_halo_cell_id, h->n_elts[0], cs_lnum_t);
1848 
1849     _merge_halo_data(h,
1850                      cs_glob_rank_id,
1851                      counts[1],
1852                      new_src_cell_id,
1853                      new_halo_cell_id);
1854 
1855     _rebuild_halo_send_lists(h, new_src_cell_id);
1856 
1857   }
1858 
1859   if (new_src_cell_id != NULL)
1860     BFT_FREE(new_src_cell_id);
1861 
1862   g->halo = h;
1863   g->_halo = h;
1864 
1865   /* Finally, update halo section of cell renumbering array */
1866 
1867   if (g->merge_sub_rank == 0) {
1868 
1869     cs_lnum_t n_send = recv_count[2];
1870     cs_lnum_t send_shift = n_send;
1871 
1872     for (ii = 0; ii < n_send; ii++)
1873       new_cell_id[g->n_rows + ii] = new_halo_cell_id[ii];
1874 
1875     for (rank_id = 1; rank_id < g->merge_sub_size; rank_id++) {
1876       int dist_rank = g->merge_sub_root + g->merge_stride*rank_id;
1877       n_send = recv_count[rank_id*3 + 2];
1878       MPI_Send(new_halo_cell_id + send_shift, n_send, CS_MPI_LNUM,
1879                dist_rank, tag, comm);
1880       send_shift += n_send;
1881     }
1882 
1883     BFT_FREE(recv_count);
1884   }
1885   else if (g->merge_sub_size > 1)
1886     MPI_Recv(new_cell_id + g->n_rows, g->n_cols_ext - g->n_rows,
1887              CS_MPI_LNUM, g->merge_sub_root, tag, comm, &status);
1888 
1889   BFT_FREE(new_halo_cell_id);
1890 }
1891 
1892 /*----------------------------------------------------------------------------
1893  * Append cell arrays and counts for grid grouping and merging.
1894  *
1895  * parameters:
1896  *   g <-> pointer to grid structure being merged
1897  *----------------------------------------------------------------------------*/
1898 
1899 static void
1900 _append_cell_data(cs_grid_t  *g)
1901 {
1902   int rank_id;
1903 
1904   MPI_Status status;
1905   MPI_Comm  comm = cs_glob_mpi_comm;
1906 
1907   static const int tag = 'a'+'p'+'p'+'e'+'n'+'d'+'_'+'c';
1908 
1909   /* Reallocate arrays for receiving rank and append data */
1910 
1911   if (g->merge_sub_rank == 0) {
1912 
1913     g->n_rows = g->merge_cell_idx[g->merge_sub_size];
1914     g->n_cols_ext = g->n_rows + g->halo->n_elts[0];
1915 
1916     if (g->relaxation > 0) {
1917       BFT_REALLOC(g->_cell_cen, g->n_cols_ext * 3, cs_real_t);
1918       BFT_REALLOC(g->_cell_vol, g->n_cols_ext, cs_real_t);
1919     }
1920     BFT_REALLOC(g->_da, g->n_cols_ext*g->db_size[3], cs_real_t);
1921 
1922     for (rank_id = 1; rank_id < g->merge_sub_size; rank_id++) {
1923 
1924       cs_lnum_t n_recv = (  g->merge_cell_idx[rank_id+1]
1925                            - g->merge_cell_idx[rank_id]);
1926       int dist_rank = g->merge_sub_root + g->merge_stride*rank_id;
1927 
1928       if (g->relaxation > 0) {
1929         MPI_Recv(g->_cell_cen + g->merge_cell_idx[rank_id]*3, n_recv*3,
1930                  CS_MPI_REAL, dist_rank, tag, comm, &status);
1931 
1932         MPI_Recv(g->_cell_vol + g->merge_cell_idx[rank_id], n_recv,
1933                  CS_MPI_REAL, dist_rank, tag, comm, &status);
1934       }
1935 
1936       MPI_Recv(g->_da + g->merge_cell_idx[rank_id]*g->db_size[3],
1937                n_recv*g->db_size[3],
1938                CS_MPI_REAL, dist_rank, tag, comm, &status);
1939 
1940     }
1941 
1942   }
1943   else {
1944     if (g->relaxation > 0) {
1945       MPI_Send(g->_cell_cen, g->n_rows*3, CS_MPI_REAL, g->merge_sub_root,
1946                tag, comm);
1947       BFT_FREE(g->_cell_cen);
1948 
1949       MPI_Send(g->_cell_vol, g->n_rows, CS_MPI_REAL, g->merge_sub_root,
1950                tag, comm);
1951       BFT_FREE(g->_cell_vol);
1952     }
1953 
1954     MPI_Send(g->_da, g->n_rows*g->db_size[3], CS_MPI_REAL,
1955              g->merge_sub_root, tag, comm);
1956     BFT_FREE(g->_da);
1957 
1958     g->n_rows = 0;
1959     g->n_cols_ext = 0;
1960   }
1961 
1962   if (g->relaxation > 0) {
1963     g->cell_cen = g->_cell_cen;
1964     g->cell_vol = g->_cell_vol;
1965   }
1966 
1967   g->da = g->_da;
1968 }
1969 
1970 /*----------------------------------------------------------------------------
1971  * Synchronize cell array values for grid grouping and merging.
1972  *
1973  * parameters:
1974  *   g <-> pointer to grid structure being merged
1975  *----------------------------------------------------------------------------*/
1976 
1977 static void
1978 _sync_merged_cell_data(cs_grid_t  *g)
1979 {
1980   if (g->halo != NULL) {
1981 
1982     if (g->relaxation > 0) {
1983       cs_halo_sync_var_strided(g->halo, CS_HALO_STANDARD, g->_cell_cen, 3);
1984       if (g->halo->n_transforms > 0)
1985         cs_halo_perio_sync_coords(g->halo, CS_HALO_STANDARD, g->_cell_cen);
1986 
1987       cs_halo_sync_var(g->halo, CS_HALO_STANDARD, g->_cell_vol);
1988     }
1989 
1990     cs_halo_sync_var_strided(g->halo, CS_HALO_STANDARD,
1991                              g->_da, g->db_size[3]);
1992 
1993   }
1994 }
1995 
1996 /*----------------------------------------------------------------------------
1997  * Append face arrays and counts for grid grouping and merging.
1998  *
1999  * parameters:
2000  *   g           <-> pointer to grid structure being merged
2001  *   n_faces     <-- number of faces to append
2002  *   face_list   <-- list of ids of faces to append
2003  *                   (used to filter and send face data on ranks
2004  *                    other than the merge root)
2005  *----------------------------------------------------------------------------*/
2006 
2007 static void
2008 _append_face_data(cs_grid_t   *g,
2009                   cs_lnum_t    n_faces,
2010                   cs_lnum_t   *face_list)
2011 {
2012   int rank_id;
2013 
2014   cs_lnum_t *recv_count = NULL;
2015 
2016   MPI_Status status;
2017   MPI_Comm  comm = cs_glob_mpi_comm;
2018 
2019   static const int tag = 'a'+'p'+'p'+'e'+'n'+'d'+'_'+'f';
2020 
2021   /* Exchange counters needed for concatenation */
2022 
2023   if (g->merge_sub_rank == 0) {
2024     BFT_MALLOC(recv_count, g->merge_sub_size, cs_lnum_t);
2025     recv_count[0] = g->n_faces;
2026     for (rank_id = 1; rank_id < g->merge_sub_size; rank_id++) {
2027       int dist_rank = g->merge_sub_root + g->merge_stride*rank_id;
2028       MPI_Recv(recv_count + rank_id, 1, CS_MPI_LNUM, dist_rank,
2029                tag, comm, &status);
2030     }
2031   }
2032   else
2033     MPI_Send(&n_faces, 1, CS_MPI_LNUM, g->merge_sub_root, tag, comm);
2034 
2035   /* Reallocate arrays for receiving rank and append data */
2036 
2037   BFT_FREE(g->coarse_face);
2038 
2039   if (g->merge_sub_rank == 0) {
2040 
2041     cs_lnum_t n_faces_tot = 0;
2042 
2043     for (rank_id = 0; rank_id < g->merge_sub_size; rank_id++)
2044       n_faces_tot += recv_count[rank_id];
2045 
2046     BFT_REALLOC(g->_face_cell, n_faces_tot, cs_lnum_2_t);
2047 
2048     if (g->symmetric == true)
2049       BFT_REALLOC(g->_xa, n_faces_tot, cs_real_t);
2050     else
2051       BFT_REALLOC(g->_xa, n_faces_tot*2, cs_real_t);
2052 
2053     if (g->relaxation > 0) {
2054 
2055       BFT_REALLOC(g->_face_normal, n_faces_tot*3, cs_real_t);
2056 
2057       BFT_REALLOC(g->_xa0, n_faces_tot, cs_real_t);
2058       BFT_REALLOC(g->xa0ij, n_faces_tot*3, cs_real_t);
2059 
2060     }
2061 
2062     g->n_faces = recv_count[0];
2063 
2064     for (rank_id = 1; rank_id < g->merge_sub_size; rank_id++) {
2065 
2066       cs_lnum_t n_recv = recv_count[rank_id];
2067       int dist_rank = g->merge_sub_root + g->merge_stride*rank_id;
2068 
2069       MPI_Recv(g->_face_cell + g->n_faces, n_recv*2,
2070                CS_MPI_LNUM, dist_rank, tag, comm, &status);
2071 
2072       if (g->symmetric == true)
2073         MPI_Recv(g->_xa + g->n_faces, n_recv,
2074                  CS_MPI_REAL, dist_rank, tag, comm, &status);
2075       else
2076         MPI_Recv(g->_xa + g->n_faces*2, n_recv*2,
2077                  CS_MPI_REAL, dist_rank, tag, comm, &status);
2078 
2079       if (g->relaxation > 0) {
2080 
2081         MPI_Recv(g->_face_normal + g->n_faces*3, n_recv*3,
2082                  CS_MPI_REAL, dist_rank, tag, comm, &status);
2083 
2084         MPI_Recv(g->_xa0 + g->n_faces, n_recv,
2085                  CS_MPI_REAL, dist_rank, tag, comm, &status);
2086 
2087         MPI_Recv(g->xa0ij + g->n_faces*3, n_recv*3,
2088                  CS_MPI_REAL, dist_rank, tag, comm, &status);
2089 
2090       }
2091 
2092       g->n_faces += recv_count[rank_id];
2093     }
2094 
2095     BFT_FREE(recv_count);
2096 
2097   }
2098   else {
2099 
2100     cs_lnum_t face_id = 0;
2101 
2102     /* Filter face connectivity then send it */
2103 
2104     for (face_id = 0; face_id < n_faces; face_id++) {
2105       cs_lnum_t p_face_id = face_list[face_id];
2106       g->_face_cell[face_id][0] = g->_face_cell[p_face_id][0];
2107       g->_face_cell[face_id][1] = g->_face_cell[p_face_id][1];
2108       if (g->symmetric == true)
2109         g->_xa[face_id] = g->_xa[p_face_id];
2110       else {
2111         g->_xa[face_id*2] = g->_xa[p_face_id*2];
2112         g->_xa[face_id*2+1] = g->_xa[p_face_id*2+1];
2113       }
2114       if (g->relaxation > 0) {
2115         g->_face_normal[face_id*3] = g->_face_normal[p_face_id*3];
2116         g->_face_normal[face_id*3 + 1] = g->_face_normal[p_face_id*3 + 1];
2117         g->_face_normal[face_id*3 + 2] = g->_face_normal[p_face_id*3 + 2];
2118         g->_xa0[face_id] = g->_xa0[p_face_id];
2119         g->xa0ij[face_id*3] = g->xa0ij[p_face_id*3];
2120         g->xa0ij[face_id*3 + 1] = g->xa0ij[p_face_id*3 + 1];
2121         g->xa0ij[face_id*3 + 2] = g->xa0ij[p_face_id*3 + 2];
2122       }
2123     }
2124 
2125     MPI_Send(g->_face_cell, n_faces*2, CS_MPI_LNUM,
2126              g->merge_sub_root, tag, comm);
2127     BFT_FREE(g->_face_cell);
2128 
2129     if (g->symmetric == true)
2130       MPI_Send(g->_xa, n_faces, CS_MPI_REAL,
2131                g->merge_sub_root, tag, comm);
2132     else
2133       MPI_Send(g->_xa, n_faces*2, CS_MPI_REAL,
2134                g->merge_sub_root, tag, comm);
2135     BFT_FREE(g->_xa);
2136 
2137     if (g->relaxation > 0) {
2138       MPI_Send(g->_face_normal, n_faces*3, CS_MPI_REAL,
2139                g->merge_sub_root, tag, comm);
2140       BFT_FREE(g->_face_normal);
2141 
2142       MPI_Send(g->_xa0, n_faces, CS_MPI_REAL,
2143                g->merge_sub_root, tag, comm);
2144       BFT_FREE(g->_xa0);
2145 
2146       MPI_Send(g->xa0ij, n_faces*3, CS_MPI_REAL,
2147                g->merge_sub_root, tag, comm);
2148       BFT_FREE(g->xa0ij);
2149     }
2150 
2151     g->n_faces = 0;
2152   }
2153 
2154   g->face_cell = (const cs_lnum_2_t  *)(g->_face_cell);
2155   g->xa = g->_xa;
2156   if (g->relaxation > 0) {
2157     g->face_normal = g->_face_normal;
2158     g->xa0 = g->_xa0;
2159   }
2160 }
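
/* Storage note (illustrative, not part of the original source): a symmetric
   matrix stores one extra-diagonal coefficient per face, while the
   non-symmetric case stores both directions interleaved:

     sym:      xa[face_id]                          (a_ij == a_ji)
     non-sym:  xa[face_id*2], xa[face_id*2 + 1]     (a_ij and a_ji)

   which is why the counts and MPI message sizes above are doubled when
   g->symmetric is false. */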
2161 
2162 /*----------------------------------------------------------------------------
2163  * Merge grids on several ranks to a grid on a single rank
2164  *
2165  * The coarse grid must previously have been initialized with _coarse_init()
2166  * and its coarsening indicator determined (at least for the local cells;
2167  * it is extended to halo cells here if necessary).
2168  *
2169  * - Periodic faces are not handled yet
2170  *
2171  * parameters:
2172  *   g            <-- Pointer to grid structure
2173  *   merge_stride <-- Associated merge stride
2174  *   verbosity    <-- verbosity level
2175  *----------------------------------------------------------------------------*/
2176 
2177 static void
2178 _merge_grids(cs_grid_t  *g,
2179              int         merge_stride,
2180              int         verbosity)
2181 {
2182   int i, rank_id, t_id;
2183   cs_lnum_t j, face_id;
2184   int base_rank = cs_glob_rank_id;
2185   cs_lnum_t cell_shift = 0;
2186   cs_lnum_t n_faces = 0;
2187   cs_lnum_t *new_cell_id = NULL, *face_list = NULL;
2188   bool  *halo_cell_flag = NULL;
2189   MPI_Comm comm = cs_glob_mpi_comm;
2190   MPI_Status status;
2191 
2192   static const int tag = 'm'+'e'+'r'+'g'+'e';
2193 
2194   if (merge_stride < 2)
2195     return;
2196 
2197   /* Determine rank in merged group */
2198 
2199   g->merge_sub_size = merge_stride;
2200   g->merge_stride = g->next_merge_stride;
2201   g->next_merge_stride *= merge_stride;
2202 
2203   if (base_rank % g->merge_stride != 0) {
2204     g->merge_sub_size = 0;
2205     g->merge_sub_root = -1;
2206     g->merge_sub_rank = -1;
2207   }
2208   else {
2209     int merge_rank = base_rank / g->merge_stride;
2210     int merge_size = (cs_glob_n_ranks/g->merge_stride);
2211     int merge_sub_root = (merge_rank / merge_stride) * merge_stride;
2212     if (cs_glob_n_ranks % g->merge_stride > 0)
2213       merge_size += 1;
2214     g->merge_sub_root = merge_sub_root * g->merge_stride;
2215     g->merge_sub_rank = merge_rank % merge_stride;
2216     if (merge_sub_root + g->merge_sub_size > merge_size)
2217       g->merge_sub_size = merge_size - merge_sub_root;
2218   }
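
  /* Worked example (illustrative, not from the original source): with
     cs_glob_n_ranks = 8, merge_stride = 2 and a previous next_merge_stride
     of 1, rank 5 obtains merge_rank = 5, merge_sub_root = 4,
     merge_sub_rank = 1 and merge_sub_size = 2, so ranks {4, 5} form one
     merge set whose data is gathered on rank 4. */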
2219 
2220   if (g->next_merge_stride > 1) {
2221     if (g->n_ranks % merge_stride)
2222       g->n_ranks = (g->n_ranks/merge_stride) + 1;
2223     else
2224       g->n_ranks = (g->n_ranks/merge_stride);
2225     g->comm = cs_base_get_rank_step_comm(g->next_merge_stride);
2226   }
2227 
2228   if (verbosity > 2) {
2229     bft_printf("\n"
2230                "    merging level %2d grid:\n"
2231                "      merge_stride = %d; new ranks = %d\n",
2232                g->level, g->merge_stride, g->n_ranks);
2233     if (verbosity > 3)
2234       bft_printf("      sub_root = %d; sub_rank = %d; sub_size = %d\n",
2235                  g->merge_sub_root, g->merge_sub_rank, g->merge_sub_size);
2236   }
2237 
2238   /* Compute number of coarse cells in new grid;
2239      local cell numbers will be shifted by cell_shift in the merged grid */
2240 
2241   if (g->merge_sub_size > 1) {
2242 
2243     if (g->merge_sub_rank == 0) {
2244       BFT_MALLOC(g->merge_cell_idx, g->merge_sub_size + 1, cs_lnum_t);
2245       g->merge_cell_idx[0] = 0; g->merge_cell_idx[1] = g->n_rows;
2246       for (i = 1; i < g->merge_sub_size; i++) {
2247         cs_lnum_t recv_val;
2248         int dist_rank = g->merge_sub_root + g->merge_stride*i;
2249         MPI_Recv(&recv_val, 1, CS_MPI_LNUM, dist_rank, tag, comm, &status);
2250         g->merge_cell_idx[i + 1] = recv_val + g->merge_cell_idx[i];
2251       }
2252     }
2253     else {
2254       cs_lnum_t send_val = g->n_rows;
2255       MPI_Send(&send_val, 1, CS_MPI_LNUM, g->merge_sub_root, tag, comm);
2256     }
2257 
2258     /* Now return computed info to grids that will be merged */
2259 
2260     if (g->merge_sub_rank == 0) {
2261       for (i = 1; i < g->merge_sub_size; i++) {
2262         cs_lnum_t send_val;
2263         send_val = g->merge_cell_idx[i];
2264         MPI_Send(&send_val, 1, CS_MPI_LNUM,
2265                  g->merge_sub_root + g->merge_stride*i, tag, comm);
2266       }
2267     }
2268     else
2269       MPI_Recv(&cell_shift, 1, CS_MPI_LNUM,
2270                g->merge_sub_root, tag, comm, &status);
2271   }
2272 
2273   /* Compute and exchange new cell numbers */
2274 
2275   BFT_MALLOC(new_cell_id, g->n_cols_ext, cs_lnum_t);
2276   for (j = 0; j < g->n_rows; j++)
2277     new_cell_id[j] = cell_shift + j;
2278   for (j = g->n_rows; j < g->n_cols_ext; j++)
2279     new_cell_id[j] = -1;
2280 
2281   cs_halo_sync_untyped(g->halo,
2282                        CS_HALO_STANDARD,
2283                        sizeof(cs_lnum_t),
2284                        new_cell_id);
2285 
2286   /* Now build face filter list (before halo is modified) */
2287 
2288   if (g->merge_sub_size > 1 && g->merge_sub_rank > 0 && g->n_faces > 0) {
2289 
2290     cs_lnum_t n_ghost_cells = g->n_cols_ext - g->n_rows;
2291 
2292     /* Mark which faces should be merged: to avoid duplicates, a face
2293        connected to a cell on a lower rank in the same merge set is
2294        discarded, as it has already been accounted for by that rank. */
2295 
2296     BFT_MALLOC(face_list, g->n_faces, cs_lnum_t);
2297     BFT_MALLOC(halo_cell_flag, n_ghost_cells, bool);
2298     for (j = 0; j < n_ghost_cells; j++)
2299       halo_cell_flag[j] = false;
2300 
2301     for (i = 0; i < g->halo->n_c_domains; i++) {
2302       rank_id = g->halo->c_domain_rank[i];
2303       if (rank_id >= g->merge_sub_root && rank_id < cs_glob_rank_id) {
2304         for (t_id = 0; t_id < g->halo->n_transforms; t_id++) {
2305           int t_shift = 4 * g->halo->n_c_domains * t_id;
2306           cs_lnum_t t_start = g->halo->perio_lst[t_shift + 4*i];
2307           cs_lnum_t t_end = t_start + g->halo->perio_lst[t_shift + 4*i + 1];
2308           for (j = t_start; j < t_end; j++)
2309             halo_cell_flag[j] = true;
2310         }
2311       }
2312       else {
2313         for (j = g->halo->index[2*i]; j < g->halo->index[2*i+2]; j++)
2314           halo_cell_flag[j] = true;
2315       }
2316     }
2317 
2318     for (face_id = 0; face_id < g->n_faces; face_id++) {
2319       bool use_face = true;
2320       cs_lnum_t ii = g->face_cell[face_id][0] - g->n_rows + 1;
2321       cs_lnum_t jj = g->face_cell[face_id][1] - g->n_rows + 1;
2322       if (ii > 0) {
2323         if (halo_cell_flag[ii - 1] == false)
2324           use_face = false;
2325       }
2326       else if (jj > 0) {
2327         if (halo_cell_flag[jj - 1] == false)
2328           use_face = false;
2329       }
2330       if (use_face == true)
2331         face_list[n_faces++] = face_id;
2332     }
2333 
2334     BFT_FREE(halo_cell_flag);
2335   }
2336 
2337   /* Append and merge halos */
2338 
2339   _append_halos(g, new_cell_id);
2340 
2341   /* Update face -> cells connectivity */
2342 
2343   for (face_id = 0; face_id < g->n_faces; face_id++) {
2344     cs_lnum_t ii = g->face_cell[face_id][0];
2345     cs_lnum_t jj = g->face_cell[face_id][1];
2346     assert(ii != jj && new_cell_id[ii] != new_cell_id[jj]);
2347     g->_face_cell[face_id][0] = new_cell_id[ii];
2348     g->_face_cell[face_id][1] = new_cell_id[jj];
2349   }
2350 
2351   BFT_FREE(new_cell_id);
2352 
2353   /* Merge cell and face data */
2354 
2355   if (g->merge_sub_size > 1) {
2356     _append_cell_data(g);
2357     _append_face_data(g, n_faces, face_list);
2358   }
2359   _sync_merged_cell_data(g);
2360 
2361   BFT_FREE(face_list);
2362 
2363   if (verbosity > 3)
2364     bft_printf("      merged to %ld (from %ld) rows\n\n",
2365                (long)g->n_rows, (long)g->n_elts_r[0]);
2366 }
2367 
2368 /*----------------------------------------------------------------------------
2369  * Scatter coarse cell integer data in case of merged grids
2370  *
2371  * parameters:
2372  *   g    <-- Grid structure
2373  *   num  <-> Variable defined on merged grid cells on input,
2374  *            defined on scattered (per-rank) grid cells on output
2375  *----------------------------------------------------------------------------*/
2376 
2377 static void
2378 _scatter_row_int(const cs_grid_t  *g,
2379                  int              *num)
2380 {
2381   assert(g != NULL);
2382 
2383   /* If grid merging has taken place, scatter coarse data */
2384 
2385   if (g->merge_sub_size > 1) {
2386 
2387     MPI_Comm  comm = cs_glob_mpi_comm;
2388     static const int tag = 'p'+'r'+'o'+'l'+'o'+'n'+'g';
2389 
2390     /* Append data */
2391 
2392     if (g->merge_sub_rank == 0) {
2393       int rank_id;
2394       assert(cs_glob_rank_id == g->merge_sub_root);
2395       for (rank_id = 1; rank_id < g->merge_sub_size; rank_id++) {
2396         cs_lnum_t n_send = (  g->merge_cell_idx[rank_id+1]
2397                             - g->merge_cell_idx[rank_id]);
2398         int dist_rank = g->merge_sub_root + g->merge_stride*rank_id;
2399         MPI_Send(num + g->merge_cell_idx[rank_id], n_send, CS_MPI_LNUM,
2400                  dist_rank, tag, comm);
2401       }
2402     }
2403     else {
2404       MPI_Status status;
2405       MPI_Recv(num, g->n_elts_r[0], CS_MPI_LNUM,
2406                g->merge_sub_root, tag, comm, &status);
2407     }
2408   }
2409 
2410 }
2411 
2412 /*----------------------------------------------------------------------------
2413  * Scatter coarse cell integer data in case of merged grids
2414  *
2415  * parameters:
2416  *   g    <-- Grid structure
2417  *   num  <-> Variable defined on merged grid cells on input,
2418  *            defined on scattered (per-rank) grid cells on output
2419  *----------------------------------------------------------------------------*/
2420 
2421 static void
2422 _scatter_row_num(const cs_grid_t  *g,
2423                  cs_lnum_t        *num)
2424 {
2425   assert(g != NULL);
2426 
2427   /* If grid merging has taken place, scatter coarse data */
2428 
2429   if (g->merge_sub_size > 1) {
2430 
2431     MPI_Comm  comm = cs_glob_mpi_comm;
2432     static const int tag = 'p'+'r'+'o'+'l'+'o'+'n'+'g';
2433 
2434     /* Append data */
2435 
2436     if (g->merge_sub_rank == 0) {
2437       int rank_id;
2438       assert(cs_glob_rank_id == g->merge_sub_root);
2439       for (rank_id = 1; rank_id < g->merge_sub_size; rank_id++) {
2440         cs_lnum_t n_send = (  g->merge_cell_idx[rank_id+1]
2441                             - g->merge_cell_idx[rank_id]);
2442         int dist_rank = g->merge_sub_root + g->merge_stride*rank_id;
2443         MPI_Send(num + g->merge_cell_idx[rank_id], n_send, CS_MPI_LNUM,
2444                  dist_rank, tag, comm);
2445       }
2446     }
2447     else {
2448       MPI_Status status;
2449       MPI_Recv(num, g->n_elts_r[0], CS_MPI_LNUM,
2450                g->merge_sub_root, tag, comm, &status);
2451     }
2452   }
2453 
2454 }
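
/* Illustrative example (not part of the original source): the scatter
   mirrors the gather described by g->merge_cell_idx.  With
   merge_cell_idx = {0, 100, 180, 250}, the merge root keeps entries
   0..99 of num, sends entries 100..179 to sub-rank 1 and entries
   180..249 to sub-rank 2; each receiving rank stores them at the start
   of its own num array (its former local rows). */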
2455 
2456 #endif /* defined(HAVE_MPI) */
2457 
2458 /*----------------------------------------------------------------------------
2459  * Remove an element from a list (queue) of elements listed by adjacency size.
2460  *
2461  * parameters:
2462  *   s      <-> structure associated with graph traversal and update
2463  *   a_m_e  <-- number of adjacencies considered for this element
2464  *   elt_id <-- id of element to remove
2465  *----------------------------------------------------------------------------*/
2466 
2467 static inline void
2468 _graph_m_ptr_remove_m(cs_graph_m_ptr_t  *s,
2469                       cs_lnum_t          a_m_e,
2470                       cs_lnum_t          elt_id)
2471 {
2472   cs_lnum_t _m = a_m_e;
2473 
2474   cs_lnum_t s_p = s->prev[elt_id];
2475   cs_lnum_t s_n = s->next[elt_id];
2476   if (s_p > -1) { /* not head of list */
2477     s->next[s_p] = s_n;
2478     if (s_n > -1)
2479       s->prev[s_n] = s_p;
2480   }
2481   else { /* head of list */
2482     assert(s->m_head[_m] == elt_id);
2483     s->m_head[_m] = s_n;
2484     if (s_n > -1)
2485       s->prev[s_n] = -1;
2486     if (s->m_head[_m] < 0) { /* Update min and max m */
2487       if (_m == s->m_min) {
2488         while (s->m_min < s->m_max && s->m_head[s->m_min] < 0)
2489           s->m_min++;
2490       }
2491       else if (_m == s->m_max) {
2492         s->m_max -= 1;
2493         while (s->m_max > s->m_min) {
2494           if (s->m_head[s->m_max] > -1)
2495             break;
2496           else
2497             s->m_max--;
2498         }
2499       }
2500     }
2501   }
2502 
2503   s->prev[elt_id] = -1;
2504   s->next[elt_id] = -1;
2505 }
2506 
2507 /*----------------------------------------------------------------------------
2508  * Insert an element at the head of a list (queue) of elements listed
2509  * by adjacency size.
2510  *
2511  * Elements inserted through this function must have been removed from
2512  * a list with higher adjacency, so a_m_e < s->m_max.
2513  *
2514  * parameters:
2515  *   s      <-> structure associated with graph traversal and update
2516  *   a_m_e  <-- number of adjacencies considered for this element
2517  *   elt_id <-- id of element to insert
2518  *----------------------------------------------------------------------------*/
2519 
2520 static inline void
2521 _graph_m_ptr_insert_m(cs_graph_m_ptr_t  *s,
2522                       cs_lnum_t          a_m_e,
2523                       cs_lnum_t          elt_id)
2524 {
2525   cs_lnum_t _m = a_m_e;
2526 
2527   cs_lnum_t s_p = s->m_head[_m];
2528   if (s_p > -1) { /* list not empty */
2529     assert(s->prev[s_p] == -1);
2530     s->prev[s_p] = elt_id;
2531     s->next[elt_id] = s_p;
2532   }
2533   else {  /* Update min and max m */
2534     if (_m < s->m_min)
2535       s->m_min = _m;
2536     else if (_m > s->m_max) {
2537       s->m_max = _m;
2538       if (s->m_head[s->m_min] < 0)
2539         s->m_min = _m;
2540     }
2541     assert(_m <= s->m_max);
2542   }
2543   s->m_head[_m] = elt_id;
2544 }
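
/* Usage sketch (illustrative, not part of the original source): m_head[],
   prev[] and next[] implement a bucket queue keyed by the number of active
   adjacencies m.  A typical "pop an element with minimal m" step, as done
   in _pairwise_msr below, looks like:

     while (s.m_min < s.m_max && s.m_head[s.m_min] < 0)
       s.m_min++;                        // skip empty buckets
     cs_lnum_t ii = s.m_head[s.m_min];   // head of first non-empty bucket
     _graph_m_ptr_remove_m(&s, a_m[ii], ii);

   after which each still-active neighbor jj of ii is moved from bucket
   a_m[jj] to bucket a_m[jj] - 1 using the remove/insert pair above. */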
2545 
2546 /*----------------------------------------------------------------------------*/
2547 /*!
2548  * \brief Apply one step of the pairwise aggregation algorithm for a
2549  *        matrix expected to be an M-matrix.
2550  *
2551  * This assumes positive diagonal entries, and negative off-diagonal entries.
2552  * The matrix is provided in scalar MSR (modified CSR with separate diagonal)
2553  * form. To use block matrices with this function, their blocks must be
2554  * condensed to equivalent scalars.
2555  *
2556  * \param[in]   f_n_rows      number of rows in fine grid
2557  * \param[in]   beta          aggregation criterion
2558  * \param[in]   dd_threshold  diagonal dominance threshold; if > 0, ignore rows
2559  *                            whose diagonal dominance is above this threshold
2560  * \param[in]   row_index     matrix row index (separate diagonal)
2561  * \param[in]   col_id        matrix column ids
2562  * \param[in]   d_val         matrix diagonal values (scalar)
2563  * \param[in]   x_val         matrix extradiagonal values (scalar)
2564  * \param[out]  f_c_row       fine to coarse rows mapping
2565  *
2566  * \return  local number of resulting coarse rows
2567  */
2568 /*----------------------------------------------------------------------------*/
2569 
2570 static cs_lnum_t
2571 _pairwise_msr(cs_lnum_t         f_n_rows,
2572               const cs_real_t   beta,
2573               const cs_real_t   dd_threshold,
2574               const cs_lnum_t   row_index[restrict],
2575               const cs_lnum_t   col_id[restrict],
2576               const cs_real_t   d_val[restrict],
2577               const cs_real_t   x_val[restrict],
2578               cs_lnum_t        *f_c_row)
2579 {
2580   cs_lnum_t c_n_rows = 0;
2581 
2582   /* Mark all entries of the fine to coarse row mapping as uninitialized */
2583   for (cs_lnum_t ii = 0; ii < f_n_rows; ii++)
2584     f_c_row[ii] = -2;
2585 
2586   /* Allocate working arrays */
2587 
2588   short int  *a_m;   /* active m for row */
2589   cs_real_t  *a_max; /* max per line */
2590 
2591   BFT_MALLOC(a_m, f_n_rows, short int);
2592   BFT_MALLOC(a_max, f_n_rows, cs_real_t);
2593 
2594   /* Compute the maximum over row ii and test whether row ii should be
2595    * ignored. Note that the sum is taken over the absolute values of all
2596    * extra-diagonal coefficients, while the maximum is taken only over the
2597    * negative coefficients. */
2598 
2599   cs_lnum_t m_max = -1;
2600 
2601   cs_lnum_t n_remain = f_n_rows;
2602 
2603   if (dd_threshold > 0) {
2604 
2605     for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
2606 
2607       cs_real_t sum = 0.0;
2608 
2609       cs_lnum_t s_id = row_index[ii];
2610       cs_lnum_t e_id = row_index[ii+1];
2611       a_max[ii] = 0.0;
2612 
2613       for (cs_lnum_t jj = s_id; jj < e_id; jj++) {
2614         cs_real_t xv = x_val[jj];
2615         sum += CS_ABS(xv);
2616         if (xv < 0)
2617           a_max[ii] = CS_MAX(a_max[ii], -xv);
2618       }
2619 
2620       /* Check whether the row should be ignored. */
2621       if (d_val[ii] > dd_threshold * sum) {
2622         a_m[ii] = -1;
2623         n_remain -= 1;
2624         f_c_row[ii] = -1;
2625       }
2626       else {
2627         a_m[ii] = 0;
2628         for (cs_lnum_t jj = e_id-1; jj >= s_id; jj--) {
2629           if (col_id[jj] < f_n_rows && x_val[jj] < beta*sum)
2630             a_m[ii] += 1;
2631         }
2632       }
2633 
2634       if (m_max < a_m[ii])
2635         m_max = a_m[ii];
2636 
2637     }
2638 
2639   }
2640   else { /* variant with no diagonal dominance check */
2641 
2642     for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
2643 
2644       cs_lnum_t s_id = row_index[ii];
2645       cs_lnum_t e_id = row_index[ii+1];
2646       a_max[ii] = 0.0;
2647 
2648       for (cs_lnum_t jj = s_id; jj < e_id; jj++) {
2649         cs_real_t xv = x_val[jj];
2650         if (xv < 0)
2651           a_max[ii] = CS_MAX(a_max[ii], -xv);
2652       }
2653 
2654       a_m[ii] = 0;
2655       for (cs_lnum_t jj = e_id-1; jj >= s_id; jj--) {
2656         if (col_id[jj] < f_n_rows)
2657           a_m[ii] += 1;
2658       }
2659 
2660       if (m_max < a_m[ii])
2661         m_max = a_m[ii];
2662 
2663     }
2664 
2665   }
2666 
2667   if (m_max < 0)
2668     return 0;
2669 
2670   /* Build pointers to lists of rows by a_m
2671      (to allow access to row with lowest m) */
2672 
2673   cs_graph_m_ptr_t s;
2674 
2675   s.m_min = 0;
2676   s.m_max = m_max;
2677 
2678   BFT_MALLOC(s.m_head, s.m_max+1, cs_lnum_t);
2679   BFT_MALLOC(s.next, f_n_rows*2, cs_lnum_t);
2680   s.prev = s.next + f_n_rows;
2681 
2682   for (cs_lnum_t ii = 0; ii < s.m_max+1; ii++)
2683     s.m_head[ii] = -1;
2684   for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
2685     s.next[ii] = -1;
2686     s.prev[ii] = -1;
2687   }
2688 
2689   for (cs_lnum_t ii = f_n_rows-1; ii >= 0; ii--) {
2690     cs_lnum_t _m = a_m[ii];
2691     if (_m >= 0) {
2692       cs_lnum_t prev_head = s.m_head[_m];
2693       s.m_head[_m] = ii;
2694       s.next[ii] = prev_head;
2695       if (prev_head > -1)
2696         s.prev[prev_head] = ii;
2697     }
2698   }
2699 
2700   /* Now build pairs */
2701 
2702   while (s.m_min < s.m_max) {
2703     if (s.m_head[s.m_min] < 0)
2704       s.m_min++;
2705     else
2706       break;
2707   }
2708 
2709   while (s.m_min > -1) {
2710 
2711     /* Select remaining ii with minimal a_m */
2712 
2713     cs_lnum_t ii = s.m_head[s.m_min];
2714     assert(ii > -1);
2715 
2716     cs_lnum_t gg[2] = {ii, -1};
2717 
2718     /* Select remaining jj such that aij = min_over_k_aik */
2719 
2720     cs_lnum_t s_id = row_index[ii];
2721     cs_lnum_t e_id = row_index[ii+1];
2722 
2723     f_c_row[ii] = c_n_rows; /* Add i to "pair" in all cases */
2724 
2725     if (e_id > s_id) {
2726 
2727       cs_lnum_t jj = -1;
2728       cs_real_t _a_min = HUGE_VAL;
2729       for (cs_lnum_t kk_idx = s_id; kk_idx < e_id; kk_idx++) {
2730         cs_lnum_t kk = col_id[kk_idx];
2731         if (kk < f_n_rows && f_c_row[kk] == -2) { /* not aggregated yet */
2732           cs_real_t xv = x_val[kk_idx];
2733           if (xv < _a_min) {
2734             _a_min = xv;
2735             jj = kk;
2736           }
2737         }
2738       }
2739 
2740       /* keep jj only if within threshold */
2741 
2742       if (_a_min >= -beta*a_max[ii])
2743         jj = -1;
2744       else {
2745         f_c_row[jj] = c_n_rows; /* Add jj to "pair" */
2746         gg[1] = jj;
2747       }
2748 
2749     }
2750 
2751     c_n_rows++;
2752 
2753     /* Now update search set */
2754 
2755     for (int ip = 0; ip < 2; ip++) {
2756       cs_lnum_t i = gg[ip];
2757       if (i > -1) {
2758         cs_lnum_t _m = a_m[i];
2759         assert(_m > -1);
2760         _graph_m_ptr_remove_m(&s, _m, i);
2761         a_m[i] = -1;
2762         cs_lnum_t _s_id = row_index[i];
2763         cs_lnum_t _e_id = row_index[i+1];
2764         for (cs_lnum_t k = _e_id-1; k >= _s_id; k--) {
2765           cs_lnum_t j = col_id[k];
2766           if (j >= f_n_rows)
2767             continue;
2768           _m = a_m[j];
2769           if (_m >= 0) {
2770             _graph_m_ptr_remove_m(&s, _m, j);
2771             if (_m > 0)
2772               _graph_m_ptr_insert_m(&s, _m-1, j);
2773             a_m[j] = _m - 1;
2774           }
2775         }
2776         n_remain--;
2777       }
2778     }
2779     if (s.m_min >= s.m_max) { /* check if list has become empty */
2780       if (s.m_head[s.m_min] < 0) {
2781         s.m_min = -1;
2782         s.m_max = -1;
2783       }
2784     }
2785 
2786   }
2787 
2788   /* Some rows may remain unaggregated; each becomes its own coarse row */
2789 
2790   for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
2791     if (f_c_row[ii] < -1)
2792       f_c_row[ii] = c_n_rows++;
2793   }
2794 
2795   /* Free working arrays */
2796 
2797   BFT_FREE(s.next);
2798   BFT_FREE(s.m_head);
2799   BFT_FREE(a_max);
2800   BFT_FREE(a_m);
2801 
2802   return c_n_rows;
2803 }
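
/* Usage sketch (illustrative, not part of the original source): for a
   4-row 1-D Laplacian given in MSR form,

     cs_lnum_t row_index[] = {0, 1, 3, 5, 6};
     cs_lnum_t col_id[]    = {1,  0, 2,  1, 3,  2};
     cs_real_t d_val[]     = {2., 2., 2., 2.};
     cs_real_t x_val[]     = {-1., -1., -1., -1., -1., -1.};
     cs_lnum_t f_c_row[4];

     cs_lnum_t c_n_rows = _pairwise_msr(4, 0.25, -1., row_index, col_id,
                                        d_val, x_val, f_c_row);

   aggregates rows {0, 1} and {2, 3} pairwise, giving
   f_c_row = {0, 0, 1, 1} and c_n_rows = 2. */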
2804 
2805  /*----------------------------------------------------------------------------
2806  * Build a coarse grid level from the previous level using an
2807  * automatic criterion and pairwise aggregation variant 1,
2808  * with a matrix in MSR format.
2809  *
2810  * parameters:
2811  *   f                   <-- Fine grid structure
2812  *   verbosity           <-- Verbosity level
2813  *   f_c_row             --> Fine row -> coarse row connectivity
2814  *----------------------------------------------------------------------------*/
2815 
2816 static void
2817 _automatic_aggregation_pw_msr(const cs_grid_t  *f,
2818                               int               verbosity,
2819                               cs_lnum_t        *f_c_row)
2820 {
2821   const cs_real_t beta = 0.25;
2822 
2823   const cs_real_t dd_threshold = (f->level == 0) ? _dd_threshold_pw : -1;
2824 
2825   const cs_lnum_t f_n_rows = f->n_rows;
2826 
2827   /* Access matrix MSR vectors */
2828 
2829   const cs_lnum_t  *row_index, *col_id;
2830   const cs_real_t  *d_val, *x_val;
2831   cs_real_t *_d_val = NULL, *_x_val = NULL;
2832 
2833   cs_matrix_get_msr_arrays(f->matrix,
2834                            &row_index,
2835                            &col_id,
2836                            &d_val,
2837                            &x_val);
2838 
2839   const cs_lnum_t *db_size = f->db_size;
2840   const cs_lnum_t *eb_size = f->eb_size;
2841 
2842   if (db_size[0] > 1) {
2843     BFT_MALLOC(_d_val, f_n_rows, cs_real_t);
2844     _reduce_block(f_n_rows, db_size, d_val, _d_val);
2845     d_val = _d_val;
2846   }
2847 
2848   if (eb_size[0] > 1) {
2849     cs_lnum_t f_n_enz = row_index[f_n_rows];
2850     BFT_MALLOC(_x_val, f_n_enz, cs_real_t);
2851     _reduce_block(f_n_enz, eb_size, x_val, _x_val);
2852     x_val = _x_val;
2853   }
2854 
2855   if (verbosity > 3)
2856     bft_printf("\n     %s: beta %5.3e; diag_dominance_threshold: %5.3e\n",
2857                __func__, beta, dd_threshold);
2858 
2859   _pairwise_msr(f_n_rows,
2860                 beta,
2861                 dd_threshold,
2862                 row_index,
2863                 col_id,
2864                 d_val,
2865                 x_val,
2866                 f_c_row);
2867 
2868   /* Free working arrays */
2869 
2870   BFT_FREE(_d_val);
2871   BFT_FREE(_x_val);
2872 }
2873 
2874 /*----------------------------------------------------------------------------
2875  * Build a coarse grid level from the previous level using an
2876  * automatic criterion and pairwise aggregation variant 1,
2877  * with a matrix in native format.
2878  *
2879  * parameters:
2880  *   f                   <-- Fine grid structure
2881  *   max_aggregation     <-- Max fine rows per coarse row
2882  *   verbosity           <-- Verbosity level
2883  *   f_c_row             --> Fine row -> coarse row connectivity
2884  *----------------------------------------------------------------------------*/
2885 
2886 static void
2887 _automatic_aggregation_mx_native(const cs_grid_t  *f,
2888                                  cs_lnum_t         max_aggregation,
2889                                  int               verbosity,
2890                                  cs_lnum_t        *f_c_row)
2891 {
2892   const cs_lnum_t f_n_rows = f->n_rows;
2893 
2894   /* Algorithm parameters */
2895   const cs_real_t beta = 0.25; /* 0.5 for HHO */
2896   const int ncoarse = 8;
2897   int npass_max = 10;
2898 
2899   int _max_aggregation = 1, npass = 0;
2900   cs_lnum_t aggr_count = f_n_rows;
2901   cs_lnum_t c_n_rows = 0;
2902 
2903   cs_lnum_t *c_aggr_count = NULL;
2904   bool *penalize = NULL;
2905   cs_real_t *maxi = NULL;
2906 
2907   /* Access matrix native arrays */
2908 
2909   cs_lnum_t n_edges = 0;
2910   const cs_lnum_2_t  *edges;
2911   const cs_real_t  *d_val, *x_val;
2912   cs_real_t *_d_val = NULL, *_x_val = NULL;
2913 
2914   bool symmetric = true;
2915   cs_lnum_t isym = 2;
2916 
2917   cs_matrix_get_native_arrays(f->matrix,
2918                               &symmetric,
2919                               &n_edges,
2920                               &edges,
2921                               &d_val,
2922                               &x_val);
2923 
2924   if (f->symmetric)
2925     isym = 1;
2926 
2927   const cs_lnum_t *db_size = f->db_size;
2928   const cs_lnum_t *eb_size = f->eb_size;
2929 
2930   if (db_size[0] > 1) {
2931     BFT_MALLOC(_d_val, f_n_rows, cs_real_t);
2932     _reduce_block(f_n_rows, db_size, d_val, _d_val);
2933     d_val = _d_val;
2934   }
2935 
2936   if (eb_size[0] > 1) {
2937     BFT_MALLOC(_x_val, n_edges*isym, cs_real_t);
2938     _reduce_block(n_edges*isym, eb_size, x_val, _x_val);
2939     x_val = _x_val;
2940   }
2941 
2942   /* Allocate working arrays */
2943 
2944   BFT_MALLOC(c_aggr_count, f_n_rows, cs_lnum_t);
2945   BFT_MALLOC(maxi, f_n_rows, cs_real_t);
2946   BFT_MALLOC(penalize, f_n_rows, bool);
2947 
2948   for (cs_lnum_t ii = 0; ii < f_n_rows; ii++){
2949     c_aggr_count[ii] = 1;
2950     penalize[ii] = false;
2951   }
2952 
2953   /* Compute the maximum over row ii and test whether row ii should be
2954    * penalized. Note that the sum is taken over the absolute values of
2955    * all extra-diagonal coefficients, while the maximum is taken only
2956    * over the negative coefficients. */
2957 
2958   cs_real_t *sum;
2959   BFT_MALLOC(sum, f_n_rows, cs_real_t);
2960 
2961   for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
2962     sum[ii] = 0;
2963     maxi[ii] = 0.0;
2964   }
2965 
2966   for (cs_lnum_t e_id = 0; e_id < n_edges; e_id++) {
2967     cs_lnum_t ii = edges[e_id][0];
2968     cs_lnum_t jj = edges[e_id][1];
2969     cs_real_t xv0 = x_val[e_id*isym];
2970     cs_real_t xv1 = x_val[(e_id+1)*isym-1];
2971     if (ii < f_n_rows) {
2972       sum[ii] += CS_ABS(xv0);
2973       if (xv0 < 0)
2974         maxi[ii] = CS_MAX(maxi[ii], -xv0);
2975     }
2976     if (jj < f_n_rows) {
2977       sum[jj] += CS_ABS(xv1);
2978       if (xv1 < 0)
2979         maxi[jj] = CS_MAX(maxi[jj], -xv1);
2980     }
2981   }
2982 
2983   /* Check which rows should be penalized. */
2984   if (f->level == 0) {
2985     for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
2986       if (d_val[ii] > _penalization_threshold * sum[ii])
2987         penalize[ii] = true;
2988     }
2989   }
2990   BFT_FREE(sum);
2991 
2992   /* Passes */
2993 
2994   if (verbosity > 3)
2995     bft_printf("\n     %s:\n", __func__);
2996 
2997   do {
2998 
2999     npass++;
3000     _max_aggregation++;
3001     _max_aggregation = CS_MIN(_max_aggregation, max_aggregation);
3002 
3003     /* Pairwise aggregation */
3004 
3005     for (cs_lnum_t e_id = 0; e_id < n_edges; e_id++) {
3006 
3007       cs_lnum_t ii = edges[e_id][0];
3008       cs_lnum_t jj = edges[e_id][1];
3009 
3010       /* Exclude rows on parallel or periodic boundary, so as not to */
3011       /* coarsen the grid across those boundaries (which would change */
3012       /* the communication pattern and require a more complex algorithm). */
3013 
3014       /* ii or jj is candidate to aggregation only if it is not penalized */
3015 
3016       if (   ii >= f_n_rows || penalize[ii]
3017           || jj >= f_n_rows || penalize[jj])
3018         continue;
3019 
3020       cs_real_t xv = x_val[e_id*isym];
3021 
3022       if (isym == 2)
3023         xv = CS_MAX(xv, x_val[e_id*2 + 1]);
3024 
3025       /* Test if ii and jj are strongly negatively coupled and at */
3026       /* least one of them is not already in an aggregate. */
3027 
3028       if (   xv < -beta*maxi[ii]
3029           && (f_c_row[ii] < 0 || f_c_row[jj] < 0)) {
3030 
3031         if (f_c_row[ii] > -1 && f_c_row[jj] < 0 ) {
3032           if (c_aggr_count[f_c_row[ii]] < _max_aggregation +1) {
3033             f_c_row[jj] = f_c_row[ii];
3034             c_aggr_count[f_c_row[ii]] += 1;
3035           }
3036         }
3037         else if (f_c_row[ii] < 0 && f_c_row[jj] > -1) {
3038           if (c_aggr_count[f_c_row[jj]] < _max_aggregation +1) {
3039             f_c_row[ii] = f_c_row[jj];
3040             c_aggr_count[f_c_row[jj]] += 1;
3041           }
3042         }
3043         else if (f_c_row[ii] < 0 && f_c_row[jj] < 0) {
3044           f_c_row[ii] = c_n_rows;
3045           f_c_row[jj] = c_n_rows;
3046           c_aggr_count[c_n_rows] += 1;
3047           c_n_rows++;
3048         }
3049       }
3050 
3051     }
3052 
3053     /* Check the number of coarse rows created */
3054     aggr_count = 0;
3055     for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
3056       if (f_c_row[ii] < 0)
3057         aggr_count++;
3058     }
3059 
3060     /* Additional passes if aggregation is insufficient */
3061     if (aggr_count == 0 || (c_n_rows + aggr_count)*ncoarse < f_n_rows)
3062       npass_max = npass;
3063 
3064   } while (npass < npass_max); /* Loop on passes */
3065 
3066   /* Finish assembly: rows that are not diagonally dominant and not in an
3067    * aggregate form their own aggregate */
3068   for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
3069     if (!penalize[ii] && f_c_row[ii] < 0) {
3070       f_c_row[ii] = c_n_rows;
3071       c_n_rows++;
3072     }
3073   }
3074 
3075   /* Free working arrays */
3076 
3077   BFT_FREE(_d_val);
3078   BFT_FREE(_x_val);
3079   BFT_FREE(c_aggr_count);
3080   BFT_FREE(maxi);
3081   BFT_FREE(penalize);
3082 }
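
/* Worked example (illustrative, not part of the original source): with
   beta = 0.25, if the most negative extra-diagonal coefficient of row ii
   is -4.0, then maxi[ii] = 4.0 and an edge (ii, jj) is considered strongly
   negatively coupled when its coefficient is below -beta*maxi[ii] = -1.0;
   only such edges may place ii and jj in the same aggregate. */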
3083 
3084 /*----------------------------------------------------------------------------
3085  * Build a coarse grid level from the previous level using an
3086  * automatic criterion and pairwise aggregation variant 1,
3087  * with a matrix in MSR format.
3088  *
3089  * parameters:
3090  *   f                   <-- Fine grid structure
3091  *   max_aggregation     <-- Max fine rows per coarse row
3092  *   verbosity           <-- Verbosity level
3093  *   f_c_row             --> Fine row -> coarse row connectivity
3094  *----------------------------------------------------------------------------*/
3095 
3096 static void
3097 _automatic_aggregation_mx_msr(const cs_grid_t  *f,
3098                               cs_lnum_t         max_aggregation,
3099                               int               verbosity,
3100                               cs_lnum_t        *f_c_row)
3101 {
3102   const cs_lnum_t f_n_rows = f->n_rows;
3103 
3104   int npass_max = 10;
3105   int _max_aggregation = 1, npass = 0;
3106   cs_lnum_t aggr_count = f_n_rows;
3107   cs_lnum_t c_n_rows = 0;
3108 
3109   cs_lnum_t *c_aggr_count = NULL;
3110   bool *penalize = NULL;
3111   cs_real_t *maxi = NULL;
3112 
3113   /* Algorithm parameters */
3114   const cs_real_t beta = 0.25; /* 0.5 for HHO */
3115   const int ncoarse = 8;
3116   const cs_real_t p_test = (f->level == 0) ? 1. : -1;
3117 
3118   if (verbosity > 3)
3119     bft_printf("\n     %s: npass_max: %d; n_coarse: %d;"
3120                " beta %5.3e; pena_thd: %5.3e, p_test: %g\n",
3121                __func__, npass_max, ncoarse, beta, _penalization_threshold,
3122                p_test);
3123 
3124   /* Access matrix MSR vectors */
3125 
3126   const cs_lnum_t  *row_index, *col_id;
3127   const cs_real_t  *d_val, *x_val;
3128   cs_real_t *_d_val = NULL, *_x_val = NULL;
3129 
3130   cs_matrix_get_msr_arrays(f->matrix,
3131                            &row_index,
3132                            &col_id,
3133                            &d_val,
3134                            &x_val);
3135 
3136   const cs_lnum_t *db_size = f->db_size;
3137   const cs_lnum_t *eb_size = f->eb_size;
3138 
3139   if (db_size[0] > 1) {
3140     BFT_MALLOC(_d_val, f_n_rows, cs_real_t);
3141     _reduce_block(f_n_rows, db_size, d_val, _d_val);
3142     d_val = _d_val;
3143   }
3144 
3145   if (eb_size[0] > 1) {
3146     cs_lnum_t f_n_enz = row_index[f_n_rows];
3147     BFT_MALLOC(_x_val, f_n_enz, cs_real_t);
3148     _reduce_block(f_n_enz, eb_size, x_val, _x_val);
3149     x_val = _x_val;
3150   }
3151 
3152   /* Allocate working arrays */
3153 
3154   BFT_MALLOC(c_aggr_count, f_n_rows, cs_lnum_t);
3155   BFT_MALLOC(maxi, f_n_rows, cs_real_t);
3156   BFT_MALLOC(penalize, f_n_rows, bool);
3157 
3158 # pragma omp parallel if (f_n_rows > CS_THR_MIN)
3159   {
3160 #   pragma omp for
3161     for (cs_lnum_t ii = 0; ii < f_n_rows; ii++){
3162       c_aggr_count[ii] = 1;
3163       penalize[ii] = false;
3164     }
3165 
3166     /* Compute the maximum over row ii and test whether row ii should be
3167      * penalized. Note that the sum is taken over the absolute values of
3168      * all extra-diagonal coefficients, while the maximum is taken only over
3169      * the negative coefficients. */
3170 
3171 #   pragma omp for
3172     for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
3173 
3174       maxi[ii] = 0.0;
3175 
3176       cs_real_t  sum = 0.0;
3177       for (cs_lnum_t jj = row_index[ii]; jj < row_index[ii+1]; jj++) {
3178 
3179         const cs_real_t  xv = x_val[jj];
3180         if (xv < 0) {
3181           sum -= xv;
3182           maxi[ii] = CS_MAX(maxi[ii], -xv);
3183         }
3184         else {
3185           sum += xv;
3186         }
3187       }
3188 
3189       /* Check whether the row should be penalized. */
3190       if (d_val[ii]*p_test > _penalization_threshold * sum)
3191         penalize[ii] = true;
3192 
3193     } /* Loop on f_n_rows */
3194 
3195   }   /* OpenMP block */
3196 
3197   /* Passes */
3198 
3199   do {
3200 
3201     npass++;
3202     _max_aggregation++;
3203     _max_aggregation = CS_MIN(_max_aggregation, max_aggregation);
3204 
3205     /* Pairwise aggregation */
3206 
3207     for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
3208 
3209       /* ii is candidate to aggregation only if it is not penalized */
3210 
3211       if (penalize[ii])
3212         continue;
3213 
3214       const cs_real_t  row_criterion = -beta*maxi[ii];
3215 
3216       for (cs_lnum_t jidx = row_index[ii]; jidx < row_index[ii+1]; jidx++) {
3217 
3218         cs_lnum_t jj = col_id[jidx];
3219 
3220         /* Exclude rows on parallel or periodic boundary, so as not to */
3221         /* coarsen the grid across those boundaries (which would change */
3222         /* the communication pattern and require a more complex algorithm). */
3223 
3224         if (jj < f_n_rows) {
3225           if (!penalize[jj]) {
3226 
3227             /* Test if ii and jj are strongly negatively coupled and at */
3228             /* least one of them is not already in an aggregate. */
3229 
3230             if (    x_val[jidx] < row_criterion
3231                 && (f_c_row[ii] < 0 || f_c_row[jj] < 0)) {
3232 
3233               if (f_c_row[ii] > -1 && f_c_row[jj] < 0 ) {
3234                 if (c_aggr_count[f_c_row[ii]] < _max_aggregation +1) {
3235                   f_c_row[jj] = f_c_row[ii];
3236                   c_aggr_count[f_c_row[ii]] += 1;
3237                 }
3238               }
3239               else if (f_c_row[ii] < 0 && f_c_row[jj] > -1) {
3240                 if (c_aggr_count[f_c_row[jj]] < _max_aggregation +1) {
3241                   f_c_row[ii] = f_c_row[jj];
3242                   c_aggr_count[f_c_row[jj]] += 1;
3243                 }
3244               }
3245               else if (f_c_row[ii] < 0 && f_c_row[jj] < 0) {
3246                 f_c_row[ii] = c_n_rows;
3247                 f_c_row[jj] = c_n_rows;
3248                 c_aggr_count[c_n_rows] += 1;
3249                 c_n_rows++;
3250               }
3251             }
3252 
3253           } /* Column is not penalized */
3254         } /* The current rank is owner of the column */
3255 
3256       } /* Loop on columns */
3257 
3258     } /* Loop on rows */
3259 
3260     /* Check the number of coarse rows created */
3261     aggr_count = 0;
3262     for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
3263       if (f_c_row[ii] < 0)
3264         aggr_count++;
3265     }
3266 
3267     /* Additional passes if aggregation is insufficient */
3268     if (aggr_count == 0 || (c_n_rows + aggr_count)*ncoarse < f_n_rows)
3269       npass_max = npass;
3270 
3271   } while (npass < npass_max); /* Loop on passes */
3272 
3273   /* Finish assembly: rows that are not penalized (i.e. not strongly
3274    * diagonally dominant) and not yet in an aggregate each form their own aggregate */
3275   for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
3276     if (!penalize[ii] && f_c_row[ii] < 0) {
3277       f_c_row[ii] = c_n_rows;
3278       c_n_rows++;
3279     }
3280   }
3281 
3282   /* Free working arrays */
3283 
3284   BFT_FREE(_d_val);
3285   BFT_FREE(_x_val);
3286   BFT_FREE(c_aggr_count);
3287   BFT_FREE(maxi);
3288   BFT_FREE(penalize);
3289 }
3290 
3291 /*----------------------------------------------------------------------------
3292  * Build a coarse grid level from the previous level using
3293  * an automatic criterion and the face -> cells adjacency.
3294  *
3295  * parameters:
3296  *   f                    <-- Fine grid structure
3297  *   coarsening_type      <-- Coarsening type
3298  *   max_aggregation      <-- Max fine cells per coarse cell
3299  *   relaxation_parameter <-- P0/P1 relaxation factor
3300  *   verbosity            <-- Verbosity level
3301  *   f_c_cell             --> Fine cell -> coarse cell connectivity
3302  *----------------------------------------------------------------------------*/
3303 
3304 static void
3305 _automatic_aggregation_fc(const cs_grid_t       *f,
3306                           cs_grid_coarsening_t   coarsening_type,
3307                           cs_lnum_t              max_aggregation,
3308                           double                 relaxation_parameter,
3309                           int                    verbosity,
3310                           cs_lnum_t             *f_c_cell)
3311 {
3312   cs_lnum_t n_faces;
3313 
3314   cs_lnum_t isym = 2;
3315   int ncoarse = 8, npass_max = 10, inc_nei = 1;
3316   int _max_aggregation = 1, npass = 0;
3317 
3318   cs_lnum_t f_n_cells = f->n_rows;
3319   cs_lnum_t f_n_cells_ext = f->n_cols_ext;
3320   cs_lnum_t f_n_faces = f->n_faces;
3321 
3322   cs_lnum_t aggr_count = f_n_cells;
3323 
3324   cs_lnum_t r_n_faces = f_n_faces;
3325   cs_lnum_t c_n_cells = 0;
3326 
3327   cs_real_t epsilon = 1.e-6;
3328 
3329   cs_lnum_t *c_cardinality = NULL, *f_c_face = NULL;
3330   cs_lnum_t *merge_flag = NULL, *c_aggr_count = NULL;
3331   cs_lnum_t *i_work_array = NULL;
3332 
3333   const cs_lnum_t *db_size = f->db_size;
3334   const cs_lnum_t *eb_size = f->eb_size;
3335   const cs_lnum_2_t *f_face_cells = f->face_cell;
3336   const cs_real_t *f_da = f->da;
3337   const cs_real_t *f_xa = f->xa;
3338 
3339   /* Reduce block to scalar equivalents */
3340 
3341   const cs_real_t *_f_da = f_da;
3342   const cs_real_t *_f_xa = f_xa;
3343 
3344   cs_real_t *s_da = NULL, *s_xa = NULL;
3345 
3346   if (db_size[0] > 1) {
3347     BFT_MALLOC(s_da, f_n_cells, cs_real_t);
3348     _reduce_block(f_n_cells, db_size, f_da, s_da);
3349     _f_da = s_da;
3350   }
3351 
3352   if (eb_size[0] > 1) {
3353     BFT_MALLOC(s_xa, f_n_faces*isym, cs_real_t);
3354     _reduce_block(f_n_faces*isym, eb_size, f_xa, s_xa);
3355     _f_xa = s_xa;
3356   }
3357 
3358   /* Allocate working arrays */
3359 
3360   BFT_MALLOC(i_work_array, f_n_cells_ext*2 + f_n_faces*3, cs_lnum_t);
3361 
3362   c_cardinality = i_work_array;
3363   c_aggr_count = i_work_array + f_n_cells_ext;
3364   f_c_face = i_work_array + 2*f_n_cells_ext; /* fine face -> coarse face */
3365   merge_flag = i_work_array + 2*f_n_cells_ext + f_n_faces;
3366 
3367   /* Initialization */
3368 
3369   if (f->symmetric == true)
3370     isym = 1;
3371 
3372 # pragma omp parallel for if(f_n_cells_ext > CS_THR_MIN)
3373   for (cs_lnum_t ii = 0; ii < f_n_cells_ext; ii++) {
3374     c_cardinality[ii] = -1;
3375     f_c_cell[ii] = -1;
3376     c_aggr_count[ii] = 1;
3377   }
3378 
3379 # pragma omp parallel for if(f_n_faces > CS_THR_MIN)
3380   for (cs_lnum_t face_id = 0; face_id < f_n_faces; face_id++) {
3381     merge_flag[face_id] = face_id +1;
3382     f_c_face[face_id] = 0;
3383   }
3384 
3385   /* Compute cardinality (number of neighbors for each cell -1) */
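  /* With the initial value of -1 and one increment per adjacent interior
   * face, c_cardinality[ii] ends up as (number of neighbors of ii) - 1;
   * inc_nei is then added to it at each pass below. */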
3386 
3387   for (cs_lnum_t face_id = 0; face_id < f_n_faces; face_id++) {
3388     cs_lnum_t ii = f_face_cells[face_id][0];
3389     cs_lnum_t jj = f_face_cells[face_id][1];
3390 
3391     c_cardinality[ii] += 1;
3392     c_cardinality[jj] += 1;
3393   }
3394 
3395   /* Passes */
3396 
3397   if (verbosity > 3)
3398     bft_printf("\n     %s:\n", __func__);
3399 
3400   do {
3401 
3402     npass++;
3403     n_faces = r_n_faces;
3404     _max_aggregation++;
3405     _max_aggregation = CS_MIN(_max_aggregation, max_aggregation);
3406 
3407 #   pragma omp parallel for if(n_faces > CS_THR_MIN)
3408     for (cs_lnum_t face_id = 0; face_id < n_faces; face_id++) {
3409       f_c_face[face_id] = merge_flag[face_id];
3410       merge_flag[face_id] = 0;
3411     }
3412 
3413     if (n_faces < f_n_faces) {
3414 #     pragma omp parallel for if(f_n_faces > CS_THR_MIN)
3415       for (cs_lnum_t face_id = n_faces; face_id < f_n_faces; face_id++) {
3416         merge_flag[face_id] = 0;
3417         f_c_face[face_id] = 0;
3418       }
3419     }
3420 
3421     if (verbosity > 3)
3422       bft_printf("       pass %3d; r_n_faces = %10ld; aggr_count = %10ld\n",
3423                  npass, (long)r_n_faces, (long)aggr_count);
3424 
3425     /* Increment number of neighbors */
3426 
3427 #   pragma omp parallel for if(f_n_cells > CS_THR_MIN)
3428     for (cs_lnum_t ii = 0; ii < f_n_cells; ii++)
3429       c_cardinality[ii] += inc_nei;
3430 
3431     /* Initialize non-eliminated faces */
3432     r_n_faces = 0;
3433 
3434     /* Loop on non-eliminated faces */
3435 
3436     cs_real_t ag_mult = 1.;
3437     cs_real_t ag_threshold = 1. - epsilon;
3438     if (coarsening_type == CS_GRID_COARSENING_CONV_DIFF_DX) {
3439       ag_mult = -1.;
3440       ag_threshold = - (1. - epsilon) * pow(relaxation_parameter, npass);
3441       // ag_threshold = (1. - epsilon) * pow(relaxation_parameter, npass);
3442     }
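    /* In effect (ignoring the small clamping values), the default criterion
     * aggregates a face when
     *   (da_ii*da_jj) / (card_ii*card_jj) / (xa_ij*xa_ji) < 1 - epsilon,
     * while for CS_GRID_COARSENING_CONV_DIFF_DX the sign flip through ag_mult
     * turns the test into
     *   max(-xa_ij, -xa_ji) / sqrt(da_ii*da_jj/(card_ii*card_jj))
     *     > (1 - epsilon) * relaxation_parameter^npass,
     * so the threshold is progressively relaxed at each pass. */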
3443 
3444     for (cs_lnum_t face_id = 0; face_id < n_faces; face_id++) {
3445 
3446       cs_lnum_t c_face = f_c_face[face_id] -1;
3447 
3448       cs_lnum_t ii = f_face_cells[c_face][0];
3449       cs_lnum_t jj = f_face_cells[c_face][1];
3450 
3451       /* Exclude faces on parallel or periodic boundary, so as not to */
3452       /* coarsen the grid across those boundaries (which would change */
3453       /* the communication pattern and require a more complex algorithm). */
3454 
3455       if (   (ii < f_n_cells && jj < f_n_cells)
3456           && (f_c_cell[ii] < 0 || f_c_cell[jj] < 0)) {
3457 
3458         cs_lnum_t count = 0;
3459 
3460         cs_lnum_t ix0 = c_face*isym, ix1 = (c_face +1)*isym -1;
3461 
3462         cs_real_t f_da0_da1 =   (_f_da[ii] * _f_da[jj])
3463                               / (c_cardinality[ii]*c_cardinality[jj]);
3464 
3465         cs_real_t aggr_crit;
3466 
3467         if (coarsening_type == CS_GRID_COARSENING_CONV_DIFF_DX) {
3468           cs_real_t f_xa0 = CS_MAX(-_f_xa[ix0], 1.e-15);
3469           cs_real_t f_xa1 = CS_MAX(-_f_xa[ix1], 1.e-15);
3470           aggr_crit =   CS_MAX(f_xa0, f_xa1)
3471                       / CS_MAX(sqrt(f_da0_da1), 1.e-15);
3472         }
3473         else {
3474           cs_real_t f_xa0_xa1 =  _f_xa[ix0] * _f_xa[ix1];
3475           /* TODO: replace this test, or adimensionalize it */
3476           f_xa0_xa1 = CS_MAX(f_xa0_xa1, 1.e-30);
3477 
3478           aggr_crit = f_da0_da1 / f_xa0_xa1;
3479         }
3480 
3481         if (ag_mult*aggr_crit < ag_threshold) {
3482 
3483           if (f_c_cell[ii] > -1 && f_c_cell[jj] < 0 ) {
3484             if (c_aggr_count[f_c_cell[ii]] < _max_aggregation +1) {
3485               f_c_cell[jj] = f_c_cell[ii];
3486               c_aggr_count[f_c_cell[ii]] += 1;
3487               count++;
3488             }
3489           }
3490           else if (f_c_cell[ii] < 0 && f_c_cell[jj] > -1) {
3491             if (c_aggr_count[f_c_cell[jj]] < _max_aggregation +1) {
3492               f_c_cell[ii] = f_c_cell[jj];
3493               c_aggr_count[f_c_cell[jj]] += 1;
3494               count++;
3495             }
3496           }
3497           else if (f_c_cell[ii] < 0 && f_c_cell[jj] < 0) {
3498             f_c_cell[ii] = c_n_cells;
3499             f_c_cell[jj] = c_n_cells;
3500             c_aggr_count[c_n_cells] += 1;
3501             c_n_cells++;
3502             count++;
3503           }
3504         }
3505 
3506         if (count == 0 && (f_c_cell[ii] < 0 || f_c_cell[jj] < 0)) {
3507           merge_flag[r_n_faces] = c_face +1;
3508           r_n_faces++;
3509         }
3510 
3511       }
3512 
3513     }
3514 
3515     /* Check the number of coarse cells created */
3516     aggr_count = 0;
3517 #   pragma omp parallel for reduction(+:aggr_count) if(f_n_cells > CS_THR_MIN)
3518     for (cs_lnum_t i = 0; i < f_n_cells; i++) {
3519       if (f_c_cell[i] < 0)
3520         aggr_count++;
3521     }
3522 
3523     /* Additional passes if aggregation is insufficient */
3524 
3525     if (   aggr_count == 0 || (c_n_cells + aggr_count)*ncoarse < f_n_cells
3526         || r_n_faces == 0)
3527       npass_max = npass;
3528 
3529   } while (npass < npass_max); /* Loop on passes */
3530 
3531   BFT_FREE(s_da);
3532   BFT_FREE(s_xa);
3533 
3534   /* Finish assembly */
3535   for (cs_lnum_t i = 0; i < f_n_cells; i++) {
3536     if (f_c_cell[i] < 0) {
3537       f_c_cell[i] = c_n_cells;
3538       c_n_cells++;
3539     }
3540   }
3541 
3542   /* Free working arrays */
3543 
3544   BFT_FREE(i_work_array);
3545 }
3546 
3547 /*----------------------------------------------------------------------------
3548  * Compute volume and center of coarse cells.
3549  *
3550  * parameters:
3551  *   fine_grid   <-- Fine grid structure
3552  *   coarse_grid <-> Coarse grid structure
3553  *----------------------------------------------------------------------------*/
3554 
3555 static void
3556 _compute_coarse_cell_quantities(const cs_grid_t  *fine_grid,
3557                                 cs_grid_t        *coarse_grid)
3558 {
3559   cs_lnum_t ic, ii;
3560 
3561   cs_lnum_t f_n_cells = fine_grid->n_rows;
3562 
3563   cs_lnum_t c_n_cells = coarse_grid->n_rows;
3564   cs_lnum_t c_n_cells_ext = coarse_grid->n_cols_ext;
3565 
3566   cs_lnum_t *c_coarse_row = coarse_grid->coarse_row;
3567 
3568   cs_real_t *c_cell_vol = coarse_grid->_cell_vol;
3569   cs_real_t *c_cell_cen = coarse_grid->_cell_cen;
3570 
3571   const cs_real_t *f_cell_vol = fine_grid->cell_vol;
3572   const cs_real_t *f_cell_cen = fine_grid->cell_cen;
3573 
3574   /* Compute volume and center of coarse cells */
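  /* Coarse volumes are sums of fine volumes, and coarse centers are the
   * volume-weighted averages of fine centers:
   *   x_c = sum_i(V_i * x_i) / sum_i(V_i). */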
3575 
3576 # pragma omp parallel for if(c_n_cells_ext > CS_THR_MIN)
3577   for (ic = 0; ic < c_n_cells_ext; ic++) {
3578     c_cell_vol[ic] = 0.;
3579     c_cell_cen[3*ic]    = 0.;
3580     c_cell_cen[3*ic +1] = 0.;
3581     c_cell_cen[3*ic +2] = 0.;
3582   }
3583 
3584   for (ii = 0; ii < f_n_cells; ii++) {
3585     ic = c_coarse_row[ii];
3586     c_cell_vol[ic] += f_cell_vol[ii];
3587     c_cell_cen[3*ic]    += f_cell_vol[ii]*f_cell_cen[3*ii];
3588     c_cell_cen[3*ic +1] += f_cell_vol[ii]*f_cell_cen[3*ii +1];
3589     c_cell_cen[3*ic +2] += f_cell_vol[ii]*f_cell_cen[3*ii +2];
3590   }
3591 
3592 # pragma omp parallel for if(c_n_cells > CS_THR_MIN)
3593   for (ic = 0; ic < c_n_cells; ic++) {
3594     c_cell_cen[3*ic]    /= c_cell_vol[ic];
3595     c_cell_cen[3*ic +1] /= c_cell_vol[ic];
3596     c_cell_cen[3*ic +2] /= c_cell_vol[ic];
3597   }
3598 }
3599 
3600 /*----------------------------------------------------------------------------
3601  * Verification for matrix quantities.
3602  *
3603  * parameters:
3604  *   g <-- pointer to grid structure
3605  *----------------------------------------------------------------------------*/
3606 
3607 static void
3608 _verify_matrix(const cs_grid_t  *g)
3609 {
3610   const cs_lnum_t n_cols = cs_matrix_get_n_columns(g->matrix);
3611   const cs_lnum_t n_rows = cs_matrix_get_n_rows(g->matrix);
3612   const cs_lnum_t *db_size = g->db_size;
3613 
3614   cs_real_t *val;
3615   BFT_MALLOC(val, n_cols*db_size[1], cs_real_t);
3616 
3617   /* Evaluate fine and coarse grids diagonal dominance */
3618 
3619   cs_matrix_diag_dominance(g->matrix, val);
3620 
3621   cs_real_t vmin = HUGE_VAL, vmax = -HUGE_VAL;
3622 
3623   const cs_lnum_t n_ents = n_rows*db_size[1];
3624 
3625   for (cs_lnum_t i = 0; i < n_ents; i++) {
3626     if (val[i] < vmin)
3627       vmin = val[i];
3628     if (val[i] > vmax)
3629       vmax = val[i];
3630   }
3631 
3632   BFT_FREE(val);
3633 
3634 #if defined(HAVE_MPI)
3635   if (cs_glob_mpi_comm != MPI_COMM_NULL) {
3636     cs_real_t _vmin = vmin, _vmax = vmax;
3637     MPI_Allreduce(&_vmin, &vmin, 1, CS_MPI_REAL, MPI_MIN, g->comm);
3638     MPI_Allreduce(&_vmax, &vmax, 1, CS_MPI_REAL, MPI_MAX, g->comm);
3639   }
3640 #endif
3641 
3642   bft_printf(_("       grid level %2d diag. dominance: min = %12.5e\n"
3643                "                                      max = %12.5e\n\n"),
3644              g->level, vmin, vmax);
3645 }
3646 
3647 /*----------------------------------------------------------------------------
3648  * Verification for coarse quantities computed from fine quantities.
3649  *
3650  * parameters:
3651  *   fine_grid   <-- Fine grid structure
3652  *   coarse_grid <-- Coarse grid structure
3653  *   n_clips_min <-- number of clippings to minimum value
3654  *   n_clips_max <-- number of clippings to maximum value
3655  *   interp      <-- 0 for no interpolation, > 0 for interpolation
3656  *----------------------------------------------------------------------------*/
3657 
3658 static void
3659 _verify_coarse_quantities(const cs_grid_t  *fine_grid,
3660                           const cs_grid_t  *coarse_grid,
3661                           cs_gnum_t         n_clips_min,
3662                           cs_gnum_t         n_clips_max,
3663                           int               interp)
3664 {
3665   cs_lnum_t ic, jc, ii, jj, c_face, face_id;
3666 
3667   int isym = 2;
3668 
3669   cs_lnum_t f_n_cells = fine_grid->n_rows;
3670   cs_lnum_t f_n_cells_ext = fine_grid->n_cols_ext;
3671   cs_lnum_t f_n_faces = fine_grid->n_faces;
3672 
3673   cs_lnum_t c_n_cells = coarse_grid->n_rows;
3674   cs_lnum_t c_n_cells_ext = coarse_grid->n_cols_ext;
3675   cs_lnum_t c_n_faces = coarse_grid->n_faces;
3676 
3677   const cs_real_t *c_xa0 = coarse_grid->_xa0;
3678   const cs_real_t *c_xa = coarse_grid->_xa;
3679 
3680   cs_real_t *w1 = NULL;
3681 
3682   const cs_lnum_t *db_size = fine_grid->db_size;
3683 
3684   const cs_lnum_2_t *f_face_cell = fine_grid->face_cell;
3685   const cs_lnum_2_t *c_face_cell = coarse_grid->face_cell;
3686 
3687   const cs_real_t *f_xa = fine_grid->xa;
3688 
3689   BFT_MALLOC(w1, f_n_cells_ext*db_size[3], cs_real_t);
3690 
3691   if (fine_grid->symmetric == true)
3692     isym = 1;
3693 
3694 #if defined(HAVE_MPI) && defined(HAVE_MPI_IN_PLACE)
3695   MPI_Comm comm = fine_grid->comm;
3696   if (comm != MPI_COMM_NULL) {
3697     cs_gnum_t n_clips[2] = {n_clips_min, n_clips_max};
3698     MPI_Allreduce(MPI_IN_PLACE, n_clips, 2, CS_MPI_GNUM, MPI_SUM, comm);
3699     n_clips_min = n_clips[0];
3700     n_clips_max = n_clips[1];
3701   }
3702 #endif
3703 
3704   if (n_clips_min+n_clips_max > 0)
3705     bft_printf("\n     %s:\n"
3706                "       coarse_matrix < xag0 on %10llu faces\n"
3707                "                     > 0    on %10llu faces\n",
3708                __func__,
3709                (unsigned long long)n_clips_min,
3710                (unsigned long long)n_clips_max);
3711 
3712   cs_real_t *w2, *w3, *w4;
3713   double anmin[2] = {HUGE_VAL, HUGE_VAL};
3714   double anmax[2] = {-HUGE_VAL, -HUGE_VAL};
3715 
3716   BFT_MALLOC(w2, f_n_cells_ext*db_size[3], cs_real_t);
3717   BFT_MALLOC(w3, c_n_cells_ext*db_size[3], cs_real_t);
3718   BFT_MALLOC(w4, c_n_cells_ext*db_size[3], cs_real_t);
3719 
3720   /* Evaluate anisotropy of fine and coarse grids */
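  /* The per-cell anisotropy indicator computed below is the ratio of the
   * smallest to the largest extra-diagonal coefficient magnitude over the
   * faces adjacent to each cell (close to 1: isotropic coefficients). */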
3721 
3722   for (ii = 0; ii < f_n_cells_ext; ii++) {
3723     w1[ii] = -HUGE_VAL;
3724     w2[ii] = HUGE_VAL;
3725   }
3726 
3727   for (ic = 0; ic < c_n_cells_ext; ic++) {
3728     w3[ic] = -HUGE_VAL;
3729     w4[ic] = HUGE_VAL;
3730   }
3731 
3732   for (face_id = 0; face_id < f_n_faces; face_id++) {
3733     ii = f_face_cell[face_id][0];
3734     jj = f_face_cell[face_id][1];
3735     w1[ii] = CS_MAX(fabs(f_xa[face_id*isym]), w1[ii]);
3736     w2[ii] = CS_MIN(fabs(f_xa[face_id*isym]), w2[ii]);
3737     w1[jj] = CS_MAX(fabs(f_xa[(face_id +1)*isym -1]), w1[jj]);
3738     w2[jj] = CS_MIN(fabs(f_xa[(face_id +1)*isym -1]), w2[jj]);
3739   }
3740 
3741   for (c_face = 0; c_face < c_n_faces; c_face++) {
3742     ic = c_face_cell[c_face][0];
3743     jc = c_face_cell[c_face][1];
3744     w3[ic] = CS_MAX(fabs(c_xa[c_face*isym]), w3[ic]);
3745     w4[ic] = CS_MIN(fabs(c_xa[c_face*isym]), w4[ic]);
3746     w3[jc] = CS_MAX(fabs(c_xa[(c_face +1)*isym -1]), w3[jc]);
3747     w4[jc] = CS_MIN(fabs(c_xa[(c_face +1)*isym -1]), w4[jc]);
3748   }
3749 
3750   for (ii = 0; ii < f_n_cells; ii++)
3751     w1[ii] = w2[ii] / w1[ii];
3752 
3753   for (ic = 0; ic < c_n_cells; ic++)
3754     w3[ic] = w4[ic] / w3[ic];
3755 
3756   anmin[0] = HUGE_VAL; anmin[1] = HUGE_VAL;
3757   anmax[0] = -HUGE_VAL; anmax[1] = -HUGE_VAL;
3758 
3759   for (ii = 0; ii < f_n_cells; ii++) {
3760     if (w1[ii] < anmin[0])
3761       anmin[0] = w1[ii];
3762     if (w1[ii] > anmax[0])
3763       anmax[0] = w1[ii];
3764   }
3765 
3766   for (ic = 0; ic < c_n_cells; ic++) {
3767     if (w3[ic] < anmin[1])
3768       anmin[1] = w3[ic];
3769     if (w3[ic] > anmax[1])
3770       anmax[1] = w3[ic];
3771   }
3772 
3773 #if defined(HAVE_MPI) && defined(HAVE_MPI_IN_PLACE)
3774   if (comm != MPI_COMM_NULL) {
3775     MPI_Allreduce(MPI_IN_PLACE, anmin, 2, MPI_DOUBLE, MPI_MIN, comm);
3776     MPI_Allreduce(MPI_IN_PLACE, anmax, 2, MPI_DOUBLE, MPI_MAX, comm);
3777   }
3778 #endif
3779 
3780   bft_printf(_("       fine   grid anisotropy: min      = %12.5e\n"
3781                "                               max      = %12.5e\n"
3782                "       coarse grid anisotropy: min      = %12.5e\n"
3783                "                               max      = %12.5e\n"),
3784              anmin[0], anmax[0], anmin[1], anmax[1]);
3785 
3786   BFT_FREE(w2);
3787   BFT_FREE(w4);
3788 
3789   if (interp == 1) {
3790     double rmin = HUGE_VAL, rmax = -HUGE_VAL;
3791     for (c_face = 0; c_face < c_n_faces; c_face++) {
3792       rmin = CS_MIN(rmin, c_xa[c_face*isym] / c_xa0[c_face]);
3793       rmax = CS_MAX(rmax, c_xa[c_face*isym] / c_xa0[c_face]);
3794     }
3795 #if defined(HAVE_MPI) && defined(HAVE_MPI_IN_PLACE)
3796     if (comm != MPI_COMM_NULL) {
3797       MPI_Allreduce(MPI_IN_PLACE, &rmin, 1, MPI_DOUBLE, MPI_MIN, comm);
3798       MPI_Allreduce(MPI_IN_PLACE, &rmax, 1, MPI_DOUBLE, MPI_MAX, comm);
3799     }
3800 #endif
3801     bft_printf(_("       minimum xag_p1 / xag_p0          = %12.5e\n"
3802                  "       maximum xag_p1 / xag_p0          = %12.5e\n"),
3803                rmin, rmax);
3804   }
3805 
3806   BFT_FREE(w3);
3807   BFT_FREE(w1);
3808 }
3809 
3810 /*----------------------------------------------------------------------------
3811  * Build coarse grid matrix in the MSR format given the 4 necessary arrays.
3812  *
3813  * parameters:
3814  *   c           <-> coarse grid structure
3815  *   symmetric   <-- symmetry flag
3816  *   c_row_index <-- MSR row index (0 to n-1)
3817  *   c_col_id    <-- MSR column id (0 to n-1)
3818  *   c_d_val     <-- diagonal values
3819  *   c_x_val     <-- extradiagonal values
3820  *----------------------------------------------------------------------------*/
3821 
3822 static void
3823 _build_coarse_matrix_msr(cs_grid_t *c,
3824                          bool       symmetric,
3825                          cs_lnum_t *c_row_index,
3826                          cs_lnum_t *c_col_id,
3827                          cs_real_t *c_d_val,
3828                          cs_real_t *c_x_val)
3829 {
3830   cs_matrix_structure_t  *ms
3831     = cs_matrix_structure_create_msr(CS_MATRIX_MSR,
3832                                      true, /* transfer */
3833                                      true, /* have_diag */
3834                                      c->n_rows,
3835                                      c->n_cols_ext,
3836                                      &c_row_index,
3837                                      &c_col_id,
3838                                      c->halo,
3839                                      NULL);
3840 
3841   c->matrix_struct = ms;
3842 
3843   c->_matrix = cs_matrix_create(c->matrix_struct);
3844   c->matrix = c->_matrix;
3845 
3846   const cs_lnum_t *_c_row_index, *_c_col_id;
3847 
3848   cs_matrix_get_msr_arrays(c->matrix,
3849                            &_c_row_index, &_c_col_id,
3850                            NULL, NULL);
3851 
3852   cs_matrix_transfer_coefficients_msr(c->_matrix,
3853                                       symmetric,
3854                                       NULL,
3855                                       NULL,
3856                                       _c_row_index,
3857                                       _c_col_id,
3858                                       &c_d_val,
3859                                       &c_x_val);
3860 }
3861 
3862 /*----------------------------------------------------------------------------
3863  * Build a coarse level from a finer level using face->cells connectivity
3864  *
3865  * parameters:
3866  *   fine_grid   <-- fine grid structure
3867  *   coarse_grid <-> coarse grid structure
3868  *   verbosity   <-- verbosity level
3869  *----------------------------------------------------------------------------*/
3870 
3871 static void
3872 _compute_coarse_quantities_native(const cs_grid_t  *fine_grid,
3873                                   cs_grid_t        *coarse_grid,
3874                                   int               verbosity)
3875 {
3876   cs_lnum_t ic, jc, ii, jj, kk, face_id;
3877 
3878   cs_real_t dsigjg, dsxaij, agij;
3879 
3880   int isym = 2;
3881 
3882   cs_lnum_t f_n_cells = fine_grid->n_rows;
3883   cs_lnum_t f_n_cells_ext = fine_grid->n_cols_ext;
3884   cs_lnum_t f_n_faces = fine_grid->n_faces;
3885 
3886   cs_lnum_t c_n_cells_ext = coarse_grid->n_cols_ext;
3887   cs_lnum_t c_n_faces = coarse_grid->n_faces;
3888 
3889   cs_lnum_t *c_coarse_row = coarse_grid->coarse_row;
3890   cs_lnum_t *c_coarse_face = coarse_grid->coarse_face;
3891 
3892   cs_gnum_t n_clips_min = 0, n_clips_max = 0;
3893 
3894   cs_real_t *f_xa0ij = fine_grid->xa0ij;
3895 
3896   cs_real_t *c_cell_cen = coarse_grid->_cell_cen;
3897   cs_real_t *c_face_normal = coarse_grid->_face_normal;
3898 
3899   cs_real_t *c_xa0 = coarse_grid->_xa0;
3900   cs_real_t *c_xa0ij = coarse_grid->xa0ij;
3901   cs_real_t *c_da = coarse_grid->_da;
3902   cs_real_t *c_xa = coarse_grid->_xa;
3903 
3904   const cs_lnum_t *db_size = fine_grid->db_size;
3905 
3906   const cs_lnum_2_t *f_face_cell = fine_grid->face_cell;
3907   const cs_lnum_2_t *c_face_cell = coarse_grid->face_cell;
3908 
3909   const cs_real_t *f_face_normal = fine_grid->face_normal;
3910   const cs_real_t *f_xa0 = fine_grid->xa0;
3911   const cs_real_t *f_da = fine_grid->da;
3912   const cs_real_t *f_xa = fine_grid->xa;
3913 
3914   const cs_real_t relax_param = coarse_grid->relaxation;
3915 
3916   if (fine_grid->symmetric == true)
3917     isym = 1;
3918 
3919   /*  Finalize computation of matrix in c_da, c_xa */
3920   /*  relax_param <= 0 : P0 restriction / P0 prolongation => c_xa = c_xa0 */
3921   /*  relax_param > 0  : P0 restriction / P1 prolongation => c_xa = c_xa0ij/icjc */
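  /* For the P1 case, the coarse face coefficient is reconstructed
   * geometrically below as
   *   agij = (xa0ij . S) / ((x_jc - x_ic) . S),
   * where S is the aggregated coarse face normal, and clipped back to
   * c_xa0 whenever it falls outside [c_xa0, 0]. */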
3922 
3923   /* Extradiagonal terms */
3924 
3925   cs_lnum_t c_n_vals = c_n_faces*isym;
3926 
3927 # pragma omp parallel for if(c_n_vals*6 > CS_THR_MIN)
3928   for (cs_lnum_t c_val = 0; c_val < c_n_vals; c_val++) {
3929     c_xa[c_val] = 0.;
3930   }
3931 
3932   if (relax_param <= 0) {
3933 
3934     if (isym == 1) {
3935 
3936       for (face_id = 0; face_id < f_n_faces; face_id++) {
3937         cs_lnum_t c_face = c_coarse_face[face_id];
3938         if (c_face > 0) {
3939           c_face = c_face - 1;
3940           c_xa[c_face] += f_xa[face_id];
3941         }
3942         else if (c_face < 0) {
3943           c_face = -c_face - 1;
3944           c_xa[c_face] += f_xa[face_id];
3945         }
3946       }
3947 
3948     }
3949 
3950     else if (isym == 2) {
3951 
3952       for (face_id = 0; face_id < f_n_faces; face_id++) {
3953         cs_lnum_t c_face = c_coarse_face[face_id];
3954         if (c_face > 0) {
3955           c_face = c_face - 1;
3956           c_xa[c_face*2] += f_xa[face_id*2];
3957           c_xa[c_face*2 + 1] += f_xa[face_id*2 + 1];
3958         }
3959         else if (c_face < 0) {
3960           c_face = -c_face - 1;
3961           c_xa[c_face*2] += f_xa[face_id*2];
3962           c_xa[c_face*2 + 1] += f_xa[face_id*2 + 1];
3963         }
3964       }
3965 
3966     }
3967 
3968   }
3969 
3970   else if (relax_param > 0) {
3971 
3972     /* P0 restriction of matrices, "interior" surface: */
3973     /* xag0(nfacg), surfag(3,nfacgl), xagxg0(2,nfacg) */
3974 
3975 #   pragma omp parallel for if(c_n_faces*6 > CS_THR_MIN)
3976     for (cs_lnum_t c_face = 0; c_face < c_n_faces; c_face++) {
3977       c_xa0[c_face] = 0.;
3978       c_face_normal[3*c_face]    = 0.;
3979       c_face_normal[3*c_face +1] = 0.;
3980       c_face_normal[3*c_face +2] = 0.;
3981       c_xa0ij[3*c_face]    = 0.;
3982       c_xa0ij[3*c_face +1] = 0.;
3983       c_xa0ij[3*c_face +2] = 0.;
3984     }
3985 
3986     for (face_id = 0; face_id < f_n_faces; face_id++) {
3987 
3988       if (c_coarse_face[face_id] > 0 ) {
3989         cs_lnum_t c_face = c_coarse_face[face_id] -1;
3990 
3991         c_xa0[c_face] += f_xa0[face_id];
3992         c_face_normal[3*c_face]    += f_face_normal[3*face_id];
3993         c_face_normal[3*c_face +1] += f_face_normal[3*face_id +1];
3994         c_face_normal[3*c_face +2] += f_face_normal[3*face_id +2];
3995         c_xa0ij[3*c_face]    += f_xa0ij[3*face_id];
3996         c_xa0ij[3*c_face +1] += f_xa0ij[3*face_id +1];
3997         c_xa0ij[3*c_face +2] += f_xa0ij[3*face_id +2];
3998       }
3999       else if (c_coarse_face[face_id] < 0) {
4000         cs_lnum_t c_face = -c_coarse_face[face_id] -1;
4001 
4002         c_xa0[c_face] += f_xa0[face_id];
4003         c_face_normal[3*c_face]    -= f_face_normal[3*face_id];
4004         c_face_normal[3*c_face +1] -= f_face_normal[3*face_id +1];
4005         c_face_normal[3*c_face +2] -= f_face_normal[3*face_id +2];
4006         c_xa0ij[3*c_face]    -= f_xa0ij[3*face_id];
4007         c_xa0ij[3*c_face +1] -= f_xa0ij[3*face_id +1];
4008         c_xa0ij[3*c_face +2] -= f_xa0ij[3*face_id +2];
4009       }
4010 
4011     }
4012 
4013     /* Matrix initialized to c_xa0 */
4014 
4015     if (isym == 1) {
4016 #     pragma omp parallel for if(c_n_faces > CS_THR_MIN)
4017       for (cs_lnum_t c_face = 0; c_face < c_n_faces; c_face++)
4018         c_xa[c_face] = c_xa0[c_face];
4019     }
4020     else {
4021 #     pragma omp parallel for if(c_n_faces*2 > CS_THR_MIN)
4022       for (cs_lnum_t c_face = 0; c_face < c_n_faces; c_face++) {
4023         c_xa[2*c_face]    = c_xa0[c_face];
4024         c_xa[2*c_face +1] = c_xa0[c_face];
4025       }
4026     }
4027 
4028     for (cs_lnum_t c_face = 0; c_face < c_n_faces; c_face++) {
4029 
4030       ic = c_face_cell[c_face][0];
4031       jc = c_face_cell[c_face][1];
4032 
4033       dsigjg =   (  c_cell_cen[3*jc]
4034                   - c_cell_cen[3*ic])    * c_face_normal[3*c_face]
4035                + (  c_cell_cen[3*jc +1]
4036                   - c_cell_cen[3*ic +1]) * c_face_normal[3*c_face +1]
4037                + (  c_cell_cen[3*jc +2]
4038                   - c_cell_cen[3*ic +2]) * c_face_normal[3*c_face +2];
4039 
4040       dsxaij =   c_xa0ij[3*c_face]    * c_face_normal[3*c_face]
4041                + c_xa0ij[3*c_face +1] * c_face_normal[3*c_face +1]
4042                + c_xa0ij[3*c_face +2] * c_face_normal[3*c_face +2];
4043 
4044       if (fabs(dsigjg) > EPZERO) {
4045 
4046         agij = dsxaij/dsigjg;
4047 
4048         /* Standard */
4049         c_xa[c_face*isym] = agij;
4050         c_xa[(c_face +1)*isym -1] = agij;
4051 
4052         /* Clipped matrix */
4053         if (agij < c_xa0[c_face] || agij > 0.) {
4054           c_xa[c_face*isym] = c_xa0[c_face];
4055           c_xa[(c_face +1)*isym -1] = c_xa0[c_face];
4056           if (agij < c_xa0[c_face]) n_clips_min++;
4057           if (agij > 0.) n_clips_max++;
4058         }
4059 
4060       }
4061     }
4062 
4063     /* Possible P1 matrix / P0 matrix relaxation defined
4064        by the user in cs_user_parameters.c
4065        (using cs_multigrid_set_coarsening_options) */
4066 
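    /* i.e. c_xa = relax_param * (P1 value) + (1 - relax_param) * c_xa0 */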
4067     if (isym == 1) {
4068       for (cs_lnum_t c_face = 0; c_face < c_n_faces; c_face++)
4069         c_xa[c_face] = relax_param*c_xa[c_face]
4070                      + (1. - relax_param)*c_xa0[c_face];
4071     }
4072     else {
4073       for (cs_lnum_t c_face = 0; c_face < c_n_faces; c_face++) {
4074         c_xa[2*c_face]    =         relax_param  * c_xa[2*c_face]
4075                             + (1. - relax_param) * c_xa0[c_face];
4076         c_xa[2*c_face +1] =         relax_param  * c_xa[2*c_face +1]
4077                             + (1. - relax_param) * c_xa0[c_face];
4078       }
4079     }
4080 
4081   } /* relax_param > 0 */
4082 
4083   /* Initialize non differential fine grid term saved in w1 */
4084 
4085   cs_real_t *w1 = NULL;
4086   BFT_MALLOC(w1, f_n_cells_ext*db_size[3], cs_real_t);
4087 
4088   if (db_size[0] == 1) {
4089 #   pragma omp parallel for if(f_n_cells > CS_THR_MIN)
4090     for (ii = 0; ii < f_n_cells; ii++)
4091       w1[ii] = f_da[ii];
4092   }
4093   else {
4094 #   pragma omp parallel for private(jj, kk) if(f_n_cells > CS_THR_MIN)
4095     for (ii = 0; ii < f_n_cells; ii++) {
4096       for (jj = 0; jj < db_size[0]; jj++) {
4097         for (kk = 0; kk < db_size[0]; kk++)
4098           w1[ii*db_size[3] + db_size[2]*jj + kk]
4099             = f_da[ii*db_size[3] + db_size[2]*jj + kk];
4100       }
4101     }
4102   }
4103 # pragma omp parallel for if(f_n_cells_ext - f_n_cells > CS_THR_MIN)
4104   for (ii = f_n_cells*db_size[3]; ii < f_n_cells_ext*db_size[3]; ii++)
4105     w1[ii] = 0.;
4106 
4107   for (face_id = 0; face_id < f_n_faces; face_id++) {
4108     ii = f_face_cell[face_id][0];
4109     jj = f_face_cell[face_id][1];
4110     for (kk = 0; kk < db_size[0]; kk++) {
4111       w1[ii*db_size[3] + db_size[2]*kk + kk] += f_xa[face_id*isym];
4112       w1[jj*db_size[3] + db_size[2]*kk + kk] += f_xa[(face_id +1)*isym -1];
4113     }
4114   }
4115 
4116   /* Diagonal term */
4117 
4118 # pragma omp parallel for if(c_n_cells_ext > CS_THR_MIN)
4119   for (ic = 0; ic < c_n_cells_ext*db_size[3]; ic++)
4120     c_da[ic] = 0.;
4121 
4122   if (db_size[0] == 1) {
4123     for (ii = 0; ii < f_n_cells; ii++) {
4124       ic = c_coarse_row[ii];
4125       if (ic > -1)
4126         c_da[ic] += w1[ii];
4127     }
4128   }
4129   else {
4130     for (ii = 0; ii < f_n_cells; ii++) {
4131       ic = c_coarse_row[ii];
4132       if (ic > -1) {
4133         for (jj = 0; jj < db_size[0]; jj++) {
4134           for (kk = 0; kk < db_size[0]; kk++)
4135             c_da[ic*db_size[3] + db_size[2]*jj + kk]
4136               += w1[ii*db_size[3] + db_size[2]*jj + kk];
4137         }
4138       }
4139     }
4140   }
4141 
4142   for (cs_lnum_t c_face = 0; c_face < c_n_faces; c_face++) {
4143     ic = c_face_cell[c_face][0];
4144     jc = c_face_cell[c_face][1];
4145     for (kk = 0; kk < db_size[0]; kk++) {
4146       c_da[ic*db_size[3] + db_size[2]*kk + kk] -= c_xa[c_face*isym];
4147       c_da[jc*db_size[3] + db_size[2]*kk + kk] -= c_xa[(c_face +1)*isym -1];
4148     }
4149   }
4150 
4151   BFT_FREE(w1);
4152 
4153   /* Optional verification */
4154 
4155   if (verbosity > 3) {
4156     int interp = (relax_param > 0) ? 1 : 0;
4157     _verify_coarse_quantities(fine_grid,
4158                               coarse_grid,
4159                               n_clips_min,
4160                               n_clips_max,
4161                               interp);
4162   }
4163 
4164 }
4165 
4166 /*----------------------------------------------------------------------------
4167  * Build a coarse level from a finer level for convection/diffusion case.
4168  *
4169  * parameters:
4170  *   fine_grid   <-- fine grid structure
4171  *   coarse_grid <-> coarse grid structure
4172  *   verbosity   <-- verbosity level
4173  *----------------------------------------------------------------------------*/
4174 
4175 static void
4176 _compute_coarse_quantities_conv_diff(const cs_grid_t  *fine_grid,
4177                                      cs_grid_t        *coarse_grid,
4178                                      int               verbosity)
4179 {
4180   cs_lnum_t ic, jc, ii, jj, kk, c_face, face_id;
4181 
4182   cs_real_t dsigjg, dsxaij, agij;
4183 
4184   cs_lnum_t f_n_cells = fine_grid->n_rows;
4185   cs_lnum_t f_n_cells_ext = fine_grid->n_cols_ext;
4186   cs_lnum_t f_n_faces = fine_grid->n_faces;
4187 
4188   cs_lnum_t c_n_cells_ext = coarse_grid->n_cols_ext;
4189   cs_lnum_t c_n_faces = coarse_grid->n_faces;
4190 
4191   cs_lnum_t *c_coarse_row = coarse_grid->coarse_row;
4192   cs_lnum_t *c_coarse_face = coarse_grid->coarse_face;
4193 
4194   cs_gnum_t n_clips_min = 0, n_clips_max = 0;
4195 
4196   cs_real_t *f_xa0ij = fine_grid->xa0ij;
4197 
4198   cs_real_t *c_cell_cen = coarse_grid->_cell_cen;
4199   cs_real_t *c_face_normal = coarse_grid->_face_normal;
4200 
4201   cs_real_t *c_xa0 = coarse_grid->_xa0;
4202   cs_real_t *c_xa0_diff = coarse_grid->_xa0_diff;
4203   cs_real_t *c_xa0ij = coarse_grid->xa0ij;
4204   cs_real_t *c_da = coarse_grid->_da;
4205   cs_real_t *c_xa = coarse_grid->_xa;
4206   cs_real_t *c_da_conv = coarse_grid->_da_conv;
4207   cs_real_t *c_xa_conv = coarse_grid->_xa_conv;
4208   cs_real_t *c_da_diff = coarse_grid->_da_diff;
4209   cs_real_t *c_xa_diff = coarse_grid->_xa_diff;
4210 
4211   cs_real_t *w1 = NULL, *w1_conv = NULL, *w1_diff = NULL;
4212 
4213   const cs_lnum_t *db_size = fine_grid->db_size;
4214 
4215   const cs_lnum_2_t *f_face_cell = fine_grid->face_cell;
4216   const cs_lnum_2_t *c_face_cell = coarse_grid->face_cell;
4217 
4218   const cs_real_t *f_face_normal = fine_grid->face_normal;
4219   const cs_real_t *f_xa0 = fine_grid->xa0;
4220   const cs_real_t *f_da = fine_grid->da;
4221   const cs_real_t *f_da_conv = fine_grid->da_conv;
4222   const cs_real_t *f_xa_conv = fine_grid->xa_conv;
4223   const cs_real_t *f_xa0_diff = fine_grid->xa0_diff;
4224   const cs_real_t *f_da_diff = fine_grid->da_diff;
4225   const cs_real_t *f_xa_diff = fine_grid->xa_diff;
4226 
4227   BFT_MALLOC(w1,      2*f_n_cells_ext*db_size[3], cs_real_t);
4228   BFT_MALLOC(w1_conv, 2*f_n_cells_ext*db_size[3], cs_real_t);
4229   BFT_MALLOC(w1_diff, 2*f_n_cells_ext*db_size[3], cs_real_t);
4230 
4231   assert(fine_grid->symmetric == false);
4232 
4233   /* P0 restriction of matrices, "interior" surface: */
4234   /* xag0(nfacg), surfag(3,nfacgl), xagxg0(2,nfacg) */
4235 
4236 # pragma omp parallel for if(c_n_faces*6 > CS_THR_MIN)
4237   for (c_face = 0; c_face < c_n_faces; c_face++) {
4238     c_xa0[2*c_face]         = 0.;
4239     c_xa0[2*c_face +1]      = 0.;
4240     c_xa0_diff[c_face]      = 0.;
4241     c_face_normal[3*c_face]    = 0.;
4242     c_face_normal[3*c_face +1] = 0.;
4243     c_face_normal[3*c_face +2] = 0.;
4244     c_xa0ij[3*c_face]    = 0.;
4245     c_xa0ij[3*c_face +1] = 0.;
4246     c_xa0ij[3*c_face +2] = 0.;
4247     c_xa_conv[2*c_face]    = 0.;
4248     c_xa_conv[2*c_face +1] = 0.;
4249   }
4250 
4251   for (face_id = 0; face_id < f_n_faces; face_id++) {
4252 
4253     if (c_coarse_face[face_id] > 0 ) {
4254       c_face = c_coarse_face[face_id] -1;
4255 
4256       c_xa0[2*c_face]         += f_xa0[2*face_id];
4257       c_xa0[2*c_face +1]      += f_xa0[2*face_id +1];
4258       c_xa_conv[2*c_face]     += f_xa_conv[2*face_id];
4259       c_xa_conv[2*c_face +1]  += f_xa_conv[2*face_id +1];
4260       c_xa0_diff[c_face]      += f_xa0_diff[face_id];
4261       c_face_normal[3*c_face]    += f_face_normal[3*face_id];
4262       c_face_normal[3*c_face +1] += f_face_normal[3*face_id +1];
4263       c_face_normal[3*c_face +2] += f_face_normal[3*face_id +2];
4264       c_xa0ij[3*c_face]    += f_xa0ij[3*face_id];
4265       c_xa0ij[3*c_face +1] += f_xa0ij[3*face_id +1];
4266       c_xa0ij[3*c_face +2] += f_xa0ij[3*face_id +2];
4267     }
4268     else if (c_coarse_face[face_id] < 0) {
4269       c_face = -c_coarse_face[face_id] -1;
4270 
4271       c_xa0[2*c_face]         += f_xa0[2*face_id +1];
4272       c_xa0[2*c_face +1]      += f_xa0[2*face_id];
4273       c_xa_conv[2*c_face]     += f_xa_conv[2*face_id +1];
4274       c_xa_conv[2*c_face +1]  += f_xa_conv[2*face_id];
4275       c_xa0_diff[c_face]      += f_xa0_diff[face_id];
4276       c_face_normal[3*c_face]    -= f_face_normal[3*face_id];
4277       c_face_normal[3*c_face +1] -= f_face_normal[3*face_id +1];
4278       c_face_normal[3*c_face +2] -= f_face_normal[3*face_id +2];
4279       c_xa0ij[3*c_face]    -= f_xa0ij[3*face_id];
4280       c_xa0ij[3*c_face +1] -= f_xa0ij[3*face_id +1];
4281       c_xa0ij[3*c_face +2] -= f_xa0ij[3*face_id +2];
4282     }
4283 
4284   }
4285 
4286   /*  Finalize computation of matrix in c_da, c_xa */
4287   /*  interp = 0 : P0 restriction / P0 prolongation => c_xa = c_xa0 */
4288   /*  interp = 1 : P0 restriction / P1 prolongation => c_xa = c_xa0ij/icjc */
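  /* Here only the diffusion part is rebuilt with the P1 approach; the
   * convection part is simply summed (P0), and the full matrix is
   * reassembled below as c_xa = c_xa_conv + c_xa_diff, with the diagonal
   * also receiving the non-differential part stored in w1. */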
4289 
4290   /*  Initialization */
4291 
4292   const cs_real_t relax_param = coarse_grid->relaxation;
4293 
4294   int interp = 1;
4295 
4296   /* Initialize non differential fine grid term saved in w1 */
4297 
4298   if (db_size[0] == 1) {
4299 #   pragma omp parallel for if(f_n_cells > CS_THR_MIN)
4300     for (ii = 0; ii < f_n_cells; ii++) {
4301       w1_conv[ii] = f_da_conv[ii];
4302       w1_diff[ii] = f_da_diff[ii];
4303       w1[ii]      = f_da[ii] - f_da_conv[ii] - f_da_diff[ii];
4304     }
4305   }
4306   else {
4307 #   pragma omp parallel for private(jj, kk) if(f_n_cells > CS_THR_MIN)
4308     for (ii = 0; ii < f_n_cells; ii++) {
4309       for (jj = 0; jj < db_size[0]; jj++) {
4310         for (kk = 0; kk < db_size[0]; kk++) {
4311           w1_conv[ii*db_size[3] + db_size[2]*jj + kk]
4312             = f_da_conv[ii*db_size[3] + db_size[2]*jj + kk];
4313           w1_diff[ii*db_size[3] + db_size[2]*jj + kk]
4314             = f_da_diff[ii*db_size[3] + db_size[2]*jj + kk];
4315           w1[ii*db_size[3] + db_size[2]*jj + kk]
4316             = f_da[ii*db_size[3] + db_size[2]*jj + kk]
4317               - f_da_conv[ii*db_size[3] + db_size[2]*jj + kk]
4318               - f_da_diff[ii*db_size[3] + db_size[2]*jj + kk];
4319         }
4320       }
4321     }
4322   }
4323 
4324 # pragma omp parallel for if(f_n_cells_ext - f_n_cells > CS_THR_MIN)
4325   for (ii = f_n_cells*db_size[3]; ii < f_n_cells_ext*db_size[3]; ii++)
4326     w1[ii] = 0.;
4327 
4328   if (db_size[0] == 1) {
4329     for (face_id = 0; face_id < f_n_faces; face_id++) {
4330       ii = f_face_cell[face_id][0];
4331       jj = f_face_cell[face_id][1];
4332       w1_conv[ii] += f_xa_conv[2*face_id];
4333       w1_conv[jj] += f_xa_conv[2*face_id +1];
4334       w1_diff[ii] += f_xa_diff[face_id];
4335       w1_diff[jj] += f_xa_diff[face_id];
4336     }
4337   }
4338   else {
4339     for (face_id = 0; face_id < f_n_faces; face_id++) {
4340       ii = f_face_cell[face_id][0];
4341       jj = f_face_cell[face_id][1];
4342       for (kk = 0; kk < db_size[0]; kk++) {
4343         w1_conv[ii*db_size[3] + db_size[2]*kk + kk]
4344           += f_xa_conv[2*face_id];
4345         w1_conv[jj*db_size[3] + db_size[2]*kk + kk]
4346           += f_xa_conv[2*face_id +1];
4347         w1_diff[ii*db_size[3] + db_size[2]*kk + kk]
4348           += f_xa_diff[face_id];
4349         w1_diff[jj*db_size[3] + db_size[2]*kk + kk]
4350           += f_xa_diff[face_id];
4351       }
4352     }
4353   }
4354 
4355   /* Initialize coarse matrix storage on (c_da, c_xa) */
4356 
4357 # pragma omp parallel for if(c_n_cells_ext > CS_THR_MIN)
4358   for (ic = 0; ic < c_n_cells_ext*db_size[3]; ic++) {
4359     c_da[ic]      = 0.;
4360     c_da_conv[ic] = 0.;
4361     c_da_diff[ic] = 0.;
4362   }
4363 
4364   /* Extradiagonal terms */
4365   /* (symmetric matrices for now, even with non-symmetric storage isym = 2) */
4366 
4367   /* Matrix initialized to c_xa0 (interp=0) */
4368 
4369 # pragma omp parallel for if(c_n_faces*2 > CS_THR_MIN)
4370   for (c_face = 0; c_face < c_n_faces; c_face++) {
4371     c_xa[2*c_face]         = c_xa0[2*c_face];
4372     c_xa[2*c_face +1]      = c_xa0[2*c_face +1];
4373     c_xa_diff[c_face]      = c_xa0_diff[c_face];
4374   }
4375 
4376   if (interp == 1) {
4377    for (c_face = 0; c_face < c_n_faces; c_face++) {
4378 
4379       ic = c_face_cell[c_face][0];
4380       jc = c_face_cell[c_face][1];
4381 
4382       dsigjg =   (  c_cell_cen[3*jc]
4383                   - c_cell_cen[3*ic])    * c_face_normal[3*c_face]
4384                + (  c_cell_cen[3*jc +1]
4385                   - c_cell_cen[3*ic +1]) * c_face_normal[3*c_face +1]
4386                + (  c_cell_cen[3*jc +2]
4387                   - c_cell_cen[3*ic +2]) * c_face_normal[3*c_face +2];
4388 
4389       dsxaij =   c_xa0ij[3*c_face]    * c_face_normal[3*c_face]
4390                + c_xa0ij[3*c_face +1] * c_face_normal[3*c_face +1]
4391                + c_xa0ij[3*c_face +2] * c_face_normal[3*c_face +2];
4392 
4393       if (fabs(dsigjg) > EPZERO) {
4394 
4395         agij = dsxaij/dsigjg;
4396 
4397         /* Standard */
4398         c_xa_diff[c_face] = agij;
4399 
4400         /* Clipped matrix */
4401         if (agij < c_xa0_diff[c_face] || agij > 0.) {
4402           c_xa_diff[c_face] = c_xa0_diff[c_face];
4403           if (agij < c_xa0_diff[c_face]) n_clips_min++;
4404           if (agij > 0.) n_clips_max++;
4405         }
4406 
4407       }
4408     }
4409 
4410     /* Possible P1 matrix / P0 matrix relaxation defined
4411        by the user in cs_user_parameters.c
4412        (using cs_multigrid_set_coarsening_options) */
4413     for (c_face = 0; c_face < c_n_faces; c_face++) {
4414       c_xa_diff[c_face] =         relax_param  * c_xa_diff[c_face]
4415                           + (1. - relax_param) * c_xa0_diff[c_face];
4416     }
4417 
4418   } /* endif interp == 1 */
4419 
4420   if (interp != 0 && interp != 1)
4421     bft_error(__FILE__, __LINE__, 0, "interp incorrectly defined.");
4422 
4423   /* Diagonal term */
4424 
4425   if (db_size[0] == 1) {
4426     for (ii = 0; ii < f_n_cells; ii++) {
4427       ic = c_coarse_row[ii];
4428       c_da_conv[ic] += w1_conv[ii];
4429       c_da_diff[ic] += w1_diff[ii];
4430     }
4431   }
4432   else {
4433     for (ii = 0; ii < f_n_cells; ii++) {
4434       ic = c_coarse_row[ii];
4435       for (jj = 0; jj < db_size[0]; jj++) {
4436         for (kk = 0; kk < db_size[0]; kk++) {
4437           c_da_conv[ic*db_size[3] + db_size[2]*jj + kk]
4438             += w1_conv[ii*db_size[3] + db_size[2]*jj + kk];
4439           c_da_diff[ic*db_size[3] + db_size[2]*jj + kk]
4440             += w1_diff[ii*db_size[3] + db_size[2]*jj + kk];
4441         }
4442       }
4443     }
4444   }
4445 
4446   for (c_face = 0; c_face < c_n_faces; c_face++) {
4447     ic = c_face_cell[c_face][0];
4448     jc = c_face_cell[c_face][1];
4449     c_da_conv[ic] -= c_xa_conv[2*c_face];
4450     c_da_conv[jc] -= c_xa_conv[2*c_face +1];
4451     c_da_diff[ic] -= c_xa_diff[c_face];
4452     c_da_diff[jc] -= c_xa_diff[c_face];
4453   }
4454 
4455   /* Convection/diffusion matrix */
4456 
4457   for (c_face = 0; c_face < c_n_faces; c_face++) {
4458     c_xa[2*c_face]    = c_xa_conv[2*c_face]    + c_xa_diff[c_face];
4459     c_xa[2*c_face +1] = c_xa_conv[2*c_face +1] + c_xa_diff[c_face];
4460   }
4461 
4462   if (db_size[0] == 1) {
4463     for (ii = 0; ii < f_n_cells; ii++) {
4464       ic = c_coarse_row[ii];
4465       c_da[ic] += w1[ii];
4466     }
4467   }
4468   else {
4469     for (ii = 0; ii < f_n_cells; ii++) {
4470       ic = c_coarse_row[ii];
4471       for (jj = 0; jj < db_size[0]; jj++) {
4472         for (kk = 0; kk < db_size[0]; kk++)
4473           c_da[ic*db_size[3] + db_size[2]*jj + kk]
4474             += w1[ii*db_size[3] + db_size[2]*jj + kk];
4475       }
4476     }
4477   }
4478 
4479   for (ic = 0; ic < c_n_cells_ext; ic++)
4480     c_da[ic] += c_da_conv[ic] + c_da_diff[ic];
4481 
4482   BFT_FREE(w1);
4483   BFT_FREE(w1_conv);
4484   BFT_FREE(w1_diff);
4485 
4486   /* Optional verification */
4487 
4488   if (verbosity > 3)
4489     _verify_coarse_quantities(fine_grid,
4490                               coarse_grid,
4491                               n_clips_min,
4492                               n_clips_max,
4493                               interp);
4494 
4495 }
4496 
4497 /*----------------------------------------------------------------------------
4498  * Build a coarse level from a finer level with an MSR matrix.
4499  *
4500  * parameters:
4501  *   fine_grid   <-- Fine grid structure
4502  *   coarse_grid <-> Coarse grid structure
4503  *----------------------------------------------------------------------------*/
4504 
4505 static void
4506 _compute_coarse_quantities_msr(const cs_grid_t  *fine_grid,
4507                                cs_grid_t        *coarse_grid)
4508 
4509 {
4510   const cs_lnum_t *db_size = fine_grid->db_size;
4511 
4512   const cs_lnum_t f_n_rows = fine_grid->n_rows;
4513 
4514   const cs_lnum_t c_n_rows = coarse_grid->n_rows;
4515   const cs_lnum_t c_n_cols = coarse_grid->n_cols_ext;
4516   const cs_lnum_t *c_coarse_row = coarse_grid->coarse_row;
4517 
4518   /* Fine matrix in the MSR format */
4519 
4520   const cs_lnum_t  *f_row_index, *f_col_id;
4521   const cs_real_t  *f_d_val, *f_x_val;
4522 
4523   cs_matrix_get_msr_arrays(fine_grid->matrix,
4524                            &f_row_index,
4525                            &f_col_id,
4526                            &f_d_val,
4527                            &f_x_val);
4528 
4529   /* Coarse matrix elements in the MSR format */
4530 
4531   cs_lnum_t *restrict c_row_index,  *restrict c_col_id;
4532   cs_real_t *restrict c_d_val, *restrict c_x_val;
4533 
4534   /* Diagonal elements
4535      ----------------- */
4536 
4537   BFT_MALLOC(c_d_val, c_n_rows*db_size[3], cs_real_t);
4538 
4539   for (cs_lnum_t i = 0; i < c_n_rows*db_size[3]; i++)
4540     c_d_val[i] = 0.0;
4541 
4542   /* Careful here, we exclude penalized rows
4543      from the aggregation process */
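  /* Rows excluded from aggregation have a coarse row id outside the
   * [0, c_n_rows) range, so the tests below simply skip them. */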
4544 
4545   if (db_size[0] == 1) {
4546     for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
4547       cs_lnum_t ic = c_coarse_row[ii];
4548       if (ic > -1 && ic < c_n_rows)
4549         c_d_val[ic] += f_d_val[ii];
4550     }
4551   }
4552   else {
4553     for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
4554       cs_lnum_t ic = c_coarse_row[ii];
4555       if (ic > -1 && ic < c_n_rows) {
4556         for (cs_lnum_t jj = 0; jj < db_size[0]; jj++) {
4557           for (cs_lnum_t kk = 0; kk < db_size[0]; kk++)
4558             c_d_val[ic*db_size[3] + db_size[2]*jj + kk]
4559               += f_d_val[ii*db_size[3] + db_size[2]*jj + kk];
4560         }
4561       }
4562     }
4563   }
4564 
4565   /* Extradiagonal elements
4566      ---------------------- */
4567 
4568   BFT_MALLOC(c_row_index, c_n_rows+1, cs_lnum_t);
4569 
4570   /* Prepare to traverse fine rows by increasing associated coarse row */
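  /* This amounts to a counting sort on coarse row ids: cf_row_idx first
   * accumulates the number of fine rows mapped to each coarse row, is
   * turned into an index by prefix sum, and f_row_id then lists the
   * active fine rows grouped by increasing coarse row. */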
4571 
4572   cs_lnum_t  f_n_active_rows = 0;
4573   cs_lnum_t *f_row_id = NULL;
4574   {
4575     cs_lnum_t *cf_row_idx;
4576     BFT_MALLOC(cf_row_idx, c_n_rows+1, cs_lnum_t);
4577 
4578     for (cs_lnum_t i = 0; i <= c_n_rows; i++)
4579       cf_row_idx[i] = 0;
4580 
4581     for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
4582       cs_lnum_t i = c_coarse_row[ii];
4583       if (i > -1 && i < c_n_rows)
4584         cf_row_idx[i+1] += 1;
4585     }
4586 
4587     for (cs_lnum_t i = 0; i < c_n_rows; i++)
4588       cf_row_idx[i+1] += cf_row_idx[i];
4589 
4590     f_n_active_rows = cf_row_idx[c_n_rows];
4591 
4592     BFT_MALLOC(f_row_id, f_n_active_rows, cs_lnum_t);
4593     for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
4594       cs_lnum_t i = c_coarse_row[ii];
4595       if (i > -1 && i < c_n_rows) {
4596         f_row_id[cf_row_idx[i]] = ii;
4597         cf_row_idx[i] += 1;
4598       }
4599     }
4600 
4601     BFT_FREE(cf_row_idx);
4602   }
4603 
4604   /* Counting pass */
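  /* last_row[j] records the last coarse row for which coarse column j was
   * counted; since fine rows are visited by increasing coarse row id,
   * last_row[j] < i means the coarse pair (i, j) has not been counted yet,
   * so each pair contributes exactly once to c_row_index. */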
4605 
4606   {
4607     cs_lnum_t *last_row;
4608     BFT_MALLOC(last_row, c_n_cols, cs_lnum_t);
4609 
4610     for (cs_lnum_t i = 0; i <= c_n_rows; i++)
4611       c_row_index[i] = 0;
4612 
4613     for (cs_lnum_t i = 0; i < c_n_cols; i++)
4614       last_row[i] = -1;
4615 
4616     for (cs_lnum_t ii_id = 0; ii_id < f_n_active_rows; ii_id++) {
4617 
4618       cs_lnum_t ii = f_row_id[ii_id];
4619 
4620       cs_lnum_t s_id = f_row_index[ii];
4621       cs_lnum_t e_id = f_row_index[ii+1];
4622 
4623       for (cs_lnum_t jj_ind = s_id; jj_ind < e_id; jj_ind++) {
4624 
4625         cs_lnum_t jj = f_col_id[jj_ind];
4626 
4627         cs_lnum_t i = c_coarse_row[ii];
4628         cs_lnum_t j = c_coarse_row[jj];
4629 
4630         assert(i > -1 && i < c_n_rows);
4631 
4632         if (j > -1 && i != j && last_row[j] < i) {
4633           last_row[j] = i;
4634           c_row_index[i+1]++;
4635         }
4636 
4637       }
4638 
4639     }
4640 
4641     BFT_FREE(last_row);
4642   }
4643 
4644   /* Transform count to index */
4645 
4646   for (cs_lnum_t i = 0; i < c_n_rows; i++)
4647     c_row_index[i+1] += c_row_index[i];
4648 
4649   cs_lnum_t c_size = c_row_index[c_n_rows];
4650 
4651   BFT_MALLOC(c_x_val, c_size, cs_real_t);
4652   BFT_MALLOC(c_col_id, c_size, cs_lnum_t);
4653 
4654   /* Assignment pass */
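  /* r_col_idx[j] holds the slot assigned to coarse column j within the
   * current coarse row i; any value below c_row_index[i] (the initial -1
   * or a slot from a previous row) means the column has not yet been
   * inserted for this row. */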
4655 
4656   {
4657     for (cs_lnum_t i = 0; i < c_size; i++)
4658       c_col_id[i] = -1;
4659 
4660     cs_lnum_t *r_col_idx; /* col_idx for a given id in current row */
4661     BFT_MALLOC(r_col_idx, c_n_cols, cs_lnum_t);
4662 
4663     for (cs_lnum_t i = 0; i < c_n_cols; i++)
4664       r_col_idx[i] = -1;
4665 
4666     cs_lnum_t i_prev = -1;
4667     cs_lnum_t r_count = 0;
4668 
4669     for (cs_lnum_t ii_id = 0; ii_id < f_n_active_rows; ii_id++) {
4670 
4671       cs_lnum_t ii = f_row_id[ii_id];
4672 
4673       cs_lnum_t i = c_coarse_row[ii];
4674 
4675       if (i_prev != i) {
4676         r_count = 0;
4677         i_prev = i;
4678       }
4679 
4680       assert(i > -1 && i < c_n_rows);
4681 
4682       for (cs_lnum_t jj_ind = f_row_index[ii];
4683            jj_ind < f_row_index[ii+1];
4684            jj_ind++) {
4685 
4686         cs_lnum_t jj = f_col_id[jj_ind];
4687 
4688         cs_lnum_t j = c_coarse_row[jj];
4689         if (j > -1 && i != j) {
4690           if (r_col_idx[j] < c_row_index[i]) {
4691               r_col_idx[j] = c_row_index[i] + r_count;
4692               c_col_id[r_col_idx[j]] = j;
4693               r_count++;
4694           }
4695         }
4696       }
4697     }
4698 
4699     BFT_FREE(r_col_idx);
4700   }
4701 
4702   BFT_FREE(f_row_id);
4703 
4704   /* Order column ids in case some algorithms expect it */
4705 
4706   cs_sort_indexed(c_n_rows, c_row_index, c_col_id);
4707 
4708   /* Values assignment pass */
4709 
4710   {
4711     for (cs_lnum_t i = 0; i < c_size; i++)
4712       c_x_val[i] = 0;
4713 
4714     for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
4715 
4716       cs_lnum_t i = c_coarse_row[ii];
4717 
4718       if (i > -1 && i < c_n_rows) {
4719 
4720         for (cs_lnum_t jj_ind = f_row_index[ii];
4721              jj_ind < f_row_index[ii+1];
4722              jj_ind++) {
4723 
4724           cs_lnum_t jj = f_col_id[jj_ind];
4725 
4726           cs_lnum_t j = c_coarse_row[jj];
4727 
4728           if (j > -1) {
4729 
4730             if (i != j) {
4731               cs_lnum_t s_id = c_row_index[i];
4732               cs_lnum_t n_cols = c_row_index[i+1] - s_id;
4733               /* ids are sorted, so binary search possible */
4734               cs_lnum_t k = _l_id_binary_search(n_cols, j, c_col_id + s_id);
4735               c_x_val[k + s_id] += f_x_val[jj_ind];
4736             }
4737             else { /* i == j */
4738               for (cs_lnum_t kk = 0; kk < db_size[0]; kk++) {
4739                 /* diagonal terms only */
4740                 c_d_val[i*db_size[3] + db_size[2]*kk + kk]
4741                   += f_x_val[jj_ind];
4742               }
4743             }
4744 
4745           }
4746         }
4747 
4748       }
4749 
4750     }
4751 
4752   }
4753 
4754   _build_coarse_matrix_msr(coarse_grid, fine_grid->symmetric,
4755                            c_row_index, c_col_id,
4756                            c_d_val, c_x_val);
4757 }
4758 
4759 /*----------------------------------------------------------------------------
4760  * Build edge-based matrix values from MSR matrix.
4761  *
4762  * parameters:
4763  *   grid <-> grid structure
4764  *----------------------------------------------------------------------------*/
4765 
4766 static void
4767 _native_from_msr(cs_grid_t  *g)
4768 
4769 {
4770   const cs_lnum_t db_stride = g->db_size[3];
4771   const cs_lnum_t eb_stride = g->eb_size[3];
4772 
4773   const cs_lnum_t n_rows = g->n_rows;
4774   const cs_lnum_t n_cols_ext = g->n_cols_ext;
4775 
4776   /* Matrix in the MSR format */
4777 
4778   const cs_lnum_t  *row_index, *col_id;
4779   const cs_real_t  *d_val, *x_val;
4780 
4781   cs_matrix_get_msr_arrays(g->matrix,
4782                            &row_index,
4783                            &col_id,
4784                            &d_val,
4785                            &x_val);
4786 
4787   {
4788     BFT_REALLOC(g->_da, db_stride*n_cols_ext, cs_real_t);
4789     g->da = g->_da;
4790 
4791     for (cs_lnum_t i = 0; i < n_rows; i++) {
4792       for (cs_lnum_t l = 0; l < db_stride; l++)
4793         g->_da[i*db_stride + l] = d_val[i*db_stride + l];
4794     }
4795   }
4796 
4797   if (g->symmetric) {
4798     BFT_REALLOC(g->_face_cell, row_index[n_rows], cs_lnum_2_t);
4799     BFT_REALLOC(g->_xa, eb_stride*row_index[n_rows], cs_real_t);
4800 
4801     cs_lnum_t n_edges = 0;
4802 
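    /* Keep only upper-triangular entries (col_id > row id), so that each
     * edge is stored once, matching the native face -> cells layout. */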
4803     for (cs_lnum_t i = 0; i < n_rows; i++) {
4804       cs_lnum_t s_id = row_index[i];
4805       cs_lnum_t e_id = row_index[i+1];
4806       for (cs_lnum_t j = s_id; j < e_id; j++) {
4807         cs_lnum_t k = col_id[j];
4808         if (k <= i)
4809           continue;
4810         g->_face_cell[n_edges][0] = i;
4811         g->_face_cell[n_edges][1] = k;
4812         for (cs_lnum_t l = 0; l < eb_stride; l++)
4813           g->_xa[n_edges*eb_stride + l] = x_val[j*eb_stride + l];
4814         n_edges += 1;
4815       }
4816     }
4817 
4818     g->n_faces = n_edges;
4819     BFT_REALLOC(g->_face_cell, n_edges, cs_lnum_2_t);
4820     BFT_REALLOC(g->_xa, eb_stride*n_edges, cs_real_t);
4821     g->face_cell = (const cs_lnum_2_t *)(g->_face_cell);
4822     g->xa = g->_xa;
4823   }
4824   else
4825     bft_error(__FILE__, __LINE__, 0,
4826               "%s: currently only implemented for symmetric cases.",
4827               __func__);
4828 
4829   /* Synchronize matrix diagonal values */
4830 
4831   if (g->halo != NULL)
4832     cs_halo_sync_var_strided(g->halo, CS_HALO_STANDARD, g->_da, g->db_size[3]);
4833 
4834   cs_matrix_destroy(&(g->_matrix));
4835   g->matrix = NULL;
4836   cs_matrix_structure_destroy(&(g->matrix_struct));
4837 }
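
/* Illustrative sketch (added commentary; values are hypothetical): for a
   symmetric MSR matrix, each strictly upper-triangular entry (i, k) with
   k > i yields one edge. With 3 rows whose off-diagonal column ids are
   {1, 2}, {0, 2} and {0, 1}, the extraction above produces

     n_edges = 3
     _face_cell = { {0, 1}, {0, 2}, {1, 2} }

   and _xa receives the matching x_val entries (eb_stride values per
   edge), so the native (edge-based) arrays describe the same operator
   as the MSR arrays they were built from. */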
4838 
4839 /*----------------------------------------------------------------------------
4840  * Build MSR matrix from edge-based matrix values.
4841  *
4842  * parameters:
4843  *   grid <-> grid structure
4844  *----------------------------------------------------------------------------*/
4845 
4846 static void
4847 _msr_from_native(cs_grid_t  *g)
4848 
4849 {
4850   g->matrix_struct = cs_matrix_structure_create(CS_MATRIX_MSR,
4851                                                 true,
4852                                                 g->n_rows,
4853                                                 g->n_cols_ext,
4854                                                 g->n_faces,
4855                                                 g->face_cell,
4856                                                 g->halo,
4857                                                 NULL);
4858 
4859   g->_matrix = cs_matrix_create(g->matrix_struct);
4860 
4861   cs_matrix_set_coefficients(g->_matrix,
4862                              g->symmetric,
4863                              g->db_size,
4864                              g->eb_size,
4865                              g->n_faces,
4866                              g->face_cell,
4867                              g->da,
4868                              g->xa);
4869 }
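
/* Usage note (added commentary): within this file the two conversions
   are used as a round trip when merging grids across ranks, as in
   cs_grid_coarsen:

     _native_from_msr(c);
     _merge_grids(c, merge_stride, verbosity);
     _msr_from_native(c);

   i.e. the coarse operator is moved to the native (edge) representation,
   merged, then rebuilt as an MSR matrix. */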
4870 
4871 /*----------------------------------------------------------------------------
4872  * Project coarse grid row numbers to parent grid.
4873  *
4874  * parameters:
4875  *   c  <-- coarse grid structure
4876  *----------------------------------------------------------------------------*/
4877 
4878 static void
4879 _project_coarse_row_to_parent(cs_grid_t  *c)
4880 {
4881   assert(c != NULL);
4882 
4883   const cs_grid_t  *f = c->parent;
4884 
4885   assert(f != NULL);
4886 
4887   if (f->parent != NULL) { /* level > 0 */
4888 
4889     cs_lnum_t *c_coarse_row = c->coarse_row;
4890     cs_lnum_t *f_coarse_row = f->coarse_row;
4891 
4892     cs_lnum_t f_n_cols = f->parent->n_cols_ext;
4893 
4894     for (cs_lnum_t ii = 0; ii < f_n_cols; ii++) {
4895       cs_lnum_t ic = f_coarse_row[ii];
4896       if (ic >= 0)
4897         f_coarse_row[ii] = c_coarse_row[ic];
4898     }
4899 
4900   }
4901 }
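
/* Worked example (added commentary; values are hypothetical): if the
   parent grid f maps its own parent's rows as

     f->coarse_row = { 0, 0, 1, 2, 2, -1 }

   and the coarse grid c maps f's rows as

     c->coarse_row = { 0, 1, 1 }

   then after this projection f->coarse_row becomes

     { 0, 0, 1, 1, 1, -1 }

   i.e. the two aggregation maps are composed, with negative (inactive)
   entries left unchanged. */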
4902 
4903 /*----------------------------------------------------------------------------
4904  * Build locally empty matrix.
4905  *
4906  * parameters:
4907  *   c           <-> coarse grid structure
4908  *   matrix_type <-- matrix type
4909  *----------------------------------------------------------------------------*/
4910 
4911 static void
4912 _build_coarse_matrix_null(cs_grid_t         *c,
4913                           cs_matrix_type_t   matrix_type)
4914 {
4915   cs_matrix_structure_t  *ms
4916     = cs_matrix_structure_create(matrix_type,
4917                                  true,
4918                                  0,
4919                                  0,
4920                                  0,
4921                                  NULL,
4922                                  NULL,
4923                                  NULL);
4924 
4925   c->matrix_struct = ms;
4926 
4927   c->_matrix = cs_matrix_create(c->matrix_struct);
4928   c->matrix = c->_matrix;
4929 
4930   cs_matrix_set_coefficients(c->_matrix,
4931                              c->symmetric,
4932                              c->db_size,
4933                              c->eb_size,
4934                              0,
4935                              NULL,
4936                              NULL,
4937                              NULL);
4938 }
4939 
4940 /*----------------------------------------------------------------------------
4941  * Compute fine row integer values from coarse row values
4942  *
4943  * parameters:
4944  *   c       <-- Coarse grid structure
4945  *   f       <-- Fine grid structure
4946  *   c_num   <-> Variable (rank) defined on coarse grid rows
4947  *   f_num   --> Variable (rank) defined on fine grid rows
4948  *----------------------------------------------------------------------------*/
4949 
4950 static void
4951 _prolong_row_int(const cs_grid_t  *c,
4952                  const cs_grid_t  *f,
4953                  int              *c_num,
4954                  int              *f_num)
4955 {
4956   const cs_lnum_t *coarse_row;
4957   const int *_c_num = c_num;
4958 
4959   cs_lnum_t f_n_rows = f->n_rows;
4960 
4961   assert(f != NULL);
4962   assert(c != NULL);
4963   assert(c->coarse_row != NULL || f_n_rows == 0);
4964   assert(f_num != NULL);
4965   assert(c_num != NULL);
4966 
4967 #if defined(HAVE_MPI)
4968   _scatter_row_int(c, c_num);
4969 #endif
4970 
4971   /* Set fine values (possible penalization at first level) */
4972 
4973   coarse_row = c->coarse_row;
4974 
4975 # pragma omp parallel for if(f_n_rows > CS_THR_MIN)
4976   for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
4977     cs_lnum_t i = coarse_row[ii];
4978     if (i >= 0)
4979       f_num[ii] = _c_num[i];
4980     else
4981       f_num[ii] = -1;
4982   }
4983 }
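
/* Worked example (added commentary; values are hypothetical): with
   c->coarse_row = { 0, 0, 1, -1 } and c_num = { 5, 7 }, the loop above
   yields f_num = { 5, 5, 7, -1 }: each fine row receives the value of
   its coarse row, and rows not associated with a coarse row get -1. */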
4984 
4985 /*============================================================================
4986  * Semi-private function definitions
4987  *
4988  * The following functions are intended to be used by the multigrid layer
4989  * (cs_multigrid.c), not directly by the user, so they are no more
4990  * documented than private static functions.
4991  *============================================================================*/
4992 
4993 /*----------------------------------------------------------------------------
4994  * Create base grid by mapping from shared mesh values.
4995  *
4996  * Note that as arrays given as arguments are shared by the created grid
4997  * (which can only access them, not modify them), the grid should be
4998  * destroyed before those arrays.
4999  *
5000  * parameters:
5001  *   n_faces        <-- Local number of faces
5002  *   db_size        <-- Block sizes for diagonal, or NULL
5003  *   eb_size        <-- Block sizes for extra-diagonal, or NULL
5004  *   face_cell      <-- Face -> cells connectivity
5005  *   cell_cen       <-- Cell center (size: 3.n_cells_ext)
5006  *   cell_vol       <-- Cell volume (size: n_cells_ext)
5007  *   face_normal    <-- Internal face normals (size: 3.n_faces)
5008  *   a              <-- Associated matrix
5009  *   a_conv         <-- Associated matrix (convection)
5010  *   a_diff         <-- Associated matrix (diffusion)
5011  *
5012  * returns:
5013  *   base grid structure
5014  *----------------------------------------------------------------------------*/
5015 
5016 cs_grid_t *
5017 cs_grid_create_from_shared(cs_lnum_t              n_faces,
5018                            const cs_lnum_t       *db_size,
5019                            const cs_lnum_t       *eb_size,
5020                            const cs_lnum_2_t     *face_cell,
5021                            const cs_real_t       *cell_cen,
5022                            const cs_real_t       *cell_vol,
5023                            const cs_real_t       *face_normal,
5024                            const cs_matrix_t     *a,
5025                            const cs_matrix_t     *a_conv,
5026                            const cs_matrix_t     *a_diff)
5027 {
5028   cs_lnum_t ii;
5029 
5030   cs_grid_t *g = NULL;
5031 
5032   /* Create empty structure and map base data */
5033 
5034   g = _create_grid();
5035 
5036   g->level = 0;
5037   g->conv_diff = false;
5038   g->symmetric = cs_matrix_is_symmetric(a);
5039 
5040   if (db_size != NULL) {
5041     for (ii = 0; ii < 4; ii++)
5042       g->db_size[ii] = db_size[ii];
5043   }
5044   else {
5045     for (ii = 0; ii < 4; ii++)
5046       g->db_size[ii] = 1;
5047   }
5048 
5049   if (eb_size != NULL) {
5050     for (ii = 0; ii < 4; ii++)
5051       g->eb_size[ii] = eb_size[ii];
5052   }
5053   else {
5054     for (ii = 0; ii < 4; ii++)
5055       g->eb_size[ii] = 1;
5056   }
5057 
5058   g->n_rows = cs_matrix_get_n_rows(a);
5059   g->n_cols_ext = cs_matrix_get_n_columns(a);
5060   g->n_faces = n_faces;
5061   g->n_g_rows = g->n_rows;
5062 
5063 #if defined(HAVE_MPI)
5064   if (cs_glob_n_ranks > 1) {
5065     cs_gnum_t _n_rows = g->n_rows;
5066     MPI_Allreduce(&_n_rows, &(g->n_g_rows), 1, CS_MPI_GNUM, MPI_SUM,
5067                   cs_glob_mpi_comm);
5068   }
5069 #endif
5070 
5071   if (cs_matrix_is_mapped_from_native(a))
5072     g->face_cell = face_cell;
5073 
5074   g->relaxation = 0;
5075 
5076   g->cell_cen = cell_cen;
5077   g->cell_vol = cell_vol;
5078   g->face_normal = face_normal;
5079 
5080   g->halo = cs_matrix_get_halo(a);
5081 
5082   /* Set shared matrix coefficients */
5083 
5084   if (cs_matrix_is_mapped_from_native(a)) {
5085     g->da = cs_matrix_get_diagonal(a);
5086     g->xa = cs_matrix_get_extra_diagonal(a);
5087   }
5088 
5089   if (a_conv != NULL || a_diff != NULL) {
5090     g->conv_diff = true;
5091     g->da_conv = cs_matrix_get_diagonal(a_conv);
5092     g->da_diff = cs_matrix_get_diagonal(a_diff);
5093     g->xa_conv = cs_matrix_get_extra_diagonal(a_conv);
5094     g->xa_diff = cs_matrix_get_extra_diagonal(a_diff);
5095   }
5096 
5097   if (g->face_cell != NULL) {
5098 
5099     /* Build symmetrized extra-diagonal terms if necessary,
5100        or point to existing terms if already symmetric */
5101 
5102     if (g->symmetric == true) {
5103       g->xa0 = g->xa;
5104       g->_xa0 = NULL;
5105     }
5106     else if (g->conv_diff) {
5107       g->xa0  = g->xa;
5108       g->_xa0 = NULL;
5109       g->xa0_diff = g->xa_diff;
5110       g->_xa0_diff = NULL;
5111     }
5112     else {
5113       BFT_MALLOC(g->_xa0, n_faces, cs_real_t);
5114       for (cs_lnum_t face_id = 0; face_id < n_faces; face_id++)
5115         g->_xa0[face_id] = 0.5 * (g->xa[face_id*2] + g->xa[face_id*2+1]);
5116       g->xa0 = g->_xa0;
5117     }
5118 
5119     /* Compute multigrid-specific terms */
5120 
5121     BFT_MALLOC(g->xa0ij, n_faces*3, cs_real_t);
5122 
5123     const cs_real_t *restrict g_xa0 = g->xa0;
5124     if (g->conv_diff)
5125       g_xa0 = g->xa0_diff;
5126 
5127 #   pragma omp parallel for  if(n_faces > CS_THR_MIN)
5128     for (cs_lnum_t face_id = 0; face_id < n_faces; face_id++) {
5129       cs_lnum_t i0 = face_cell[face_id][0];
5130       cs_lnum_t i1 = face_cell[face_id][1];
5131       for (cs_lnum_t kk = 0; kk < 3; kk++) {
5132         g->xa0ij[face_id*3 + kk] =   g_xa0[face_id]
5133                                    * (  cell_cen[i1*3 + kk]
5134                                       - cell_cen[i0*3 + kk]);
5135       }
5136     }
5137 
5138   }
5139 
5140   g->matrix_struct = NULL;
5141   g->matrix = a;
5142   g->_matrix = NULL;
5143 
5144   return g;
5145 }
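
/* Illustrative note (added commentary; values are hypothetical): for a
   face with xa0 = 2.0 joining cells whose centers are (0, 0, 0) and
   (1, 0.5, 0), the loop above stores

     xa0ij = { 2.0, 1.0, 0.0 }

   i.e. the (symmetrized) extra-diagonal coefficient times the vector
   between the two cell centers, computed once here and reused when
   coarse grid quantities are built. */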
5146 
5147 /*----------------------------------------------------------------------------
5148  * Create base grid by mapping from parent (possibly shared) matrix.
5149  *
5150  * Note that as arrays given as arguments are shared by the created grid
5151  * (which can only access them, not modify them), the grid should be
5152  * destroyed before those arrays.
5153  *
5154  * parameters:
5155  *   a       <-- associated matrix
5156  *   n_ranks <-- number of active ranks (<= 1 to restrict to local values)
5157  *
5158  * returns:
5159  *   base grid structure
5160  *----------------------------------------------------------------------------*/
5161 
5162 cs_grid_t *
5163 cs_grid_create_from_parent(const cs_matrix_t  *a,
5164                            int                 n_ranks)
5165 {
5166   cs_grid_t *g = NULL;
5167 
5168   /* Create empty structure and map base data */
5169 
5170   g = _create_grid();
5171 
5172   bool local = true;
5173   const cs_halo_t *h = cs_matrix_get_halo(a);
5174   if (h != NULL) {
5175     local = false;
5176     if (h->n_c_domains == 1) {
5177       if (h->c_domain_rank[0] == cs_glob_rank_id)
5178         local = true;
5179     }
5180   }
5181 
5182   if (n_ranks > 1 || local) {
5183     g->matrix = a;
5184 #if defined(HAVE_MPI)
5185     g->comm = cs_base_get_rank_step_comm(g->merge_stride);
5186     MPI_Comm_size(g->comm, &(g->n_ranks));
5187 #endif
5188   }
5189   else {
5190     g->_matrix = cs_matrix_create_by_local_restrict(a);
5191     g->matrix = g->_matrix;
5192 #if defined(HAVE_MPI)
5193     g->comm = cs_base_get_rank_step_comm(g->merge_stride);
5194     MPI_Comm_size(g->comm, &(g->n_ranks));
5195 #endif
5196   }
5197 
5198   g->level = 0;
5199   g->symmetric = cs_matrix_is_symmetric(g->matrix);
5200 
5201   const cs_lnum_t *db_size = cs_matrix_get_diag_block_size(g->matrix);
5202   const cs_lnum_t *eb_size = cs_matrix_get_extra_diag_block_size(g->matrix);
5203 
5204   for (cs_lnum_t ii = 0; ii < 4; ii++)
5205     g->db_size[ii] = db_size[ii];
5206 
5207   for (cs_lnum_t ii = 0; ii < 4; ii++)
5208     g->eb_size[ii] = eb_size[ii];
5209 
5210   g->n_rows = cs_matrix_get_n_rows(g->matrix);
5211   g->n_cols_ext = cs_matrix_get_n_columns(g->matrix);
5212   g->halo = cs_matrix_get_halo(g->matrix);
5213 
5214   g->n_g_rows = g->n_rows;
5215 
5216 #if defined(HAVE_MPI)
5217   if (g->halo != NULL && g->comm != MPI_COMM_NULL) {
5218     cs_gnum_t _g_n_rows = g->n_rows;
5219     MPI_Allreduce(&_g_n_rows, &(g->n_g_rows), 1, CS_MPI_GNUM, MPI_SUM, g->comm);
5220   }
5221 #endif
5222 
5223   return g;
5224 }
5225 
5226 /*----------------------------------------------------------------------------
5227  * Destroy a grid structure.
5228  *
5229  * parameters:
5230  *   grid <-> Pointer to grid structure pointer
5231  *----------------------------------------------------------------------------*/
5232 
5233 void
5234 cs_grid_destroy(cs_grid_t **grid)
5235 {
5236   if (grid != NULL && *grid != NULL) {
5237 
5238     cs_grid_t *g = *grid;
5239     cs_grid_free_quantities(g);
5240 
5241     BFT_FREE(g->_face_cell);
5242 
5243     BFT_FREE(g->coarse_row);
5244 
5245     if (g->_halo != NULL)
5246       cs_halo_destroy(&(g->_halo));
5247 
5248     BFT_FREE(g->_da);
5249     BFT_FREE(g->_xa);
5250 
5251     cs_matrix_destroy(&(g->_matrix));
5252     cs_matrix_structure_destroy(&(g->matrix_struct));
5253 
5254 #if defined(HAVE_MPI)
5255     BFT_FREE(g->merge_cell_idx);
5256 #endif
5257 
5258     BFT_FREE(*grid);
5259   }
5260 }
5261 
5262 /*----------------------------------------------------------------------------
5263  * Free a grid structure's associated quantities.
5264  *
5265  * The quantities required to compute a coarser grid with relaxation from a
5266  * given grid are not needed after that stage, so may be freed.
5267  *
5268  * parameters:
5269  *   g <-> Pointer to grid structure
5270  *----------------------------------------------------------------------------*/
5271 
5272 void
5273 cs_grid_free_quantities(cs_grid_t  *g)
5274 {
5275   assert(g != NULL);
5276 
5277   if (cs_matrix_get_type(g->matrix) != CS_MATRIX_NATIVE) {
5278     BFT_FREE(g->_face_cell);
5279     g->face_cell = NULL;
5280     BFT_FREE(g->_xa);
5281     g->_xa = NULL;
5282     if (cs_matrix_get_type(g->matrix) != CS_MATRIX_MSR) {
5283       BFT_FREE(g->_da);
5284       g->xa = NULL;
5285     }
5286   }
5287 
5288   BFT_FREE(g->coarse_face);
5289 
5290   BFT_FREE(g->_cell_cen);
5291   BFT_FREE(g->_cell_vol);
5292   BFT_FREE(g->_face_normal);
5293 
5294   BFT_FREE(g->_da_conv);
5295   BFT_FREE(g->_da_diff);
5296   BFT_FREE(g->_xa_conv);
5297   BFT_FREE(g->_xa_diff);
5298   BFT_FREE(g->_xa0);
5299   BFT_FREE(g->_xa0_diff);
5300 
5301   BFT_FREE(g->xa0ij);
5302 }
5303 
5304 /*----------------------------------------------------------------------------
5305  * Get grid information.
5306  *
5307  * parameters:
5308  *   g          <-- Grid structure
5309  *   level      --> Level in multigrid hierarchy (or NULL)
5310  *   symmetric  --> Symmetric matrix coefficients indicator (or NULL)
5311  *   db_size    --> Size of the diagonal block (or NULL)
5312  *   eb_size    --> Size of the extra diagonal block (or NULL)
5313  *   n_ranks    --> number of ranks with data (or NULL)
5314  *   n_rows     --> Number of local rows (or NULL)
5315  *   n_cols_ext --> Number of columns including ghosts (or NULL)
5316  *   n_entries  --> Number of entries (or NULL)
5317  *   n_g_rows   --> Number of global rows (or NULL)
5318  *----------------------------------------------------------------------------*/
5319 
5320 void
5321 cs_grid_get_info(const cs_grid_t  *g,
5322                  int              *level,
5323                  bool             *symmetric,
5324                  cs_lnum_t        *db_size,
5325                  cs_lnum_t        *eb_size,
5326                  int              *n_ranks,
5327                  cs_lnum_t        *n_rows,
5328                  cs_lnum_t        *n_cols_ext,
5329                  cs_lnum_t        *n_entries,
5330                  cs_gnum_t        *n_g_rows)
5331 {
5332   assert(g != NULL);
5333 
5334   if (level != NULL)
5335     *level = g->level;
5336 
5337   if (symmetric != NULL)
5338     *symmetric = g->symmetric;
5339 
5340   if (db_size != NULL) {
5341     db_size[0] = g->db_size[0];
5342     db_size[1] = g->db_size[1];
5343     db_size[2] = g->db_size[2];
5344     db_size[3] = g->db_size[3];
5345   }
5346 
5347   if (eb_size != NULL) {
5348     eb_size[0] = g->eb_size[0];
5349     eb_size[1] = g->eb_size[1];
5350     eb_size[2] = g->eb_size[2];
5351     eb_size[3] = g->eb_size[3];
5352   }
5353 
5354   if (n_ranks != NULL) {
5355 #if defined(HAVE_MPI)
5356     *n_ranks = g->n_ranks;
5357 #else
5358     *n_ranks = 1;
5359 #endif
5360   }
5361 
5362   if (n_rows != NULL)
5363     *n_rows = g->n_rows;
5364   if (n_cols_ext != NULL)
5365     *n_cols_ext = g->n_cols_ext;
5366   assert(g->n_rows <= g->n_cols_ext);
5367   if (n_entries != NULL) {
5368     if (g->matrix != NULL)
5369       *n_entries = cs_matrix_get_n_entries(g->matrix);
5370     else
5371       *n_entries = 0;
5372   }
5373 
5374   if (n_g_rows != NULL)
5375     *n_g_rows = g->n_g_rows;
5376 }
5377 
5378 /*----------------------------------------------------------------------------
5379  * Get number of rows corresponding to a grid.
5380  *
5381  * parameters:
5382  *   g <-- Grid structure
5383  *
5384  * returns:
5385  *   number of rows of grid structure
5386  *----------------------------------------------------------------------------*/
5387 
5388 cs_lnum_t
5389 cs_grid_get_n_rows(const cs_grid_t  *g)
5390 {
5391   assert(g != NULL);
5392 
5393   return g->n_rows;
5394 }
5395 
5396 /*----------------------------------------------------------------------------
5397  * Get number of extended (local + ghost) columns corresponding to a grid.
5398  *
5399  * parameters:
5400  *   g <-- Grid structure
5401  *
5402  * returns:
5403  *   number of extended rows of grid structure
5404  *----------------------------------------------------------------------------*/
5405 
5406 cs_lnum_t
5407 cs_grid_get_n_cols_ext(const cs_grid_t  *g)
5408 {
5409   assert(g != NULL);
5410 
5411   return g->n_cols_ext;
5412 }
5413 
5414 /*----------------------------------------------------------------------------
5415  * Get maximum number of extended (local + ghost) columns corresponding to
5416  * a grid, both with and without merging between ranks
5417  *
5418  * parameters:
5419  *   g <-- Grid structure
5420  *
5421  * returns:
5422  *   maximum number of extended cells of grid structure, with or without
5423  *   merging
5424  *----------------------------------------------------------------------------*/
5425 
5426 cs_lnum_t
5427 cs_grid_get_n_cols_max(const cs_grid_t  *g)
5428 {
5429   cs_lnum_t retval = 0;
5430 
5431   if (g != NULL)
5432     retval = CS_MAX(g->n_cols_ext, g->n_elts_r[1]);
5433 
5434   return retval;
5435 }
5436 
5437 /*----------------------------------------------------------------------------
5438  * Get global number of rows corresponding to a grid.
5439  *
5440  * parameters:
5441  *   g <-- Grid structure
5442  *
5443  * returns:
5444  *   global number of rows of grid structure
5445  *----------------------------------------------------------------------------*/
5446 
5447 cs_gnum_t
5448 cs_grid_get_n_g_rows(const cs_grid_t  *g)
5449 {
5450   assert(g != NULL);
5451 
5452   return g->n_g_rows;
5453 }
5454 
5455 /*----------------------------------------------------------------------------
5456  * Get grid's associated matrix information.
5457  *
5458  * parameters:
5459  *   g <-- Grid structure
5460  *
5461  * returns:
5462  *   pointer to matrix structure
5463  *----------------------------------------------------------------------------*/
5464 
5465 const cs_matrix_t *
5466 cs_grid_get_matrix(const cs_grid_t  *g)
5467 {
5468   const cs_matrix_t *m = NULL;
5469 
5470   assert(g != NULL);
5471 
5472   m = g->matrix;
5473 
5474   return m;
5475 }
5476 
5477 #if defined(HAVE_MPI)
5478 
5479 /*----------------------------------------------------------------------------
5480  * Get the MPI subcommunicator for a given grid.
5481  *
5482  * parameters:
5483  *   g <-- Grid structure
5484  *
5485  * returns:
5486  *   MPI communicator
5487  *----------------------------------------------------------------------------*/
5488 
5489 MPI_Comm
5490 cs_grid_get_comm(const cs_grid_t  *g)
5491 {
5492   assert(g != NULL);
5493 
5494   return g->comm;
5495 }
5496 
5497 /*----------------------------------------------------------------------------
5498  * Get the MPI subcommunicator for a given merge stride.
5499  *
5500  * parameters:
5501  *   parent       <-- parent MPI communicator
5502  *   merge_stride <-- associated merge stride
5503  *
5504  * returns:
5505  *   MPI communicator
5506  *----------------------------------------------------------------------------*/
5507 
5508 MPI_Comm
5509 cs_grid_get_comm_merge(MPI_Comm  parent,
5510                        int       merge_stride)
5511 {
5512   MPI_Comm comm = MPI_COMM_NULL;
5513 
5514   if (parent != MPI_COMM_NULL) {
5515     int size;
5516     MPI_Comm_size(parent, &size);
5517     int rank_step = cs_glob_n_ranks / size;
5518     if (cs_glob_n_ranks % size > 0)
5519       rank_step += 1;
5520     rank_step *= merge_stride;
5521     comm = cs_base_get_rank_step_comm(rank_step);
5522   }
5523 
5524   return comm;
5525 }
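
/* Worked example (added commentary; figures are hypothetical): with
   cs_glob_n_ranks = 96 and a parent communicator of size 24, the base
   step is 96 / 24 = 4 with no remainder; with merge_stride = 2 this
   gives rank_step = 8, so cs_base_get_rank_step_comm is queried for a
   communicator with a step of 8 ranks. */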
5526 
5527 #endif
5528 
5529 /*----------------------------------------------------------------------------
5530  * Create coarse grid from fine grid.
5531  *
5532  * parameters:
5533  *   f                          <-- Fine grid structure
5534  *   coarsening_type            <-- Coarsening criteria type
5535  *   aggregation_limit          <-- Maximum allowed fine rows per coarse row
5536  *   verbosity                  <-- Verbosity level
5537  *   merge_stride               <-- Associated merge stride
5538  *   merge_rows_mean_threshold  <-- mean number of rows under which
5539  *                                  merging should be applied
5540  *   merge_rows_glob_threshold  <-- global number of rows under which
5541  *                                  merging should be applied
5542  *   relaxation_parameter       <-- P0/P1 relaxation factor
5543  *
5544  * returns:
5545  *   coarse grid structure
5546  *----------------------------------------------------------------------------*/
5547 
5548 cs_grid_t *
5549 cs_grid_coarsen(const cs_grid_t  *f,
5550                 int               coarsening_type,
5551                 int               aggregation_limit,
5552                 int               verbosity,
5553                 int               merge_stride,
5554                 int               merge_rows_mean_threshold,
5555                 cs_gnum_t         merge_rows_glob_threshold,
5556                 double            relaxation_parameter)
5557 {
5558   int recurse = 0;
5559 
5560   cs_lnum_t isym = 2;
5561   bool conv_diff = f->conv_diff;
5562 
5563   /* By default, always use MSR structure, as it usually provides the
5564      best performance, and is required for the hybrid Gauss-Seidel-Jacobi
5565      smoothers. In multithreaded case, we also prefer to use a matrix
5566      structure allowing threading without a specific renumbering, as
5567      structures are rebuilt often (so only CSR and MSR can be considered) */
5568 
5569   cs_matrix_type_t fine_matrix_type = cs_matrix_get_type(f->matrix);
5570   cs_matrix_type_t coarse_matrix_type = CS_MATRIX_MSR;
5571 
5572   cs_matrix_variant_t *coarse_mv = NULL;
5573 
5574   cs_grid_t *c = NULL;
5575 
5576   const cs_lnum_t *db_size = f->db_size;
5577 
5578   assert(f != NULL);
5579 
5580   /* Initialization */
5581 
5582   c = _coarse_init(f);
5583 
5584   if (f->symmetric == true)
5585     isym = 1;
5586 
5587   c->relaxation = relaxation_parameter;
5588   if (f->face_cell == NULL && c->relaxation > 0)
5589     c->relaxation = 0;
5590 
5591   /* Ensure default is available */
5592 
5593   if (coarsening_type == CS_GRID_COARSENING_DEFAULT) {
5594     if (f->face_cell != NULL) {
5595       if (f->conv_diff == false)
5596         coarsening_type = CS_GRID_COARSENING_SPD_DX;
5597       else
5598         coarsening_type = CS_GRID_COARSENING_CONV_DIFF_DX;
5599     }
5600     else
5601       coarsening_type = CS_GRID_COARSENING_SPD_MX;
5602   }
5603   else if (coarsening_type == CS_GRID_COARSENING_SPD_DX) {
5604     /* closest alternative */
5605     if (f->face_cell == NULL)
5606       coarsening_type = CS_GRID_COARSENING_SPD_MX;
5607   }
5608 
5609   else if (coarsening_type == CS_GRID_COARSENING_SPD_PW) {
5610     /* closest alternative */
5611     if (fine_matrix_type == CS_MATRIX_NATIVE)
5612       coarsening_type = CS_GRID_COARSENING_SPD_MX;
5613   }
5614 
5615   else if (coarsening_type == CS_GRID_COARSENING_CONV_DIFF_DX) {
5616     /* closest alternative */
5617     if (f->face_cell == NULL)
5618       coarsening_type = CS_GRID_COARSENING_SPD_MX;
5619   }
5620 
5621   /* Determine fine->coarse cell connectivity (aggregation) */
5622 
5623   if (   coarsening_type == CS_GRID_COARSENING_SPD_DX
5624       || coarsening_type == CS_GRID_COARSENING_CONV_DIFF_DX) {
5625     if (f->face_cell != NULL)
5626       _automatic_aggregation_fc(f,
5627                                 coarsening_type,
5628                                 aggregation_limit,
5629                                 c->relaxation,
5630                                 verbosity,
5631                                 c->coarse_row);
5632   }
5633   else if (coarsening_type == CS_GRID_COARSENING_SPD_MX) {
5634     switch (fine_matrix_type) {
5635     case CS_MATRIX_NATIVE:
5636       _automatic_aggregation_mx_native(f, aggregation_limit, verbosity,
5637                                        c->coarse_row);
5638       break;
5639     case CS_MATRIX_MSR:
5640       _automatic_aggregation_mx_msr(f, aggregation_limit, verbosity,
5641                                     c->coarse_row);
5642       break;
5643     default:
5644       bft_error(__FILE__, __LINE__, 0,
5645                 _("%s: selected coarsening type (%s)\n"
5646                   "not available for %s structured matrices."),
5647                 __func__, cs_grid_coarsening_type_name[coarsening_type],
5648                 _(cs_matrix_get_type_name(f->matrix)));
5649     }
5650   }
5651   else if (coarsening_type == CS_GRID_COARSENING_SPD_PW) {
5652     switch (fine_matrix_type) {
5653     case CS_MATRIX_MSR:
5654       _automatic_aggregation_pw_msr(f, verbosity, c->coarse_row);
5655       if (aggregation_limit > 2)
5656         recurse = 2;
5657       break;
5658     default:
5659       bft_error(__FILE__, __LINE__, 0,
5660                 _("%s: selected coarsening type (%s)\n"
5661                   "not available for %s structured matrices."),
5662                 __func__, cs_grid_coarsening_type_name[coarsening_type],
5663                 _(cs_matrix_get_type_name(f->matrix)));
5664     }
5665   }
5666 
5667   _coarsen(f, c);
5668 
5669   if (verbosity > 3)
5670     _aggregation_stats_log(f, c, verbosity);
5671 
5672   BFT_MALLOC(c->_da, c->n_cols_ext * c->db_size[3], cs_real_t);
5673   c->da = c->_da;
5674 
5675   BFT_MALLOC(c->_xa, c->n_faces*isym, cs_real_t);
5676   c->xa = c->_xa;
5677 
5678   if (  (fine_matrix_type == CS_MATRIX_NATIVE || f->face_cell != NULL)
5679       && c->relaxation > 0) {
5680 
5681     /* Allocate permanent arrays in coarse grid */
5682 
5683     BFT_MALLOC(c->_cell_cen, c->n_cols_ext*3, cs_real_t);
5684     c->cell_cen = c->_cell_cen;
5685 
5686     BFT_MALLOC(c->_cell_vol, c->n_cols_ext, cs_real_t);
5687     c->cell_vol = c->_cell_vol;
5688 
5689     BFT_MALLOC(c->_face_normal, c->n_faces*3, cs_real_t);
5690     c->face_normal = c->_face_normal;
5691 
5692     if (conv_diff) {
5693       BFT_MALLOC(c->_da_conv, c->n_cols_ext * c->db_size[3], cs_real_t);
5694       c->da_conv = c->_da_conv;
5695       BFT_MALLOC(c->_da_diff, c->n_cols_ext * c->db_size[3], cs_real_t);
5696       c->da_diff = c->_da_diff;
5697       BFT_MALLOC(c->_xa_conv, c->n_faces*2, cs_real_t);
5698       c->xa_conv = c->_xa_conv;
5699       BFT_MALLOC(c->_xa_diff, c->n_faces, cs_real_t);
5700       c->xa_diff = c->_xa_diff;
5701     }
5702 
5703     /* We could have xa0 point to xa if symmetric, but this would require
5704        caution in CRSTGR to avoid overwriting. */
5705 
5706     BFT_MALLOC(c->_xa0, c->n_faces*isym, cs_real_t);
5707     c->xa0 = c->_xa0;
5708 
5709     if (conv_diff) {
5710       BFT_MALLOC(c->_xa0_diff, c->n_faces, cs_real_t);
5711       c->xa0_diff = c->_xa0_diff;
5712     }
5713 
5714     BFT_MALLOC(c->xa0ij, c->n_faces*3, cs_real_t);
5715 
5716     /* Matrix-related data */
5717 
5718     _compute_coarse_cell_quantities(f, c);
5719 
5720     /* Synchronize grid's geometric quantities */
5721 
5722     if (c->halo != NULL) {
5723 
5724       cs_halo_sync_var_strided(c->halo, CS_HALO_STANDARD, c->_cell_cen, 3);
5725       if (c->halo->n_transforms > 0)
5726         cs_halo_perio_sync_coords(c->halo, CS_HALO_STANDARD, c->_cell_cen);
5727 
5728       cs_halo_sync_var(c->halo, CS_HALO_STANDARD, c->_cell_vol);
5729 
5730     }
5731 
5732   }
5733 
5734   if (fine_matrix_type == CS_MATRIX_MSR && c->relaxation <= 0) {
5735 
5736     _compute_coarse_quantities_msr(f, c);
5737 
5738     /* Merge grids if we are below the threshold */
5739 #if defined(HAVE_MPI)
5740     if (merge_stride > 1 && c->n_ranks > 1 && recurse == 0) {
5741       cs_gnum_t  _n_ranks = c->n_ranks;
5742       cs_gnum_t  _n_mean_g_rows = c->n_g_rows / _n_ranks;
5743       if (   _n_mean_g_rows < (cs_gnum_t)merge_rows_mean_threshold
5744           || c->n_g_rows < merge_rows_glob_threshold) {
5745         _native_from_msr(c);
5746         _merge_grids(c, merge_stride, verbosity);
5747         _msr_from_native(c);
5748       }
5749     }
5750 #endif
5751 
5752   }
5753 
5754   else if (f->face_cell != NULL) {
5755 
5756     if (conv_diff)
5757       _compute_coarse_quantities_conv_diff(f, c, verbosity);
5758     else
5759       _compute_coarse_quantities_native(f, c, verbosity);
5760 
5761     /* Synchronize matrix's geometric quantities */
5762 
5763     if (c->halo != NULL)
5764       cs_halo_sync_var_strided(c->halo, CS_HALO_STANDARD, c->_da, db_size[3]);
5765 
5766     /* Merge grids if we are below the threshold */
5767 
5768 #if defined(HAVE_MPI)
5769     if (merge_stride > 1 && c->n_ranks > 1 && recurse == 0) {
5770       cs_gnum_t  _n_ranks = c->n_ranks;
5771       cs_gnum_t  _n_mean_g_rows = c->n_g_rows / _n_ranks;
5772       if (   _n_mean_g_rows < (cs_gnum_t)merge_rows_mean_threshold
5773           || c->n_g_rows < merge_rows_glob_threshold)
5774         _merge_grids(c, merge_stride, verbosity);
5775     }
5776 #endif
5777 
5778     c->matrix_struct = cs_matrix_structure_create(coarse_matrix_type,
5779                                                   true,
5780                                                   c->n_rows,
5781                                                   c->n_cols_ext,
5782                                                   c->n_faces,
5783                                                   c->face_cell,
5784                                                   c->halo,
5785                                                   NULL);
5786 
5787     c->_matrix = cs_matrix_create(c->matrix_struct);
5788 
5789     cs_matrix_set_coefficients(c->_matrix,
5790                                c->symmetric,
5791                                c->db_size,
5792                                c->eb_size,
5793                                c->n_faces,
5794                                c->face_cell,
5795                                c->da,
5796                                c->xa);
5797 
5798     c->matrix = c->_matrix;
5799 
5800     /* Apply tuning if needed */
5801 
5802     if (_grid_tune_max_level > 0) {
5803 
5804       cs_matrix_fill_type_t mft
5805         = cs_matrix_get_fill_type(f->symmetric,
5806                                   f->db_size,
5807                                   f->eb_size);
5808 
5809       if (_grid_tune_max_level > f->level) {
5810         int k = CS_MATRIX_N_FILL_TYPES*(f->level) + mft;
5811         coarse_mv = _grid_tune_variant[k];
5812 
5813         /* Create tuned variant upon first pass for this level and
5814            fill type */
5815 
5816         if  (   coarse_mv == NULL
5817              && _grid_tune_max_fill_level[mft] > f->level) {
5818 
5819           cs_log_printf(CS_LOG_PERFORMANCE,
5820                         _("\n"
5821                           "Tuning for coarse matrices of level %d and type: %s\n"
5822                           "==========================\n"),
5823                         f->level + 1, cs_matrix_fill_type_name[mft]);
5824 
5825           int n_min_products;
5826           double t_measure;
5827 
5828           cs_matrix_get_tuning_runs(&n_min_products, &t_measure);
5829 
5830           coarse_mv = cs_matrix_variant_tuned(c->matrix,
5831                                               1,
5832                                               n_min_products,
5833                                               t_measure);
5834 
5835           _grid_tune_variant[k] = coarse_mv;
5836 
5837           if  (_grid_tune_max_fill_level[mft] == f->level + 1) {
5838             cs_log_printf(CS_LOG_PERFORMANCE, "\n");
5839             cs_log_separator(CS_LOG_PERFORMANCE);
5840           }
5841         }
5842 
5843       }
5844 
5845     }
5846 
5847     if (coarse_mv != NULL)
5848       cs_matrix_variant_apply(c->_matrix, coarse_mv);
5849   }
5850 
5851   if (c->matrix == NULL) {
5852     assert(c->n_rows == 0);
5853     _build_coarse_matrix_null(c, coarse_matrix_type);
5854   }
5855 
5856   /* Recurse if necessary */
5857 
5858   if (recurse > 1) {
5859 
5860     /* Build coarser grid from coarse grid */
5861     cs_grid_t *cc = cs_grid_coarsen(c,
5862                                     coarsening_type,
5863                                     aggregation_limit / recurse,
5864                                     verbosity,
5865                                     merge_stride,
5866                                     merge_rows_mean_threshold,
5867                                     merge_rows_glob_threshold,
5868                                     relaxation_parameter);
5869 
5870     /* Project coarsening */
5871 
5872     _project_coarse_row_to_parent(cc);
5873     BFT_FREE(cc->coarse_row);
5874     cc->coarse_row = c->coarse_row;
5875     c->coarse_row = NULL;
5876 
5877     if (c->face_cell != NULL) {
5878       BFT_FREE(cc->coarse_face);
5879       BFT_FREE(cc->_face_cell);
5880       _coarsen_faces(f,
5881                      cc->coarse_row,
5882                      cc->n_rows,
5883                      &(cc->n_faces),
5884                      &(cc->coarse_face),
5885                      &(cc->_face_cell));
5886       cc->face_cell = (const cs_lnum_2_t  *)(cc->_face_cell);
5887     }
5888 
5889     /* Keep coarsest grid only */
5890 
5891     cc->level -= 1;
5892     cc->parent = f;
5893 
5894     assert(cc->parent == c->parent);
5895 
5896     cs_grid_destroy(&c);
5897     c = cc;
5898   }
5899 
5900   /* Optional verification */
5901 
5902   if (verbosity > 3) {
5903     if (f->level == 0)
5904       _verify_matrix(f);
5905     _verify_matrix(c);
5906   }
5907 
5908   /* Return new (coarse) grid */
5909 
5910   return c;
5911 }
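
/* Usage sketch (added commentary, not part of this file's code; the real
   driver lives in cs_multigrid.c, and all numeric settings below are
   purely illustrative). Given a system matrix a, a hierarchy may be
   built by repeated coarsening, each grid keeping a pointer to its
   parent:

     cs_grid_t *g = cs_grid_create_from_parent(a, cs_glob_n_ranks);
     int level = 0;
     while (cs_grid_get_n_g_rows(g) > 30 && level < 10) {
       g = cs_grid_coarsen(g,
                           CS_GRID_COARSENING_DEFAULT,
                           3, 0, 1, 300, 500, 0.95);
       level += 1;
     }

   where the arguments after the coarsening type are the aggregation
   limit, verbosity, merge stride, mean and global merge thresholds,
   and the P0/P1 relaxation parameter, in that order. */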
5912 
5913 /*----------------------------------------------------------------------------
5914  * Create coarse grid with only one row per rank from fine grid.
5915  *
5916  * parameters:
5917  *   f            <-- Fine grid structure
5918  *   merge_stride <-- Associated merge stride
5919  *   verbosity    <-- Verbosity level
5920  *
5921  * returns:
5922  *   coarse grid structure
5923  *----------------------------------------------------------------------------*/
5924 
5925 cs_grid_t *
5926 cs_grid_coarsen_to_single(const cs_grid_t  *f,
5927                           int               merge_stride,
5928                           int               verbosity)
5929 {
5930   cs_lnum_t isym = 2;
5931 
5932   /* By default, always use MSR structure, as it often seems to provide the
5933      best performance, and is required for the hybrid Gauss-Seidel-Jacobi
5934      smoothers. In multithreaded case, we also prefer to use a matrix
5935      structure allowing threading without a specific renumbering, as
5936      structures are rebuilt often (so only CSR and MSR can be considered) */
5937 
5938   cs_matrix_type_t fine_matrix_type = cs_matrix_get_type(f->matrix);
5939 
5940   cs_grid_t *c = NULL;
5941 
5942   const cs_lnum_t *db_size = f->db_size;
5943 
5944   assert(f != NULL);
5945 
5946   /* Initialization */
5947 
5948   c = _coarse_init(f);
5949 
5950   if (f->symmetric == true)
5951     isym = 1;
5952 
5953   c->relaxation = 0;
5954 
5955   /* All to a single row */
5956 
5957   for (cs_lnum_t i = 0; i < f->n_rows; i++)
5958     c->coarse_row[i] = 0;
5959 
5960   _coarsen(f, c);
5961 
5962   if (verbosity > 3)
5963     _aggregation_stats_log(f, c, verbosity);
5964 
5965   if (fine_matrix_type == CS_MATRIX_MSR) {
5966     _compute_coarse_quantities_msr(f, c);
5967 
5968 #if defined(HAVE_MPI)
5969     if (c->n_ranks > 1 && merge_stride > 1) {
5970       _native_from_msr(c);
5971       _merge_grids(c, merge_stride, verbosity);
5972       _msr_from_native(c);
5973     }
5974 #endif
5975   }
5976 
5977   else if (f->face_cell != NULL) {
5978 
5979     BFT_MALLOC(c->_da, c->n_cols_ext * c->db_size[3], cs_real_t);
5980     c->da = c->_da;
5981 
5982     BFT_MALLOC(c->_xa, c->n_faces*isym, cs_real_t);
5983     c->xa = c->_xa;
5984 
5985     _compute_coarse_quantities_native(f, c, verbosity);
5986 
5987     /* Synchronize matrix's geometric quantities */
5988 
5989     if (c->halo != NULL)
5990       cs_halo_sync_var_strided(c->halo, CS_HALO_STANDARD, c->_da, db_size[3]);
5991 
5992     /* Merge grids if we are below the threshold */
5993 
5994 #if defined(HAVE_MPI)
5995     if (c->n_ranks > 1 && merge_stride > 1)
5996       _merge_grids(c, merge_stride, verbosity);
5997 #endif
5998 
5999     _msr_from_native(c);
6000   }
6001 
6002   c->matrix = c->_matrix;
6003 
6004   /* Optional verification */
6005 
6006   if (verbosity > 3) {
6007     if (f->level == 0)
6008       _verify_matrix(f);
6009     _verify_matrix(c);
6010   }
6011 
6012   /* Return new (coarse) grid */
6013 
6014   return c;
6015 }
6016 
6017 /*----------------------------------------------------------------------------
6018  * Compute coarse row variable values from fine row values
6019  *
6020  * parameters:
6021  *   f       <-- Fine grid structure
6022  *   c       <-- Coarse grid structure
6023  *   f_var   <-- Variable defined on fine grid rows
6024  *   c_var   --> Variable defined on coarse grid rows
6025  *----------------------------------------------------------------------------*/
6026 
6027 void
6028 cs_grid_restrict_row_var(const cs_grid_t  *f,
6029                          const cs_grid_t  *c,
6030                          const cs_real_t  *f_var,
6031                          cs_real_t        *c_var)
6032 {
6033   cs_lnum_t f_n_rows = f->n_rows;
6034   cs_lnum_t c_n_cols_ext = c->n_elts_r[1];
6035 
6036   const cs_lnum_t *coarse_row;
6037   const cs_lnum_t *db_size = f->db_size;
6038 
6039   assert(f != NULL);
6040   assert(c != NULL);
6041   assert(c->coarse_row != NULL || f_n_rows == 0);
6042   assert(f_var != NULL || f_n_rows == 0);
6043   assert(c_var != NULL || c_n_cols_ext == 0);
6044 
6045   /* Set coarse values */
6046 
6047   coarse_row = c->coarse_row;
6048 
6049   cs_lnum_t _c_n_cols_ext = c_n_cols_ext*db_size[0];
6050 
6051 # pragma omp parallel for  if(_c_n_cols_ext > CS_THR_MIN)
6052   for (cs_lnum_t ii = 0; ii < _c_n_cols_ext; ii++)
6053     c_var[ii] = 0.;
6054 
6055   if (f->level == 0) { /* possible penalization at first level */
6056 
6057     if (db_size[0] == 1) {
6058       for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
6059         cs_lnum_t i = coarse_row[ii];
6060         if (i >= 0)
6061           c_var[i] += f_var[ii];
6062       }
6063     }
6064     else {
6065       for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
6066         cs_lnum_t i = coarse_row[ii];
6067         if (i >= 0) {
6068           for (cs_lnum_t j = 0; j < db_size[0]; j++)
6069             c_var[i*db_size[1]+j] += f_var[ii*db_size[1]+j];
6070         }
6071       }
6072     }
6073   }
6074 
6075   else {
6076     if (db_size[0] == 1) {
6077       for (cs_lnum_t ii = 0; ii < f_n_rows; ii++)
6078         c_var[coarse_row[ii]] += f_var[ii];
6079     }
6080     else {
6081       for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
6082         cs_lnum_t i = coarse_row[ii];
6083         for (cs_lnum_t j = 0; j < db_size[0]; j++)
6084           c_var[i*db_size[1]+j] += f_var[ii*db_size[1]+j];
6085       }
6086     }
6087   }
6088 
6089 #if defined(HAVE_MPI)
6090 
6091   /* If grid merging has taken place, gather coarse data */
6092 
6093   if (c->merge_sub_size > 1) {
6094 
6095     MPI_Comm  comm = cs_glob_mpi_comm;
6096     static const int tag = 'r'+'e'+'s'+'t'+'r'+'i'+'c'+'t';
6097 
6098     /* Append data */
6099 
6100     if (c->merge_sub_rank == 0) {
6101       int rank_id;
6102       MPI_Status status;
6103       assert(cs_glob_rank_id == c->merge_sub_root);
6104       for (rank_id = 1; rank_id < c->merge_sub_size; rank_id++) {
6105         cs_lnum_t n_recv = (  c->merge_cell_idx[rank_id+1]
6106                             - c->merge_cell_idx[rank_id]);
6107         int dist_rank = c->merge_sub_root + c->merge_stride*rank_id;
6108         MPI_Recv(c_var + c->merge_cell_idx[rank_id]*db_size[1],
6109                  n_recv*db_size[1], CS_MPI_REAL, dist_rank, tag, comm, &status);
6110       }
6111     }
6112     else
6113       MPI_Send(c_var, c->n_elts_r[0]*db_size[1], CS_MPI_REAL,
6114                c->merge_sub_root, tag, comm);
6115   }
6116 
6117 #endif /* defined(HAVE_MPI) */
6118 }
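
/* Worked example (added commentary; values are hypothetical): with
   db_size[0] = 1, c->coarse_row = { 0, 0, 1, 1 } and
   f_var = { 1., 2., 3., 4. }, the sums above give c_var = { 3., 7. }:
   restriction simply accumulates each fine row value into the coarse
   row it aggregates to. */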
6119 
6120 /*----------------------------------------------------------------------------
6121  * Compute fine row variable values from coarse row values
6122  *
6123  * parameters:
6124  *   c       <-- Coarse grid structure
6125  *   f       <-- Fine grid structure
6126  *   c_var   <-- Variable defined on coarse grid rows
6127  *   f_var   --> Variable defined on fine grid rows
6128  *----------------------------------------------------------------------------*/
6129 
6130 void
6131 cs_grid_prolong_row_var(const cs_grid_t  *c,
6132                         const cs_grid_t  *f,
6133                         cs_real_t        *c_var,
6134                         cs_real_t        *f_var)
6135 {
6136   const cs_lnum_t *coarse_row;
6137   const cs_real_t *_c_var = c_var;
6138 
6139   const cs_lnum_t *db_size = f->db_size;
6140 
6141   cs_lnum_t f_n_rows = f->n_rows;
6142 
6143   assert(f != NULL);
6144   assert(c != NULL);
6145   assert(c->coarse_row != NULL || f_n_rows == 0);
6146   assert(f_var != NULL);
6147   assert(c_var != NULL);
6148 
6149 #if defined(HAVE_MPI)
6150 
6151   /* If grid merging has taken place, scatter coarse data */
6152 
6153   if (c->merge_sub_size > 1) {
6154 
6155     MPI_Comm  comm = cs_glob_mpi_comm;
6156     static const int tag = 'p'+'r'+'o'+'l'+'o'+'n'+'g';
6157 
6158     /* Append data */
6159 
6160     if (c->merge_sub_rank == 0) {
6161       int rank_id;
6162       assert(cs_glob_rank_id == c->merge_sub_root);
6163       for (rank_id = 1; rank_id < c->merge_sub_size; rank_id++) {
6164         cs_lnum_t n_send = (  c->merge_cell_idx[rank_id+1]
6165                             - c->merge_cell_idx[rank_id]);
6166         int dist_rank = c->merge_sub_root + c->merge_stride*rank_id;
6167         MPI_Send(c_var + c->merge_cell_idx[rank_id]*db_size[1],
6168                  n_send*db_size[1], CS_MPI_REAL, dist_rank, tag, comm);
6169       }
6170     }
6171     else {
6172       MPI_Status status;
6173       MPI_Recv(c_var, c->n_elts_r[0]*db_size[1], CS_MPI_REAL,
6174                c->merge_sub_root, tag, comm, &status);
6175     }
6176   }
6177 
6178 #endif /* defined(HAVE_MPI) */
6179 
6180   /* Set fine values (possible penalization at first level) */
6181 
6182   coarse_row = c->coarse_row;
6183 
6184   if (f->level == 0) {
6185 
6186     if (db_size[0] == 1) {
6187 #     pragma omp parallel for if(f_n_rows > CS_THR_MIN)
6188       for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
6189         cs_lnum_t ic = coarse_row[ii];
6190         if (ic >= 0) {
6191           f_var[ii] = _c_var[ic];
6192         }
6193         else
6194           f_var[ii] = 0;
6195       }
6196     }
6197     else {
6198 #     pragma omp parallel for if(f_n_rows > CS_THR_MIN)
6199       for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
6200         cs_lnum_t ic = coarse_row[ii];
6201         if (ic >= 0) {
6202           for (cs_lnum_t i = 0; i < db_size[0]; i++)
6203             f_var[ii*db_size[1]+i] = _c_var[ic*db_size[1]+i];
6204         }
6205         else {
6206           for (cs_lnum_t i = 0; i < db_size[0]; i++)
6207             f_var[ii*db_size[1]+i] = 0;
6208         }
6209       }
6210     }
6211 
6212   }
6213 
6214   else { /* coarser levels, no need for penalization tests */
6215 
6216     if (db_size[0] == 1) {
6217 #     pragma omp parallel for if(f_n_rows > CS_THR_MIN)
6218       for (cs_lnum_t ii = 0; ii < f_n_rows; ii++)
6219         f_var[ii] = _c_var[coarse_row[ii]];
6220     }
6221     else {
6222 #     pragma omp parallel for if(f_n_rows > CS_THR_MIN)
6223       for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
6224         cs_lnum_t ic = coarse_row[ii];
6225         for (cs_lnum_t i = 0; i < db_size[0]; i++)
6226           f_var[ii*db_size[1]+i] = _c_var[ic*db_size[1]+i];
6227       }
6228     }
6229 
6230   }
6231 }
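
/* Worked example (added commentary; values are hypothetical): with
   db_size[0] = 1, c->coarse_row = { 0, 0, 1, 1 } and c_var = { 3., 7. },
   the loops above give f_var = { 3., 3., 7., 7. }; at level 0, fine rows
   with a negative coarse_row entry (penalized rows) receive 0 instead. */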
6232 
6233 /*----------------------------------------------------------------------------
6234  * Project coarse grid row numbers to base grid.
6235  *
6236  * If a global coarse grid row number is larger than max_num, its
6237  * value modulo max_num is used.
6238  *
6239  * parameters:
6240  *   g           <-- Grid structure
6241  *   n_base_rows <-- Number of rows in base grid
6242  *   max_num     <-- Maximum value allowed (c_row_num = global_num % max_num)
6243  *   c_row_num   --> Global coarse row number (modulo max_num)
6244  *----------------------------------------------------------------------------*/
6245 
6246 void
6247 cs_grid_project_row_num(const cs_grid_t  *g,
6248                         cs_lnum_t         n_base_rows,
6249                         int               max_num,
6250                         int               c_row_num[])
6251 {
6252   cs_lnum_t ii = 0;
6253   cs_gnum_t base_shift = 1;
6254   cs_gnum_t _max_num = max_num;
6255   cs_lnum_t n_max_rows = 0;
6256   cs_lnum_t *tmp_num_1 = NULL, *tmp_num_2 = NULL;
6257   const cs_grid_t *_g = g;
6258 
6259   assert(g != NULL);
6260   assert(c_row_num != NULL);
6261 
6262   /* Initialize array */
6263 
6264   n_max_rows = g->n_rows;
6265   for (_g = g; _g != NULL; _g = _g->parent) {
6266     if (_g->n_rows > n_max_rows)
6267       n_max_rows = _g->n_rows;
6268   }
6269 
6270   BFT_MALLOC(tmp_num_1, n_max_rows, cs_lnum_t);
6271 
6272   /* Compute local base starting row number in parallel mode */
6273 
6274 #if defined(HAVE_MPI)
6275   if (g->comm != MPI_COMM_NULL) {
6276     cs_gnum_t local_shift = g->n_rows;
6277     cs_gnum_t global_shift = 0;
6278     MPI_Scan(&local_shift, &global_shift, 1, CS_MPI_GNUM, MPI_SUM,
6279              g->comm);
6280     base_shift = 1 + global_shift - g->n_rows;
6281   }
6282 #endif
6283 
6284   for (ii = 0; ii < g->n_rows; ii++)
6285     tmp_num_1[ii] = (cs_gnum_t)(ii + base_shift) % _max_num;
6286 
6287   if (g->level > 0) {
6288 
6289     /* Allocate temporary arrays */
6290 
6291     BFT_MALLOC(tmp_num_2, n_max_rows, cs_lnum_t);
6292     for (cs_lnum_t i = 0; i < n_max_rows; i++)
6293       tmp_num_2[i] = -1;        /* singleton */
6294 
6295     for (_g = g; _g->level > 0; _g = _g->parent) {
6296 
6297       cs_lnum_t n_parent_rows = _g->parent->n_rows;
6298 
6299 #if defined(HAVE_MPI)
6300       _scatter_row_num(_g, tmp_num_1);
6301 #endif /* defined(HAVE_MPI) */
6302 
6303       for (ii = 0; ii < n_parent_rows; ii++) {
6304         cs_lnum_t ic = _g->coarse_row[ii];
6305         if (ic >= 0)
6306           tmp_num_2[ii] = tmp_num_1[ic];
6307       }
6308 
6309       for (ii = 0; ii < n_parent_rows; ii++)
6310         tmp_num_1[ii] = tmp_num_2[ii];
6311 
6312     }
6313 
6314     assert(_g->level == 0);
6315     assert(_g->n_rows == n_base_rows);
6316 
6317     /* Free temporary arrays */
6318 
6319     BFT_FREE(tmp_num_2);
6320   }
6321 
6322   memcpy(c_row_num, tmp_num_1, n_base_rows*sizeof(int));
6323 
6324   BFT_FREE(tmp_num_1);
6325 }
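
/* Worked example (added commentary; figures are hypothetical): with two
   ranks owning 100 and 80 rows, MPI_Scan returns 100 and 180, so
   base_shift is 1 on rank 0 and 101 on rank 1. With max_num = 50, row 0
   on rank 1 is therefore numbered (0 + 101) % 50 = 1 before the
   coarse-to-base projection loop is applied. */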
6326 
6327 /*----------------------------------------------------------------------------
6328  * Project coarse grid row rank to base grid.
6329  *
6330  * parameters:
6331  *   g           <-- Grid structure
6332  *   n_base_rows <-- Number of rows in base grid
6333  *   f_row_rank  --> Global coarse row rank projected to fine rows
6334  *----------------------------------------------------------------------------*/
6335 
6336 void
6337 cs_grid_project_row_rank(const cs_grid_t  *g,
6338                          cs_lnum_t         n_base_rows,
6339                          int               f_row_rank[])
6340 {
6341   cs_lnum_t ii;
6342   cs_lnum_t n_max_rows = 0;
6343   int *tmp_rank_1 = NULL, *tmp_rank_2 = NULL;
6344   const cs_grid_t *_g = g;
6345 
6346   assert(g != NULL);
6347   assert(f_row_rank != NULL || g->n_rows == 0);
6348 
6349   n_max_rows = g->n_rows;
6350   for (_g = g; _g != NULL; _g = _g->parent) {
6351     if (_g->n_rows > n_max_rows)
6352       n_max_rows = _g->n_rows;
6353   }
6354 
6355   BFT_MALLOC(tmp_rank_1, n_max_rows, int);
6356 
6357   for (ii = 0; ii < g->n_rows; ii++)
6358     tmp_rank_1[ii] = cs_glob_rank_id;
6359 
6360   /* Project to finer levels */
6361 
6362   if (g->level > 0) {
6363 
6364     /* Allocate temporary arrays */
6365 
6366     BFT_MALLOC(tmp_rank_2, n_max_rows, int);
6367 
6368     for (_g = g; _g->level > 0; _g = _g->parent) {
6369 
6370       cs_lnum_t n_parent_rows = _g->parent->n_rows;
6371 
6372       _prolong_row_int(_g,
6373                        _g->parent,
6374                        tmp_rank_1,
6375                        tmp_rank_2);
6376 
6377       for (ii = 0; ii < n_parent_rows; ii++)
6378         tmp_rank_1[ii] = tmp_rank_2[ii];
6379 
6380     }
6381 
6382     assert(_g->level == 0);
6383     assert(_g->n_rows == n_base_rows);
6384 
6385     /* Free temporary arrays */
6386 
6387     BFT_FREE(tmp_rank_2);
6388   }
6389 
6390   memcpy(f_row_rank, tmp_rank_1, n_base_rows*sizeof(int));
6391 
6392   BFT_FREE(tmp_rank_1);
6393 }
6394 
6395 /*----------------------------------------------------------------------------
6396  * Project variable from coarse grid to base grid
6397  *
6398  * parameters:
6399  *   g           <-- Grid structure
6400  *   n_base_rows <-- Number of rows in base grid
6401  *   c_var       <-- Variable on coarse grid
6402  *   f_var       --> Variable projected to fine grid
6403  *----------------------------------------------------------------------------*/
6404 
6405 void
6406 cs_grid_project_var(const cs_grid_t  *g,
6407                     cs_lnum_t         n_base_rows,
6408                     const cs_real_t   c_var[],
6409                     cs_real_t         f_var[])
6410 {
6411   cs_lnum_t ii;
6412   int i;
6413   cs_lnum_t n_max_rows = 0;
6414   cs_real_t *tmp_var_1 = NULL, *tmp_var_2 = NULL;
6415   const cs_grid_t *_g = g;
6416 
6417   const cs_lnum_t *db_size = g->db_size;
6418 
6419   assert(g != NULL);
6420   assert(c_var != NULL || g->n_rows == 0);
6421   assert(f_var != NULL);
6422 
6423   n_max_rows = g->n_rows;
6424   for (_g = g; _g != NULL; _g = _g->parent) {
6425     if (_g->n_rows > n_max_rows)
6426       n_max_rows = _g->n_rows;
6427   }
6428 
6429   BFT_MALLOC(tmp_var_1, n_max_rows*db_size[1], cs_real_t);
6430   memcpy(tmp_var_1, c_var, g->n_rows*db_size[1]*sizeof(cs_real_t));
6431 
6432   /* Project to finer levels */
6433 
6434   if (g->level > 0) {
6435 
6436     /* Allocate temporary arrays */
6437 
6438     BFT_MALLOC(tmp_var_2, n_max_rows*db_size[1], cs_real_t);
6439 
6440     for (_g = g; _g->level > 0; _g = _g->parent) {
6441 
6442       cs_lnum_t n_parent_rows = _g->parent->n_rows;
6443 
6444       cs_grid_prolong_row_var(_g,
6445                               _g->parent,
6446                               tmp_var_1,
6447                               tmp_var_2);
6448 
6449       for (ii = 0; ii < n_parent_rows; ii++)
6450         for (i = 0; i < db_size[0]; i++)
6451           tmp_var_1[ii*db_size[1]+i] = tmp_var_2[ii*db_size[1]+i];
6452 
6453     }
6454 
6455     assert(_g->level == 0);
6456     assert(_g->n_rows == n_base_rows);
6457 
6458     /* Free temporary arrays */
6459 
6460     BFT_FREE(tmp_var_2);
6461   }
6462 
6463   memcpy(f_var, tmp_var_1, n_base_rows*db_size[1]*sizeof(cs_real_t));
6464 
6465   BFT_FREE(tmp_var_1);
6466 }
6467 
6468 /*----------------------------------------------------------------------------
6469  * Compute diagonal dominance metric and project it to base grid
6470  *
6471  * parameters:
6472  *   g           <-- Grid structure
6473  *   n_base_rows <-- Number of rows in base grid
6474  *   diag_dom    --> Diagonal dominance metric (on fine grid)
6475  *----------------------------------------------------------------------------*/
6476 
6477 void
6478 cs_grid_project_diag_dom(const cs_grid_t  *g,
6479                          cs_lnum_t         n_base_rows,
6480                          cs_real_t         diag_dom[])
6481 {
6482   cs_real_t *dd = NULL;
6483   const cs_lnum_t *db_size = g->db_size;
6484 
6485   assert(g != NULL);
6486   assert(diag_dom != NULL);
6487 
6488   if (g->level == 0)
6489     dd = diag_dom;
6490   else
6491     BFT_MALLOC(dd, g->n_cols_ext*db_size[3], cs_real_t);
6492 
6493   /* Compute coarse diagonal dominance */
6494 
6495   cs_matrix_diag_dominance(g->matrix, dd);
6496 
6497   /* Now project to finer levels */
6498 
6499   if (dd != diag_dom) {
6500     cs_grid_project_var(g, n_base_rows, dd, diag_dom);
6501     BFT_FREE(dd);
6502   }
6503 }
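
/*
  Illustrative usage sketch (hypothetical names "c_grid" and "n_base_rows"):
  compute the diagonal dominance of a coarse level and obtain it on the
  base grid, for example to visualize how dominance evolves with coarsening
  (scalar case assumed, so db_size[3] == 1).

    cs_real_t *dd_base = NULL;
    BFT_MALLOC(dd_base, n_base_rows, cs_real_t);

    cs_grid_project_diag_dom(c_grid, n_base_rows, dd_base);

    BFT_FREE(dd_base);
*/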

/*----------------------------------------------------------------------------
 * Finalize global info related to multigrid solvers
 *----------------------------------------------------------------------------*/

void
cs_grid_finalize(void)
{
  if (_grid_tune_max_level > 0) {

    for (int i = 0; i < _grid_tune_max_level; i++) {
      for (int j = 0; j < CS_MATRIX_N_FILL_TYPES; j++) {
        int k = CS_MATRIX_N_FILL_TYPES*i + j;
        if (_grid_tune_variant[k] != NULL)
          cs_matrix_variant_destroy(&(_grid_tune_variant[k]));
      }
    }

    BFT_FREE(_grid_tune_variant);
    BFT_FREE(_grid_tune_max_fill_level);

    _grid_tune_max_level = 0;
  }
}
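
/*
  Usage note (sketch): since the body above only frees data when
  _grid_tune_max_level > 0, cs_grid_finalize() is a no-op if no tuning was
  ever requested through cs_grid_set_matrix_tuning(), so it may safely be
  called unconditionally at the end of a computation:

    cs_grid_finalize();
*/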

/*----------------------------------------------------------------------------
 * Dump grid structure
 *
 * parameters:
 *   g <-- grid structure that should be dumped
 *----------------------------------------------------------------------------*/

void
cs_grid_dump(const cs_grid_t  *g)
{
  cs_lnum_t  i;

  if (g == NULL) {
    bft_printf("\n\n  grid: null\n");
    return;
  }
  bft_printf("\n"
             "  grid:          %p\n"
             "  level:         %d (parent: %p)\n"
             "  n_rows:        %d\n"
             "  n_cols_ext:    %d\n"
             "  n_faces:       %d\n"
             "  n_g_rows:      %d\n"
             "  n_elts_r:      [%d, %d]\n",
             (const void *)g, g->level, (const void *)(g->parent),
             (int)(g->n_rows), (int)(g->n_cols_ext),
             (int)(g->n_faces), (int)(g->n_g_rows),
             (int)(g->n_elts_r[0]), (int)(g->n_elts_r[1]));

#if defined(HAVE_MPI)

  bft_printf("\n"
             "  merge_sub_root:     %d\n"
             "  merge_sub_rank:     %d\n"
             "  merge_sub_size:     %d\n"
             "  merge_stride:       %d\n"
             "  next_merge_stride:  %d\n"
             "  n_ranks:            %d\n",
             g->merge_sub_root, g->merge_sub_rank, g->merge_sub_size,
             g->merge_stride, g->next_merge_stride, g->n_ranks);

  if (g->merge_cell_idx != NULL) {
    bft_printf("  merge_cell_idx\n");
    for (i = 0; i < g->merge_sub_size + 1; i++)
      bft_printf("    %ld: %ld\n", (long)i, (long)g->merge_cell_idx[i]);
  }

#endif
  bft_printf("\n"
             "  face_cell:      %p\n"
             "  _face_cell:     %p\n"
             "  coarse_row:     %p\n"
             "  coarse_face:    %p\n"
             "  halo:           %p\n",
             (const void *)g->face_cell, (const void *)g->_face_cell,
             (const void *)g->coarse_row, (const void *)g->coarse_face,
             (const void *)g->halo);

  if (g->face_cell != NULL) {
    bft_printf("\n"
               "  face -> cell connectivity;\n");
    for (i = 0; i < g->n_faces; i++)
      bft_printf("    %ld : %ld, %ld\n", (long)(i+1),
                 (long)(g->face_cell[i][0]), (long)(g->face_cell[i][1]));
  }

  if (g->coarse_row != NULL && g->parent != NULL) {
    bft_printf("\n"
               "  coarse_row;\n");
    for (i = 0; i < g->parent->n_rows; i++)
      bft_printf("    %ld : %ld\n",
                 (long)(i+1), (long)(g->coarse_row[i]));
  }

  if (g->coarse_face != NULL && g->parent != NULL) {
    bft_printf("\n"
               "  coarse_face;\n");
    for (i = 0; i < g->parent->n_faces; i++)
      bft_printf("    %ld : %ld\n",
                 (long)(i+1), (long)(g->coarse_face[i]));
  }

  cs_halo_dump(g->halo, 1);
}
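
/*
  Illustrative sketch (hypothetical coarse grid "c_grid"): dump every level
  of a hierarchy by walking the parent chain. This relies on the internal
  "parent" member, so it is only meaningful within this file; external code
  would instead dump each grid as it is built.

    for (const cs_grid_t *_g = c_grid; _g != NULL; _g = _g->parent)
      cs_grid_dump(_g);
*/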

/*! (DOXYGEN_SHOULD_SKIP_THIS) \endcond */

/*============================================================================
 * Public function definitions
 *============================================================================*/

/*----------------------------------------------------------------------------*/
/*!
 * \brief Set matrix tuning behavior for multigrid coarse meshes.
 *
 * The finest mesh (level 0) is handled by the default tuning options,
 * so only coarser meshes are considered here.
 *
 * \param[in]  fill_type  associated matrix fill type
 * \param[in]  max_level  maximum level for which tuning is active
 */
/*----------------------------------------------------------------------------*/

void
cs_grid_set_matrix_tuning(cs_matrix_fill_type_t  fill_type,
                          int                    max_level)
{
  if (_grid_tune_max_level < max_level) {

    if (_grid_tune_max_level == 0) {
      BFT_MALLOC(_grid_tune_max_fill_level, CS_MATRIX_N_FILL_TYPES, int);
      for (int i = 0; i < CS_MATRIX_N_FILL_TYPES; i++)
        _grid_tune_max_fill_level[i] = 0;
    }

    BFT_REALLOC(_grid_tune_variant,
                CS_MATRIX_N_FILL_TYPES*max_level, cs_matrix_variant_t *);

    for (int i = _grid_tune_max_level; i < max_level; i++) {
      for (int j = 0; j < CS_MATRIX_N_FILL_TYPES; j++) {
        _grid_tune_variant[CS_MATRIX_N_FILL_TYPES*i + j] = NULL;
      }
    }

    _grid_tune_max_level = max_level;
  }

  _grid_tune_max_fill_level[fill_type] = max_level;
}
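
/*
  Illustrative usage sketch: request tuning of coarse-grid matrices for the
  scalar fill type on the first two coarse levels (CS_MATRIX_SCALAR is
  assumed to be the matching cs_matrix_fill_type_t value from cs_matrix.h).

    cs_grid_set_matrix_tuning(CS_MATRIX_SCALAR, 2);

  Calling this again with a larger max_level grows the variant table and
  initializes the new entries to NULL; the per-fill-type maximum is always
  set to the last max_level given.
*/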

/*----------------------------------------------------------------------------*/

END_C_DECLS