1 /*============================================================================
2 * Grid connectivity and data used for multigrid coarsening
3 * and associated matrix construction.
4 *============================================================================*/
5
6 /*
7 This file is part of Code_Saturne, a general-purpose CFD tool.
8
9 Copyright (C) 1998-2021 EDF S.A.
10
11 This program is free software; you can redistribute it and/or modify it under
12 the terms of the GNU General Public License as published by the Free Software
13 Foundation; either version 2 of the License, or (at your option) any later
14 version.
15
16 This program is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
19 details.
20
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
23 Street, Fifth Floor, Boston, MA 02110-1301, USA.
24 */
25
26 /*----------------------------------------------------------------------------*/
27
28 #include "cs_defs.h"
29
30 /*----------------------------------------------------------------------------
31 * Standard C library headers
32 *----------------------------------------------------------------------------*/
33
34 #include <stdarg.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <assert.h>
39 #include <math.h>
40
41 #if defined(__STDC_VERSION__) /* size_t */
42 #if (__STDC_VERSION__ == 199901L)
43 # include <stddef.h>
44 # else
45 # include <stdlib.h>
46 # endif
47 #else
48 #include <stdlib.h>
49 #endif
50
51 #if defined(HAVE_MPI)
52 #include <mpi.h>
53 #endif
54
55 /*----------------------------------------------------------------------------
56 * Local headers
57 *----------------------------------------------------------------------------*/
58
59 #include "bft_mem.h"
60 #include "bft_error.h"
61 #include "bft_printf.h"
62
63 #include "cs_base.h"
64 #include "cs_halo.h"
65 #include "cs_halo_perio.h"
66 #include "cs_log.h"
67 #include "cs_matrix.h"
68 #include "cs_matrix_default.h"
69 #include "cs_matrix_tuning.h"
70 #include "cs_matrix_util.h"
71 #include "cs_order.h"
72 #include "cs_prototypes.h"
73 #include "cs_sles.h"
74 #include "cs_sort.h"
75
76 #include "fvm_defs.h"
77
78 /*----------------------------------------------------------------------------
79 * Header for the current file
80 *----------------------------------------------------------------------------*/
81
82 #include "cs_grid.h"
83
84 /*----------------------------------------------------------------------------*/
85
86 BEGIN_C_DECLS
87
88 /*! \cond DOXYGEN_SHOULD_SKIP_THIS */
89
90 /*=============================================================================
91 * Local Macro Definitions
92 *============================================================================*/
93
94 #define EPZERO 1.E-12
95
96 #if !defined(HUGE_VAL)
97 #define HUGE_VAL 1.E+12
98 #endif
99
100 /*=============================================================================
101 * Local Type Definitions
102 *============================================================================*/
103
104 /*----------------------------------------------------------------------------
105 * Local Structure Definitions
106 *----------------------------------------------------------------------------*/
107
/* Structure associated with grid */
/*--------------------------------*/

struct _cs_grid_t {

  int                 level;        /* Level in multigrid hierarchy */

  bool                conv_diff;    /* true if convection/diffusion case,
                                       false otherwise */
  bool                symmetric;    /* Symmetric matrix coefficients
                                       indicator */

  cs_lnum_t           db_size[4];   /* Block sizes for diagonal */
  cs_lnum_t           eb_size[4];   /* Block sizes for extra diagonal */
                                    /* NOTE(review): from usage in
                                       _reduce_block, [0] is the diagonal
                                       length, [2] the intra-block row stride
                                       and [3] the block-to-block stride;
                                       [1] unused there — confirm */

  cs_gnum_t           n_g_rows;     /* Global number of rows */

  cs_lnum_t           n_rows;       /* Local number of rows */
  cs_lnum_t           n_cols_ext;   /* Local number of participating cells
                                       (cells + ghost cells sharing a face) */

  cs_lnum_t           n_elts_r[2];  /* Size of array used for restriction
                                       operations ({n_rows, n_cols_ext} when
                                       no grid merging has taken place) */

  /* Grid hierarchy information */

  const struct _cs_grid_t  *parent; /* Pointer to parent (finer) grid */

  /* Connectivity information */

  cs_lnum_t           n_faces;      /* Local number of faces */
  const cs_lnum_2_t  *face_cell;    /* Face -> cells connectivity (1 to n) */
  cs_lnum_2_t        *_face_cell;   /* Face -> cells connectivity
                                       (private array) */

  /* Restriction from parent to current level */

  cs_lnum_t          *coarse_row;   /* Fine -> coarse row connectivity;
                                       size: parent n_cols_ext */
  cs_lnum_t          *coarse_face;  /* Fine -> coarse face connectivity
                                       (1 to n, signed:
                                       = 0 fine face inside coarse cell
                                       > 0 orientation same as parent
                                       < 0 orientation opposite as parent);
                                       size: parent n_faces */

  /* Geometric data */

  cs_real_t         relaxation;     /* P0/P1 relaxation parameter */
  const cs_real_t  *cell_cen;       /* Cell center (shared) */
  cs_real_t        *_cell_cen;      /* Cell center (private) */

  const cs_real_t  *cell_vol;       /* Cell volume (shared) */
  cs_real_t        *_cell_vol;      /* Cell volume (private) */

  const cs_real_t  *face_normal;    /* Surface normal of internal faces.
                                       (shared; L2 norm = face area) */
  cs_real_t        *_face_normal;   /* Surface normal of internal faces.
                                       (private; L2 norm = face area) */

  /* Parallel / periodic halo */

  const cs_halo_t  *halo;           /* Halo for this connectivity (shared) */
  cs_halo_t        *_halo;          /* Halo for this connectivity (private) */

  /* Matrix-related data */

  const cs_real_t  *da;             /* Diagonal (shared) */
  cs_real_t        *_da;            /* Diagonal (private) */
  const cs_real_t  *da_conv;        /* Diagonal (shared) */
  cs_real_t        *_da_conv;       /* Diagonal (private) */
  const cs_real_t  *da_diff;        /* Diagonal (shared) */
  cs_real_t        *_da_diff;       /* Diagonal (private) */

  const cs_real_t  *xa;             /* Extra-diagonal (shared) */
  cs_real_t        *_xa;            /* Extra-diagonal (private) */
  const cs_real_t  *xa_conv;        /* Extra-diagonal (shared) */
  cs_real_t        *_xa_conv;       /* Extra-diagonal (private) */
  const cs_real_t  *xa_diff;        /* Extra-diagonal (shared) */
  cs_real_t        *_xa_diff;       /* Extra-diagonal (private) */

  const cs_real_t  *xa0;            /* Symmetrized extra-diagonal (shared) */
  cs_real_t        *_xa0;           /* Symmetrized extra-diagonal (private) */
  const cs_real_t  *xa0_diff;       /* Symmetrized extra-diagonal (shared) */
  cs_real_t        *_xa0_diff;      /* Symmetrized extra-diagonal (private) */

  cs_real_t        *xa0ij;          /* xa0 multiplied by the IJ (cell-center
                                       difference) vector; presumably used for
                                       P0/P1 prolongation — TODO confirm */

  cs_matrix_structure_t   *matrix_struct;  /* Associated matrix structure */
  const cs_matrix_t       *matrix;         /* Associated matrix (shared) */
  cs_matrix_t             *_matrix;        /* Associated matrix (private) */

#if defined(HAVE_MPI)

  /* Additional fields to allow merging grids */

  int               merge_sub_root;    /* sub-root when merging */
  int               merge_sub_rank;    /* sub-rank when merging
                                          (0 <= sub-rank < merge_sub_size) */
  int               merge_sub_size;    /* current number of merged ranks
                                          for this subset */
  int               merge_stride;      /* total number of ranks over which
                                          merging occurred at previous levels */
  int               next_merge_stride; /* total number of ranks over which
                                          merging occurred at current level */

  cs_lnum_t        *merge_cell_idx;    /* start cell_id for each sub-rank
                                          when merge_sub_rank = 0
                                          (size: merge_size + 1) */

  int               n_ranks;           /* Number of active ranks */
  MPI_Comm          comm;              /* Associated communicator */

#endif
};
224
/* Structure associated with traversal and update of graph */
/*---------------------------------------------------------*/

/* Doubly-linked lists of rows bucketed by their number of columns m,
   allowing O(1) removal and reinsertion of a row when its m changes
   (used by aggregation/coarsening passes). */

typedef struct _cs_graph_m_ptr_t {

  cs_lnum_t           m_min;        /* Minimum local number of columns */
  cs_lnum_t           m_max;        /* Maximum local number of columns */

  cs_lnum_t          *m_head;       /* pointer to head of rows list for each
                                       value of m */

  cs_lnum_t          *next;         /* pointer to next row */
  cs_lnum_t          *prev;         /* pointer to previous row */

} cs_graph_m_ptr_t;
240
241 /*============================================================================
242 * Global variables
243 *============================================================================*/
244
245 cs_real_t _penalization_threshold = 1e4;
246
247 /* Threshold under which diagonal dominant rows are ignored in the
248 aggregation scheme (assuming that dominance allows strong enough
249 convergence for those rows by itself). Y Notay recommends using
250 a value of 5. */
251
252 cs_real_t _dd_threshold_pw = 5;
253
254 /* Names for coarsening options */
255
256 const char *cs_grid_coarsening_type_name[]
257 = {N_("default"),
258 N_("SPD, diag/extra-diag ratio based"),
259 N_("SPD, max extra-diag ratio based"),
260 N_("SPD, (multiple) pairwise aggregation"),
261 N_("convection + diffusion")};
262
263 /* Select tuning options */
264
265 static int _grid_tune_max_level = 0;
266 static int *_grid_tune_max_fill_level = NULL;
267 static cs_matrix_variant_t **_grid_tune_variant = NULL;
268
269 /*============================================================================
270 * Private function definitions
271 *============================================================================*/
272
273 /*----------------------------------------------------------------------------*/
274 /*!
275 * \brief Binary search for a given local id in a given array of
276 * ordered local ids, when the id might not be present
277 *
278 * We assume the id is present in the array.
279 *
280 * \param[in] l_id_array size array_size
281 * \param[in] l_id local id to search for
282 * \param[in] l_id_array ordered unique local ids array
283 *
284 * \return index of l_id in l_id_array, or -1 if not found
285 */
286 /*----------------------------------------------------------------------------*/
287
288 static inline cs_lnum_t
_l_id_binary_search(cs_lnum_t l_id_array_size,cs_lnum_t l_id,const cs_lnum_t l_id_array[])289 _l_id_binary_search(cs_lnum_t l_id_array_size,
290 cs_lnum_t l_id,
291 const cs_lnum_t l_id_array[])
292 {
293 if (l_id_array_size < 1)
294 return -1;
295
296 cs_lnum_t start_id = 0;
297 cs_lnum_t end_id = l_id_array_size - 1;
298 cs_lnum_t mid_id = (end_id -start_id) / 2;
299 while (start_id < end_id) {
300 if (l_id_array[mid_id] < l_id)
301 start_id = mid_id + 1;
302 else if (l_id_array[mid_id] > l_id)
303 end_id = mid_id - 1;
304 else
305 break;
306 mid_id = start_id + ((end_id -start_id) / 2);
307 }
308 if (l_id_array[mid_id] != l_id)
309 mid_id = -1;
310
311 return mid_id;
312 }
313
314 /*----------------------------------------------------------------------------
315 * Reduce block values to a single equivalent value for computation
316 * of aggregation criteria.
317 *
318 * parameters:
319 * n_blocs <-- number of blocks
320 * b_size <-- block dimensions
321 * b_vals <-- block values
322 * b_vals --> scalar values
323 *----------------------------------------------------------------------------*/
324
325 static void
_reduce_block(cs_lnum_t n_blocks,const cs_lnum_t b_size[4],const cs_real_t b_vals[],cs_real_t s_vals[])326 _reduce_block(cs_lnum_t n_blocks,
327 const cs_lnum_t b_size[4],
328 const cs_real_t b_vals[],
329 cs_real_t s_vals[])
330 {
331 const cs_real_t b_div = 1.0 / b_size[0];
332
333 # pragma omp parallel for if(n_blocks > CS_THR_MIN)
334 for (cs_lnum_t i = 0; i < n_blocks; i++) {
335 cs_real_t s = 0;
336 for (cs_lnum_t j = 0; j < b_size[0]; j++)
337 s += b_vals[i*b_size[3] + b_size[2]*j + j];
338 s_vals[i] = s * b_div;
339 }
340 }
341
342 /*----------------------------------------------------------------------------
343 * Allocate empty grid structure
344 *
345 * - Periodic faces are not handled yet
346 *
347 * returns:
348 * empty grid structure
349 *----------------------------------------------------------------------------*/
350
351 static cs_grid_t *
_create_grid(void)352 _create_grid(void)
353 {
354 cs_grid_t *g;
355
356 BFT_MALLOC(g, 1, cs_grid_t);
357
358 g->conv_diff = false;
359 g->symmetric = false;
360
361 g->db_size[0] = 1;
362 g->db_size[1] = 1;
363 g->db_size[2] = 1;
364 g->db_size[3] = 1;
365
366 g->eb_size[0] = 1;
367 g->eb_size[1] = 1;
368 g->eb_size[2] = 1;
369 g->eb_size[3] = 1;
370
371 g->level = 0;
372
373 g->n_g_rows = 0;
374 g->n_rows = 0;
375 g->n_cols_ext = 0;
376
377 g->n_faces = 0;
378
379 g->n_elts_r[0] = 0;
380 g->n_elts_r[1] = 0;
381
382 g->parent = NULL;
383 g->conv_diff = false;
384
385 g->relaxation = 0;
386
387 g->face_cell = NULL;
388 g->_face_cell = NULL;
389
390 g->coarse_row = NULL;
391 g->coarse_face = NULL;
392
393 g->cell_cen = NULL;
394 g->_cell_cen = NULL;
395 g->cell_vol = NULL;
396 g->_cell_vol = NULL;
397 g->face_normal = NULL;
398 g->_face_normal = NULL;
399
400 g->halo = NULL;
401 g->_halo = NULL;
402
403 g->da = NULL;
404 g->_da = NULL;
405 g->da_conv = NULL;
406 g->_da_conv = NULL;
407 g->da_diff = NULL;
408 g->_da_diff = NULL;
409 g->xa = NULL;
410 g->_xa = NULL;
411 g->xa_conv = NULL;
412 g->_xa_conv = NULL;
413 g->xa_diff = NULL;
414 g->_xa_diff = NULL;
415 g->xa0 = NULL;
416 g->_xa0 = NULL;
417 g->xa0_diff = NULL;
418 g->_xa0_diff = NULL;
419
420 g->xa0ij = NULL;
421
422 g->matrix_struct = NULL;
423 g->matrix = NULL;
424 g->_matrix = NULL;
425
426 #if defined(HAVE_MPI)
427
428 g->merge_sub_root = 0;
429 g->merge_sub_rank = 0;
430 g->merge_sub_size = 1;
431 g->merge_stride = 0;
432 g->next_merge_stride = 1;
433
434 g->merge_cell_idx = NULL;
435
436 g->n_ranks = cs_glob_n_ranks;
437 g->comm = cs_glob_mpi_comm;
438
439 #endif
440 return g;
441 }
442
443 /*----------------------------------------------------------------------------
444 * Initialize coarse grid from fine grid
445 *
446 * This creates au quasi-empty grid structure, with only symmetry and
447 * level information initialized, and coarsening array allocated and
448 * zeroed.
449 *
450 * After this function is called, the coarsening array must be determined,
451 * then _coarsen must be called to complete the coarse grid.
452 *
453 * parameters:
454 * f <-- Pointer to fine (parent) grid structure
455 *
456 * returns:
457 * coarse grid structure
458 *----------------------------------------------------------------------------*/
459
460 static cs_grid_t *
_coarse_init(const cs_grid_t * f)461 _coarse_init(const cs_grid_t *f)
462 {
463 cs_lnum_t ii;
464 int i;
465 cs_grid_t *c = NULL;
466
467 c = _create_grid();
468
469 c->parent = f;
470
471 c->level = f->level + 1;
472 c->symmetric = f->symmetric;
473 c->conv_diff = f->conv_diff;
474 for (i = 0; i < 4; i++)
475 c->db_size[i] = f->db_size[i];
476
477 for (i = 0; i < 4; i++)
478 c->eb_size[i] = f->eb_size[i];
479
480 BFT_MALLOC(c->coarse_row, f->n_cols_ext, cs_lnum_t);
481
482 # pragma omp parallel for if(f->n_cols_ext > CS_THR_MIN)
483 for (ii = 0; ii < f->n_cols_ext; ii++)
484 c->coarse_row[ii] = -1;
485
486 #if defined(HAVE_MPI)
487 c->merge_stride = f->merge_stride;
488 c->next_merge_stride = f->next_merge_stride;
489 c->n_ranks = f->n_ranks;
490 c->comm = f->comm;
491 #endif
492
493 return c;
494 }
495
496 /*----------------------------------------------------------------------------
497 * Log aggregation information using the face to cells adjacency.
498 *
499 * parameters:
500 * f <-- Fine grid structure
501 * c <-- Coarse grid structure
502 * verbosity <-- verbosity level
503 *----------------------------------------------------------------------------*/
504
505 static void
_aggregation_stats_log(const cs_grid_t * f,const cs_grid_t * c,int verbosity)506 _aggregation_stats_log(const cs_grid_t *f,
507 const cs_grid_t *c,
508 int verbosity)
509 {
510 const cs_lnum_t f_n_rows = f->n_rows;
511 const cs_lnum_t c_n_rows = c->n_rows;
512
513 cs_gnum_t c_n_rows_g = c->n_rows;
514
515 const cs_lnum_t *c_coarse_row = (const cs_lnum_t *)(c->coarse_row);
516
517 cs_lnum_t *c_aggr_count;
518
519 BFT_MALLOC(c_aggr_count, c_n_rows, cs_lnum_t);
520 for (cs_lnum_t i = 0; i < c_n_rows; i++)
521 c_aggr_count[i] = 0;
522
523 for (cs_lnum_t i = 0; i < f_n_rows; i++) {
524 cs_lnum_t j = c_coarse_row[i];
525 if (j > -1)
526 c_aggr_count[j] += 1;
527 }
528
529 cs_lnum_t aggr_min = f->n_rows, aggr_max = 0;
530 cs_gnum_t aggr_tot = 0;
531
532 for (cs_lnum_t i = 0; i < c_n_rows; i++) {
533 aggr_min = CS_MIN(aggr_min, c_aggr_count[i]);
534 aggr_max = CS_MAX(aggr_max, c_aggr_count[i]);
535 aggr_tot += c_aggr_count[i];
536 }
537
538 #if defined(HAVE_MPI)
539 if (f->comm != MPI_COMM_NULL) {
540 cs_lnum_t _aggr_min = aggr_min, _aggr_max = aggr_max;
541 cs_gnum_t _tot[2] = {aggr_tot, c_n_rows_g}, tot[2] = {0, 0};
542 MPI_Allreduce(&_aggr_min, &aggr_min, 1, CS_MPI_LNUM, MPI_MIN, f->comm);
543 MPI_Allreduce(&_aggr_max, &aggr_max, 1, CS_MPI_LNUM, MPI_MAX, f->comm);
544 MPI_Allreduce(&_tot, &tot, 2, CS_MPI_GNUM, MPI_SUM, f->comm);
545 aggr_tot = tot[0];
546 c_n_rows_g = tot[1];
547 }
548 #endif
549
550 bft_printf(" aggregation min = %ld; max = %ld; mean = %8.2f\n",
551 (long)aggr_min, (long)aggr_max,
552 (double)aggr_tot/(double)c_n_rows_g);
553
554 cs_lnum_t aggr_count = aggr_max - aggr_min + 1;
555
556 if (verbosity > 4 && c_n_rows_g > 0 && aggr_count > 0) {
557
558 /* Histogram */
559
560 bft_printf(" histogram\n");
561
562 cs_lnum_t *histogram;
563 BFT_MALLOC(histogram, aggr_count, cs_lnum_t);
564 for (cs_lnum_t i = 0; i < aggr_count; i++)
565 histogram[i] = 0;
566 for (cs_lnum_t ic = 0; ic < c_n_rows; ic++) {
567 for (cs_lnum_t i = 0; i < aggr_count; i++) {
568 if (c_aggr_count[ic] == aggr_min + i)
569 histogram[i] += 1;
570 }
571 }
572 #if defined(HAVE_MPI) && defined(HAVE_MPI_IN_PLACE)
573 if (f->comm != MPI_COMM_NULL)
574 MPI_Allreduce(MPI_IN_PLACE, histogram, aggr_count, CS_MPI_LNUM, MPI_SUM,
575 f->comm);
576 #endif
577 for (cs_lnum_t i = 0; i < aggr_count; i++) {
578 double epsp = 100. * histogram[i] / c_n_rows_g;
579 bft_printf(" aggregation %ld = %8.2f %%\n",
580 (long)(aggr_min + i), epsp);
581 }
582 BFT_FREE(histogram);
583 }
584
585 BFT_FREE(c_aggr_count);
586 }
587
588 /*----------------------------------------------------------------------------
589 * Build fine -> coarse face connectivity and coarse grid face->cell
590 * connectivity (also determining the number of coarse faces) from
591 * fine grid and cell coarsening array.
592 *
593 * Also, it is the caller's responsibility to free arrays coarse_face[]
594 * and coarse_face_cell_id[] when they are no longer used.
595 *
596 * parameters:
597 * fine <-- Pointer to fine (parent) grid structure
598 * coarse_row <-- Fine -> coarse row id
599 * size: fine->n_cols_ext
600 * n_coarse_rows <-- Number of coarse cells
601 * n_coarse_faces --> Number of coarse faces
602 * coarse_face --> Fine -> coarse face connectivity (1 to n, signed:
603 * = 0 fine face inside coarse cell
604 * > 0 orientation same as parent
605 * < 0 orientation opposite as parent);
606 * size: fine->n_faces
607 * coarse_face_cell --> coarse face -> cell connectivity
608 * size: n_coarse_faces
609 *----------------------------------------------------------------------------*/
610
611 static void
_coarsen_faces(const cs_grid_t * fine,const cs_lnum_t * restrict coarse_row,cs_lnum_t n_coarse_rows,cs_lnum_t * n_coarse_faces,cs_lnum_t ** coarse_face,cs_lnum_2_t ** coarse_face_cell)612 _coarsen_faces(const cs_grid_t *fine,
613 const cs_lnum_t *restrict coarse_row,
614 cs_lnum_t n_coarse_rows,
615 cs_lnum_t *n_coarse_faces,
616 cs_lnum_t **coarse_face,
617 cs_lnum_2_t **coarse_face_cell)
618 {
619 cs_lnum_t ii, jj, face_id, connect_size;
620
621 cs_lnum_t *restrict c_cell_cell_cnt = NULL;
622 cs_lnum_t *restrict c_cell_cell_idx = NULL;
623 cs_lnum_t *restrict c_cell_cell_id = NULL;
624 cs_lnum_t *restrict c_cell_cell_face = NULL;
625
626 cs_lnum_t *restrict _coarse_face = NULL;
627 cs_lnum_2_t *restrict _c_face_cell = NULL;
628
629 cs_lnum_t c_n_faces = 0;
630
631 const cs_lnum_t c_n_cells = n_coarse_rows;
632 const cs_lnum_t f_n_faces = fine->n_faces;
633 const cs_lnum_2_t *restrict f_face_cell = fine->face_cell;
634
635 /* Pre-allocate return values
636 (coarse face->cell connectivity is over-allocated) */
637
638 BFT_MALLOC(_coarse_face, f_n_faces, cs_lnum_t);
639 BFT_MALLOC(_c_face_cell, f_n_faces, cs_lnum_2_t);
640
641 # pragma omp parallel for if(f_n_faces > CS_THR_MIN)
642 for (face_id = 0; face_id < f_n_faces; face_id++)
643 _coarse_face[face_id] = 0;
644
645 /* Allocate index */
646
647 BFT_MALLOC(c_cell_cell_idx, c_n_cells + 1, cs_lnum_t);
648
649 # pragma omp parallel for if(c_n_cells > CS_THR_MIN)
650 for (ii = 0; ii <= c_n_cells; ii++)
651 c_cell_cell_idx[ii] = 0;
652
653 for (face_id = 0; face_id < f_n_faces; face_id++) {
654
655 ii = coarse_row[f_face_cell[face_id][0]];
656 jj = coarse_row[f_face_cell[face_id][1]];
657
658 if (ii < jj)
659 c_cell_cell_idx[ii+1] += 1;
660 else if (ii > jj)
661 c_cell_cell_idx[jj+1] += 1;
662
663 }
664
665 /* Transform to index */
666
667 for (ii = 0; ii < c_n_cells; ii++)
668 c_cell_cell_idx[ii+1] += c_cell_cell_idx[ii];
669
670 BFT_MALLOC(c_cell_cell_id,
671 c_cell_cell_idx[c_n_cells],
672 cs_lnum_t);
673
674 BFT_MALLOC(c_cell_cell_face,
675 c_cell_cell_idx[c_n_cells],
676 cs_lnum_t);
677
678 connect_size = c_cell_cell_idx[c_n_cells];
679
680 # pragma omp parallel for if(connect_size > CS_THR_MIN)
681 for (ii = 0; ii < connect_size; ii++)
682 c_cell_cell_face[ii] = 0;
683
684 /* Use a counter for array population, as array will usually
685 not be fully populated */
686
687 BFT_MALLOC(c_cell_cell_cnt, c_n_cells, cs_lnum_t);
688
689 # pragma omp parallel for if(c_n_cells > CS_THR_MIN)
690 for (ii = 0; ii < c_n_cells; ii++)
691 c_cell_cell_cnt[ii] = 0;
692
693 /* Build connectivity */
694
695 c_n_faces = 0;
696
697 for (face_id = 0; face_id < f_n_faces; face_id++) {
698
699 cs_lnum_t kk, start_id, end_id;
700 cs_lnum_t sign = 1;
701
702 ii = coarse_row[f_face_cell[face_id][0]];
703 jj = coarse_row[f_face_cell[face_id][1]];
704
705 if (ii != jj && ii > -1 && jj > -1) {
706
707 if (ii > jj) {
708 sign = -1;
709 kk = ii;
710 ii = jj;
711 jj = kk;
712 }
713
714 start_id = c_cell_cell_idx[ii];
715 end_id = c_cell_cell_idx[ii] + c_cell_cell_cnt[ii];
716
717 for (kk = start_id; kk < end_id; kk++) {
718 if (c_cell_cell_id[kk] == jj)
719 break;
720 }
721 if (kk == end_id) {
722 c_cell_cell_id[kk] = jj;
723 c_cell_cell_face[kk] = c_n_faces + 1;
724 _c_face_cell[c_n_faces][0] = ii;
725 _c_face_cell[c_n_faces][1] = jj;
726 c_n_faces++;
727 c_cell_cell_cnt[ii] += 1;
728 }
729 _coarse_face[face_id] = sign*c_cell_cell_face[kk];
730
731 }
732
733 }
734
735 /* Free temporary arrays */
736
737 BFT_FREE(c_cell_cell_cnt);
738 BFT_FREE(c_cell_cell_face);
739 BFT_FREE(c_cell_cell_id);
740 BFT_FREE(c_cell_cell_idx);
741
742 /* Set return values */
743
744 BFT_REALLOC(_c_face_cell, c_n_faces, cs_lnum_2_t);
745
746 *n_coarse_faces = c_n_faces;
747 *coarse_face = _coarse_face;
748 *coarse_face_cell = _c_face_cell;
749 }
750
751 /*----------------------------------------------------------------------------
752 * Send prepared coarsening ids (based on halo send lists) and receive
753 * similar values to a cell coarsening array halo.
754 *
755 * parameters:
756 * halo <-> pointer to halo structure
757 * coarse_send <-> values defined on halo send lists
758 * coarse_row <-> pointer to local row coarsening array
759 * whose halo values are to be received
760 *----------------------------------------------------------------------------*/
761
762 static void
_exchange_halo_coarsening(const cs_halo_t * halo,cs_lnum_t coarse_send[],cs_lnum_t coarse_row[])763 _exchange_halo_coarsening(const cs_halo_t *halo,
764 cs_lnum_t coarse_send[],
765 cs_lnum_t coarse_row[])
766 {
767 cs_lnum_t i, start, length;
768
769 int local_rank_id = (cs_glob_n_ranks == 1) ? 0 : -1;
770
771 #if defined(HAVE_MPI)
772
773 if (cs_glob_n_ranks > 1) {
774
775 int rank_id;
776 int request_count = 0;
777 const int local_rank = cs_glob_rank_id;
778
779 MPI_Request _request[128];
780 MPI_Request *request = _request;
781 MPI_Status _status[128];
782 MPI_Status *status = _status;
783
784 /* Allocate if necessary */
785
786 if (halo->n_c_domains*2 > 128) {
787 BFT_MALLOC(request, halo->n_c_domains*2, MPI_Request);
788 BFT_MALLOC(status, halo->n_c_domains*2, MPI_Status);
789 }
790
791 /* Receive data from distant ranks */
792
793 for (rank_id = 0; rank_id < halo->n_c_domains; rank_id++) {
794
795 start = halo->index[2*rank_id];
796 length = halo->index[2*rank_id + 2] - halo->index[2*rank_id];
797
798 if (halo->c_domain_rank[rank_id] != local_rank) {
799
800 MPI_Irecv(coarse_row + halo->n_local_elts + start,
801 length,
802 CS_MPI_LNUM,
803 halo->c_domain_rank[rank_id],
804 halo->c_domain_rank[rank_id],
805 cs_glob_mpi_comm,
806 &(request[request_count++]));
807
808 }
809 else
810 local_rank_id = rank_id;
811
812 }
813
814 /* We wait for posting all receives (often recommended) */
815
816 // MPI_Barrier(comm);
817
818 /* Send data to distant ranks */
819
820 for (rank_id = 0; rank_id < halo->n_c_domains; rank_id++) {
821
822 /* If this is not the local rank */
823
824 if (halo->c_domain_rank[rank_id] != local_rank) {
825
826 start = halo->send_index[2*rank_id];
827 length = halo->send_index[2*rank_id + 2] - halo->send_index[2*rank_id];
828
829 MPI_Isend(coarse_send + start,
830 length,
831 CS_MPI_LNUM,
832 halo->c_domain_rank[rank_id],
833 local_rank,
834 cs_glob_mpi_comm,
835 &(request[request_count++]));
836
837 }
838
839 }
840
841 /* Wait for all exchanges */
842
843 MPI_Waitall(request_count, request, status);
844
845 if (request != _request) {
846 BFT_FREE(request);
847 BFT_FREE(status);
848 }
849 }
850
851 #endif /* defined(HAVE_MPI) */
852
853 /* Copy local values in case of periodicity */
854
855 if (halo->n_transforms > 0) {
856
857 if (local_rank_id > -1) {
858
859 cs_lnum_t *_coarse_row
860 = coarse_row + halo->n_local_elts + halo->index[2*local_rank_id];
861
862 start = halo->send_index[2*local_rank_id];
863 length = halo->send_index[2*local_rank_id + 2]
864 - halo->send_index[2*local_rank_id];
865
866 # pragma omp parallel for if(length > CS_THR_MIN)
867 for (i = 0; i < length; i++)
868 _coarse_row[i] = coarse_send[start + i];
869
870 }
871
872 }
873 }
874
875 /*----------------------------------------------------------------------------
876 * Build a halo for a coarse grid from a fine grid and its halo.
877 *
878 * The coarse grid must previously have been initialized with _coarse_init()
879 * and its coarsening indicator determined for the local cells.
880 *
881 * parameters:
882 * f <-- Pointer to fine (parent) grid structure
883 * c <-> Pointer to coarse grid structure
884 *----------------------------------------------------------------------------*/
885
886 static void
_coarsen_halo(const cs_grid_t * f,cs_grid_t * c)887 _coarsen_halo(const cs_grid_t *f,
888 cs_grid_t *c)
889 {
890 int domain_id, tr_id, section_id;
891 cs_lnum_t ii, jj;
892 cs_lnum_t start_id, end_id, sub_count;
893
894 cs_lnum_t *start_end_id = NULL;
895 cs_lnum_t *sub_num = NULL;
896 cs_lnum_t *coarse_send = NULL;
897
898 cs_lnum_t *restrict coarse_row = c->coarse_row;
899
900 cs_halo_t *c_halo = NULL;
901 const cs_halo_t *f_halo = f->halo;
902
903 const cs_lnum_t c_n_rows = c->n_rows;
904
905 const int stride = f_halo->n_c_domains*4;
906 const int n_sections = f_halo->n_transforms + 1;
907 const int n_f_send = f_halo->n_send_elts[1]; /* Size of full list */
908
909 c_halo = cs_halo_create_from_ref(f_halo);
910
911 c->halo = c_halo;
912 c->_halo = c_halo;
913
914 /* Initialize coarse halo counters */
915
916 c_halo->n_local_elts = c_n_rows;
917 c_halo->n_send_elts[0] = 0;
918 c_halo->n_send_elts[1] = 0;
919
920 # pragma omp parallel for if(f->n_cols_ext > CS_THR_MIN)
921 for (ii = f->n_rows; ii < f->n_cols_ext; ii++)
922 coarse_row[ii] = -1;
923
924 /* Allocate and initialize counters */
925
926 BFT_MALLOC(start_end_id, n_sections*2, cs_lnum_t);
927 BFT_MALLOC(sub_num, c_n_rows + 1, cs_lnum_t);
928 BFT_MALLOC(coarse_send, n_f_send, cs_lnum_t);
929
930 /* Sub_num values shifted by 1, so 0 can be used for handling
931 of removed (penalized) rows) */
932
933 sub_num[0] = -2;
934 # pragma omp parallel for if(c_n_rows > CS_THR_MIN)
935 for (ii = 1; ii <= c_n_rows; ii++)
936 sub_num[ii] = -1;
937
938 # pragma omp parallel for if(n_f_send > CS_THR_MIN)
939 for (ii = 0; ii < n_f_send; ii++)
940 coarse_send[ii] = -1;
941
942 /* Counting and marking pass */
943 /*---------------------------*/
944
945 /* Proceed halo section by halo section */
946
947 for (domain_id = 0; domain_id < f_halo->n_c_domains; domain_id++) {
948
949 /* Handle purely parallel cells */
950
951 start_end_id[0] = f_halo->send_index[domain_id*2];
952
953 if (f_halo->n_transforms == 0)
954 start_end_id[1] = f_halo->send_index[(domain_id+1)*2];
955 else
956 start_end_id[1] = f_halo->send_perio_lst[4*domain_id];
957
958 /* Now handle periodic cells transformation by transformation */
959
960 for (tr_id = 0; tr_id < f_halo->n_transforms; tr_id++) {
961 start_end_id[tr_id*2 + 2]
962 = f_halo->send_perio_lst[stride*tr_id + 4*domain_id];
963 start_end_id[tr_id*2 + 3]
964 = start_end_id[tr_id*2 + 2]
965 + f_halo->send_perio_lst[stride*tr_id + 4*domain_id + 1];
966 }
967
968 /* We can now loop on sections independently of the transform */
969
970 for (section_id = 0; section_id < n_sections; section_id++) {
971
972 start_id = start_end_id[section_id*2];
973 end_id = start_end_id[section_id*2 + 1];
974
975 sub_count = 0;
976
977 if (section_id > 0)
978 c_halo->send_perio_lst[stride*(section_id-1) + 4*domain_id]
979 = c_halo->n_send_elts[0];
980
981 /* Build halo renumbering */
982
983 for (ii = start_id; ii < end_id; ii++) {
984 jj = coarse_row[f_halo->send_list[ii]] + 1;
985 if (sub_num[jj] == -1) {
986 sub_num[jj] = sub_count;
987 sub_count += 1;
988 }
989 coarse_send[ii] = sub_num[jj];
990 }
991
992 /* Update send_list indexes and counts */
993
994 c_halo->n_send_elts[0] += sub_count;
995
996 if (section_id > 0) {
997 c_halo->send_perio_lst[stride*(section_id-1) + 4*domain_id + 1]
998 = sub_count;
999 c_halo->send_perio_lst[stride*(section_id-1) + 4*domain_id + 2]
1000 = c_halo->n_send_elts[0];
1001 c_halo->send_perio_lst[stride*(section_id-1) + 4*domain_id + 3]
1002 = 0;
1003 }
1004
1005 /* Reset sub_num for next section or domain */
1006
1007 for (ii = start_id; ii < end_id; ii++)
1008 sub_num[coarse_row[f_halo->send_list[ii]] + 1] = -1;
1009 sub_num[0] = -2;
1010
1011 }
1012
1013 /* Update send index (initialized at halo creation) */
1014
1015 c_halo->send_index[domain_id*2 + 1] = c_halo->n_send_elts[0];
1016 c_halo->send_index[domain_id*2 + 2] = c_halo->n_send_elts[0];
1017
1018 }
1019
1020 /* Exchange and update coarsening list halo */
1021 /*------------------------------------------*/
1022
1023 _exchange_halo_coarsening(f_halo, coarse_send, coarse_row);
1024
1025 BFT_FREE(coarse_send);
1026
1027 /* Proceed halo section by halo section */
1028
1029 c_halo->n_elts[0] = 0;
1030 c_halo->n_elts[1] = 0;
1031 c_halo->index[0] = 0;
1032
1033 for (domain_id = 0; domain_id < f_halo->n_c_domains; domain_id++) {
1034
1035 /* Handle purely parallel cells */
1036
1037 start_end_id[0] = f_halo->index[domain_id*2];
1038
1039 if (f_halo->n_transforms == 0)
1040 start_end_id[1] = f_halo->index[(domain_id+1)*2];
1041 else
1042 start_end_id[1] = f_halo->perio_lst[4*domain_id];
1043
1044 /* Now handle periodic cells transformation by transformation */
1045
1046 for (tr_id = 0; tr_id < f_halo->n_transforms; tr_id++) {
1047 start_end_id[tr_id*2 + 2]
1048 = f_halo->perio_lst[stride*tr_id + 4*domain_id];
1049 start_end_id[tr_id*2 + 3]
1050 = start_end_id[tr_id*2 + 2]
1051 + f_halo->perio_lst[stride*tr_id + 4*domain_id + 1];
1052 }
1053
1054 /* We can now loop on sections independently of the transform */
1055
1056 for (section_id = 0; section_id < n_sections; section_id++) {
1057
1058 start_id = f_halo->n_local_elts + start_end_id[section_id*2];
1059 end_id = f_halo->n_local_elts + start_end_id[section_id*2 + 1];
1060
1061 sub_count = 0;
1062
1063 /* Build halo renumbering */
1064
1065 for (ii = start_id, jj = -1; ii < end_id; ii++) {
1066 if (coarse_row[ii] > -1) {
1067 if (coarse_row[ii] > jj)
1068 jj += 1;
1069 coarse_row[ii] += c_n_rows + c_halo->n_elts[0];
1070 }
1071 }
1072
1073 if (section_id > 0) {
1074 c_halo->perio_lst[stride*(section_id-1) + 4*domain_id]
1075 = c_halo->n_elts[0];
1076 c_halo->perio_lst[stride*(section_id-1) + 4*domain_id + 1]
1077 = jj + 1;
1078 }
1079
1080 c_halo->n_elts[0] += jj + 1;
1081
1082 }
1083
1084 for (tr_id = 0; tr_id < f_halo->n_transforms; tr_id++) {
1085 c_halo->perio_lst[stride*tr_id + 4*domain_id + 2]
1086 = c_halo->n_elts[0];
1087 c_halo->perio_lst[stride*tr_id + 4*domain_id + 3]
1088 = 0;
1089 }
1090
1091 /* Update index */
1092
1093 c_halo->n_elts[1] = c_halo->n_elts[0];
1094
1095 c_halo->index[domain_id*2 + 1] = c_halo->n_elts[0];
1096 c_halo->index[domain_id*2 + 2] = c_halo->n_elts[0];
1097 }
1098
1099 /* Pass to build send list */
1100 /*-------------------------*/
1101
1102 CS_MALLOC_HD(c_halo->send_list, c_halo->n_send_elts[0], cs_lnum_t,
1103 CS_ALLOC_HOST);
1104
1105 c_halo->n_send_elts[0] = 0;
1106
1107 /* Proceed halo section by halo section */
1108
1109 for (domain_id = 0; domain_id < f_halo->n_c_domains; domain_id++) {
1110
1111 /* Handle purely parallel cells */
1112
1113 start_end_id[0] = f_halo->send_index[domain_id*2];
1114
1115 if (f_halo->n_transforms == 0)
1116 start_end_id[1] = f_halo->send_index[(domain_id+1)*2];
1117 else
1118 start_end_id[1] = f_halo->send_perio_lst[4*domain_id];
1119
1120 /* Now handle periodic cells transformation by transformation */
1121
1122 for (tr_id = 0; tr_id < f_halo->n_transforms; tr_id++) {
1123 start_end_id[tr_id*2 + 2]
1124 = f_halo->send_perio_lst[stride*tr_id + 4*domain_id];
1125 start_end_id[tr_id*2 + 3]
1126 = start_end_id[tr_id*2 + 2]
1127 + f_halo->send_perio_lst[stride*tr_id + 4*domain_id + 1];
1128 }
1129
1130 /* We can now loop on sections independently of the transform */
1131
1132 for (section_id = 0; section_id < n_sections; section_id++) {
1133
1134 start_id = start_end_id[section_id*2];
1135 end_id = start_end_id[section_id*2 + 1];
1136
1137 sub_count = 0;
1138
1139 if (section_id > 0)
1140 c_halo->send_perio_lst[stride*(section_id-1) + 4*domain_id]
1141 = c_halo->n_send_elts[0];
1142
1143 /* Build halo renumbering */
1144
1145 for (ii = start_id; ii < end_id; ii++) {
1146 jj = coarse_row[f_halo->send_list[ii]] + 1;
1147 if (sub_num[jj] == -1) {
1148 sub_num[jj] = sub_count;
1149 c_halo->send_list[c_halo->n_send_elts[0] + sub_count] = jj - 1;
1150 sub_count += 1;
1151 }
1152 }
1153
1154 c_halo->n_send_elts[0] += sub_count;
1155
1156 /* Reset sub_num for next section or domain */
1157
1158 for (ii = start_id; ii < end_id; ii++)
1159 sub_num[coarse_row[f_halo->send_list[ii]] + 1] = -1;
1160 sub_num[0] = -2;
1161
1162 }
1163
1164 }
1165
1166 c_halo->n_send_elts[1] = c_halo->n_send_elts[0];
1167
1168 /* Free memory */
1169
1170 BFT_FREE(coarse_send);
1171 BFT_FREE(sub_num);
1172 BFT_FREE(start_end_id);
1173 }
1174
1175 /*----------------------------------------------------------------------------
1176 * Build coarse grid from fine grid
1177 *
1178 * The coarse grid must previously have been initialized with _coarse_init()
1179 * and its coarsening indicator determined (at least for the local cells;
1180 * it is extended to halo cells here if necessary).
1181 *
1182 * - Periodic faces are not handled yet
1183 *
1184 * parameters:
1185 * f <-- Pointer to fine (parent) grid structure
1186 * c <-> Pointer to coarse grid structure
1187 *----------------------------------------------------------------------------*/
1188
1189 static void
_coarsen(const cs_grid_t * f,cs_grid_t * c)1190 _coarsen(const cs_grid_t *f,
1191 cs_grid_t *c)
1192 {
1193 cs_lnum_t c_n_rows = 0;
1194
1195 const cs_lnum_t f_n_faces = f->n_faces;
1196 const cs_lnum_t f_n_rows = cs_matrix_get_n_rows(f->matrix);
1197 const cs_lnum_2_t *restrict f_face_cell = f->face_cell;
1198
1199 /* Sanity check */
1200
1201 if (f_face_cell != NULL) {
1202 # pragma omp parallel for if(f_n_faces > CS_THR_MIN)
1203 for (cs_lnum_t face_id = 0; face_id < f_n_faces; face_id++) {
1204 cs_lnum_t ii = f_face_cell[face_id][0];
1205 cs_lnum_t jj = f_face_cell[face_id][1];
1206 if (ii == jj)
1207 bft_error(__FILE__, __LINE__, 0,
1208 _("Connectivity error:\n"
1209 "Face %d has same cell %d on both sides."),
1210 (int)(face_id+1), (int)(ii+1));
1211 }
1212 }
1213
1214 /* Compute number of coarse rows */
1215
1216 for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
1217 if (c->coarse_row[ii] >= c_n_rows)
1218 c_n_rows = c->coarse_row[ii] + 1;
1219 }
1220
1221 c->n_rows = c_n_rows;
1222 c->n_g_rows = c_n_rows;
1223
1224 #if defined(HAVE_MPI)
1225 if (cs_glob_n_ranks > 1) {
1226 if (f->comm != MPI_COMM_NULL) {
1227 cs_gnum_t _c_n_rows = c_n_rows;
1228 MPI_Allreduce(&_c_n_rows, &(c->n_g_rows), 1, CS_MPI_GNUM, MPI_SUM,
1229 f->comm);
1230 }
1231 }
1232 #endif
1233
1234 /* Prolong mesh coarsening indicator to halo rows and build
1235 coarse mesh halos if necessary */
1236
1237 if (f->halo != NULL) {
1238 _coarsen_halo(f, c);
1239 c->n_cols_ext = c->n_rows + c->halo->n_elts[0];
1240 }
1241 else
1242 c->n_cols_ext = c_n_rows;
1243
1244 c->n_elts_r[0] = c->n_rows;
1245 c->n_elts_r[1] = c->n_cols_ext;
1246
1247 /* Build face coarsening and coarse grid face -> cells connectivity */
1248
1249 if ( f->face_cell != NULL
1250 && ( c->relaxation > 0
1251 || cs_matrix_get_type(f->matrix) == CS_MATRIX_NATIVE)) {
1252 _coarsen_faces(f,
1253 c->coarse_row,
1254 c->n_rows,
1255 &(c->n_faces),
1256 &(c->coarse_face),
1257 &(c->_face_cell));
1258
1259 c->face_cell = (const cs_lnum_2_t *)(c->_face_cell);
1260 }
1261
1262 }
1263
1264 #if defined(HAVE_MPI)
1265
/*----------------------------------------------------------------------------
 * Rebuild halo send index, send list, and send periodicity info after
 * the halo's receive-side arrays have been appended and merged.
 *
 * parameters:
 *   h               <-> pointer to halo structure
 *   new_src_cell_id <-> new list of cells the senders should provide
 *----------------------------------------------------------------------------*/
1273
static void
_rebuild_halo_send_lists(cs_halo_t  *h,
                         cs_lnum_t   new_src_cell_id[])
{
  /* As halos are based on interior faces, every domain is both
     sender and receiver */

  cs_lnum_t start, length;

  int rank_id, tr_id;
  int n_sections = 1 + h->n_transforms;  /* 1 "purely parallel" section
                                            + 1 per periodic transform */
  int request_count = 0;
  cs_lnum_t *send_buf = NULL, *recv_buf = NULL;
  MPI_Status *status = NULL;
  MPI_Request *request = NULL;

  BFT_MALLOC(status, h->n_c_domains*2, MPI_Status);
  BFT_MALLOC(request, h->n_c_domains*2, MPI_Request);
  BFT_MALLOC(send_buf, h->n_c_domains*n_sections, cs_lnum_t);
  BFT_MALLOC(recv_buf, h->n_c_domains*n_sections, cs_lnum_t);

  /* Exchange sizes: each rank tells its neighbors how many elements
     (total, then per transform) it expects to receive from them.
     Receive tag is the remote rank id, matching the remote side's
     sends tagged with its own (cs_glob_rank_id) rank. */

  for (rank_id = 0; rank_id < h->n_c_domains; rank_id++)
    MPI_Irecv(recv_buf + rank_id*n_sections,
              n_sections,
              CS_MPI_LNUM,
              h->c_domain_rank[rank_id],
              h->c_domain_rank[rank_id],
              cs_glob_mpi_comm,
              &(request[request_count++]));

  /* Send data to distant ranks */

  for (rank_id = 0; rank_id < h->n_c_domains; rank_id++) {
    send_buf[rank_id*n_sections]
      = h->index[2*(rank_id+1)] - h->index[2*rank_id];
    for (tr_id = 0; tr_id < h->n_transforms; tr_id++) {
      send_buf[rank_id*n_sections + tr_id + 1]
        = h->perio_lst[h->n_c_domains*4*tr_id + 4*rank_id + 1];
    }
    MPI_Isend(send_buf + rank_id*n_sections,
              n_sections,
              CS_MPI_LNUM,
              h->c_domain_rank[rank_id],
              cs_glob_rank_id,
              cs_glob_mpi_comm,
              &(request[request_count++]));
  }

  /* Wait for all exchanges */

  MPI_Waitall(request_count, request, status);
  request_count = 0;

  /* Update sizes: rebuild the send index from the counts just
     received (both halves of each index pair are set equal, as
     no distinction between standard and extended halo is kept here) */

  CS_MALLOC_HD(h->send_index, h->n_c_domains*2 + 1, cs_lnum_t, CS_ALLOC_HOST);
  h->send_index[0] = 0;
  for (rank_id = 0; rank_id < h->n_c_domains; rank_id++) {
    h->send_index[rank_id*2 + 1]
      = h->send_index[rank_id*2] + recv_buf[rank_id*n_sections];
    h->send_index[rank_id*2 + 2] = h->send_index[rank_id*2 + 1];
  }

  /* Update send_perio_lst in case of transforms: the purely parallel
     elements come first, followed by one block per transform */

  if (h->n_transforms > 0) {
    BFT_MALLOC(h->send_perio_lst, h->n_c_domains*h->n_transforms*4, cs_lnum_t);
    for (rank_id = 0; rank_id < h->n_c_domains; rank_id++) {
      cs_lnum_t n_cur_vals = recv_buf[rank_id*n_sections];
      for (tr_id = 0; tr_id < h->n_transforms; tr_id++)
        n_cur_vals -= recv_buf[rank_id*n_sections + 1 + tr_id];
      n_cur_vals += h->send_index[rank_id*2];
      for (tr_id = 0; tr_id < h->n_transforms; tr_id++) {
        cs_lnum_t n_tr_vals = recv_buf[rank_id*n_sections + 1 + tr_id];
        h->send_perio_lst[h->n_c_domains*4*tr_id + 4*rank_id] = n_cur_vals;
        h->send_perio_lst[h->n_c_domains*4*tr_id + 4*rank_id + 1] = n_tr_vals;
        n_cur_vals += n_tr_vals;
        h->send_perio_lst[h->n_c_domains*4*tr_id + 4*rank_id + 2] = n_cur_vals;
        h->send_perio_lst[h->n_c_domains*4*tr_id + 4*rank_id + 3] = 0;
      }
    }
  }

  BFT_FREE(send_buf);
  BFT_FREE(recv_buf);

  h->n_send_elts[0] = h->send_index[h->n_c_domains*2];
  h->n_send_elts[1] = h->n_send_elts[0];

  CS_MALLOC_HD(h->send_list, h->n_send_elts[0], cs_lnum_t, CS_ALLOC_HOST);

  /* Receive data from distant ranks: the actual list of local cell ids
     each neighbor wants this rank to send */

  for (rank_id = 0; rank_id < h->n_c_domains; rank_id++) {
    start = h->send_index[2*rank_id];
    length = h->send_index[2*rank_id + 1] - h->send_index[2*rank_id];
    MPI_Irecv(h->send_list + start,
              length,
              CS_MPI_LNUM,
              h->c_domain_rank[rank_id],
              h->c_domain_rank[rank_id],
              cs_glob_mpi_comm,
              &(request[request_count++]));
  }

  /* Send data to distant ranks */

  for (rank_id = 0; rank_id < h->n_c_domains; rank_id++) {
    start = h->index[2*rank_id];
    length = h->index[2*rank_id + 1] - h->index[2*rank_id];
    MPI_Isend(new_src_cell_id + start,
              length,
              CS_MPI_LNUM,
              h->c_domain_rank[rank_id],
              cs_glob_rank_id,
              cs_glob_mpi_comm,
              &(request[request_count++]));
  }

  /* Wait for all exchanges */

  MPI_Waitall(request_count, request, status);

  BFT_FREE(request);
  BFT_FREE(status);
}
1402
/*----------------------------------------------------------------------------
 * Empty a halo that is only useful for global synchronization
 *
 * parameters:
 *   h <-> pointer to halo structure being emptied
 *----------------------------------------------------------------------------*/
1409
1410 static void
_empty_halo(cs_halo_t * h)1411 _empty_halo(cs_halo_t *h)
1412 {
1413 if (h == NULL)
1414 return;
1415
1416 h->n_c_domains = 0;
1417 BFT_FREE(h->c_domain_rank);
1418
1419 h->n_send_elts[0] = 0;
1420 h->n_send_elts[1] = 0;
1421 h->n_elts[0] = 0;
1422 h->n_elts[1] = 0;
1423
1424 CS_FREE_HD(h->send_list);
1425 CS_FREE_HD(h->send_index);
1426 BFT_FREE(h->send_perio_lst);
1427 BFT_FREE(h->index);
1428 BFT_FREE(h->perio_lst);
1429 }
1430
1431 /*----------------------------------------------------------------------------
1432 * Merge halo info after appending arrays.
1433 *
1434 * parameters:
1435 * h <-> pointer to halo structure
1436 * loc_rank_id <-- local rank id
1437 * n_new_cells <-- number of new local cells
1438 * new_src_cell_id <-> in: new halo sender cell id matching halo cell;
1439 * out: new list of cells the senders should provide
1440 * (same numbers, merged and possibly reordered)
1441 * new_halo_cell_id --> new cell id for each halo cell
1442 * (< n_new_cells for cells that have become local,
1443 * >= n_new_cells for cells still in halo)
1444 *----------------------------------------------------------------------------*/
1445
static void
_merge_halo_data(cs_halo_t   *h,
                 int          loc_rank_id,
                 cs_lnum_t    n_new_cells,
                 cs_lnum_t    new_src_cell_id[],
                 cs_lnum_t    new_halo_cell_id[])
{
  int rank_id, prev_rank_id, tr_id, prev_section_id;
  cs_lnum_t ii, ii_0, ii_1, cur_id, section_id, src_id, prev_src_id;

  /* tmp_num layout per halo element: [rank, src_id] (stride 2)
     or [rank, transform section, src_id] (stride 3) */
  int stride = (h->n_transforms > 0) ? 3 : 2;
  int n_c_domains_ini = h->n_c_domains;

  cs_lnum_t n_elts_ini = h->n_elts[0];
  cs_lnum_t *order = NULL, *section_idx = NULL;
  cs_gnum_t *tmp_num = NULL;

  const int n_sections = h->n_transforms + 1;

  if (h->n_elts[0] < 1) {
    _empty_halo(h);
    return;
  }

  /* Order list by rank, transform, and new element number */

  BFT_MALLOC(tmp_num, n_elts_ini*stride, cs_gnum_t);

  for (rank_id = 0; rank_id < n_c_domains_ini; rank_id++) {
    for (ii = h->index[rank_id*2];
         ii < h->index[(rank_id+1)*2];
         ii++)
      tmp_num[ii*stride] = h->c_domain_rank[rank_id];
  }

  if (stride == 2) {
    for (ii = 0; ii < n_elts_ini; ii++)
      tmp_num[ii*2 + 1] = new_src_cell_id[ii];
  }
  else { /* if (stride == 3) */

    /* Section 0 is the purely parallel part; sections 1..n_transforms
       are filled below from the periodicity list */

    for (ii = 0; ii < n_elts_ini; ii++) {
      tmp_num[ii*3 + 1] = 0;
      tmp_num[ii*3 + 2] = new_src_cell_id[ii];
    }

    for (rank_id = 0; rank_id < n_c_domains_ini; rank_id++) {
      for (tr_id = 0; tr_id < h->n_transforms; tr_id++) {
        ii_0 = h->perio_lst[h->n_c_domains*4*tr_id + 4*rank_id];
        ii_1 = ii_0 + h->perio_lst[h->n_c_domains*4*tr_id + 4*rank_id + 1];
        for (ii = ii_0; ii < ii_1; ii++)
          tmp_num[ii*3 + 1] = tr_id + 1;
      }
    }
  }

  order = cs_order_gnum_s(NULL, tmp_num, stride, n_elts_ini);

  /* Rebuild lists and build renumbering; traversing in sorted order
     lets identical (rank, [section,] src_id) entries be merged, and
     entries referring to the local rank become regular local cells */

  cur_id = order[0];
  h->n_c_domains = 0;
  h->index[0] = 0;
  h->n_elts[0] = 0;
  h->n_elts[1] = 0;

  prev_rank_id = -1;
  prev_src_id = 0;

  if (stride == 2) {

    for (ii = 0; ii < n_elts_ini; ii++) {

      bool is_same = true;
      cur_id = order[ii];

      rank_id = tmp_num[cur_id*2];
      src_id = tmp_num[cur_id*2 + 1];

      if (rank_id != loc_rank_id) {

        if (rank_id != prev_rank_id) {
          h->c_domain_rank[h->n_c_domains] = rank_id;
          h->index[h->n_c_domains*2] = h->n_elts[0];
          h->n_c_domains += 1;
          is_same = false;
        }
        if (src_id != prev_src_id)
          is_same = false;

        if (is_same == false) {
          new_src_cell_id[h->n_elts[0]] = src_id;
          h->n_elts[0] += 1;
        }

        new_halo_cell_id[cur_id] = n_new_cells + h->n_elts[0] - 1;

        prev_rank_id = rank_id;
        prev_src_id = src_id;

      }
      else { /* if (rank_id == loc_rank_id) */
        /* Now local: src_id is directly the new local cell id */
        new_halo_cell_id[cur_id] = tmp_num[cur_id*2 + 1];
        assert(new_halo_cell_id[cur_id] < n_new_cells);
      }
    }

  }
  else { /* if (stride == 3) */

    int rank_idx = -1;
    cs_lnum_t section_idx_size = 0;

    prev_section_id = -1;

    /* Index will be initialized as count */

    for (ii = 0; ii < n_elts_ini; ii++) {

      bool is_same = true;
      cur_id = order[ii];

      rank_id = tmp_num[cur_id*3];
      section_id = tmp_num[cur_id*3 + 1];
      src_id = tmp_num[cur_id*3 + 2];

      /* Periodic entries (section != 0) stay in the halo even when
         they refer to the local rank */

      if (rank_id != loc_rank_id || tmp_num[cur_id*3 + 1] != 0) {

        if (rank_id != prev_rank_id) {
          h->c_domain_rank[h->n_c_domains] = rank_id;
          h->index[h->n_c_domains*2] = h->n_elts[0];
          h->n_c_domains += 1;
          is_same = false;
          rank_idx += 1;
        }
        if (section_id != prev_section_id)
          is_same = false;
        if (src_id != prev_src_id)
          is_same = false;

        if (is_same == false) {
          new_src_cell_id[h->n_elts[0]] = src_id;
          h->n_elts[0] += 1;
          /* Grow section counter array geometrically as needed */
          while (rank_idx*n_sections + section_id + 1 >= section_idx_size) {
            cs_lnum_t section_idx_size_prv = section_idx_size;
            section_idx_size
              = (section_idx_size_prv > 0) ? section_idx_size_prv*2 : 16;
            BFT_REALLOC(section_idx, section_idx_size, cs_lnum_t);
            for (cs_lnum_t sid = section_idx_size_prv;
                 sid < section_idx_size;
                 sid++)
              section_idx[sid] = 0;
          };
          section_idx[rank_idx*n_sections + section_id + 1] += 1;
        }

        new_halo_cell_id[cur_id] = n_new_cells + h->n_elts[0] - 1;

        prev_rank_id = rank_id;
        prev_section_id = section_id;
        prev_src_id = src_id;

      }
      else { /* if (rank_id == loc_rank_id && tmp_num[cur_id*3 + 1] == 0) */
        new_halo_cell_id[cur_id] = tmp_num[cur_id*3 + 2];
        assert(new_halo_cell_id[cur_id] < n_new_cells);
      }

    }

    /* Transform count to index */

    section_idx_size = n_sections * h->n_c_domains + 1;
    for (cs_lnum_t sid = 1; sid < section_idx_size; sid++)
      section_idx[sid] += section_idx[sid - 1];
  }

  /* Rebuild halo periodicity list from the per-rank section index */

  if (h->n_transforms > 0) {

    for (tr_id = 0; tr_id < h->n_transforms; tr_id++) {
      for (rank_id = 0; rank_id < h->n_c_domains; rank_id++) {
        section_id = rank_id*n_sections + tr_id + 1;
        h->perio_lst[h->n_c_domains*4*tr_id + 4*rank_id]
          = section_idx[section_id];
        h->perio_lst[h->n_c_domains*4*tr_id + 4*rank_id + 1]
          = section_idx[section_id + 1] - section_idx[section_id];
        h->perio_lst[h->n_c_domains*4*tr_id + 4*rank_id + 2]
          = section_idx[section_id + 1];
        h->perio_lst[h->n_c_domains*4*tr_id + 4*rank_id + 3]
          = 0;
      }
    }

    BFT_FREE(section_idx);

  }

  /* Free and resize memory */

  BFT_FREE(order);
  BFT_FREE(tmp_num);
  BFT_REALLOC(h->c_domain_rank, h->n_c_domains, int);
  BFT_REALLOC(h->index, h->n_c_domains*2+1, cs_lnum_t);
  if (h->n_transforms > 0)
    BFT_REALLOC(h->perio_lst,
                h->n_c_domains * h->n_transforms * 4,
                cs_lnum_t);

  h->n_elts[1] = h->n_elts[0];
  h->index[h->n_c_domains*2] = h->n_elts[0];
  for (ii = 0; ii < h->n_c_domains; ii++)
    h->index[ii*2+1] = h->index[ii*2+2];
}
1659
1660 /*----------------------------------------------------------------------------
1661 * Append halo info for grid grouping and merging.
1662 *
1663 * parameters:
1664 * g <-> pointer to grid structure being merged
1665 * new_cell_id <-> new cell ids for local cells
1666 * in: defined for local cells
1667 * out: updated also for halo cells
1668 *----------------------------------------------------------------------------*/
1669
static void
_append_halos(cs_grid_t   *g,
              cs_lnum_t   *new_cell_id)
{
  cs_lnum_t ii, jj;
  int rank_id;
  int counts[3];

  int *recv_count = NULL;
  cs_lnum_t *new_src_cell_id = NULL, *new_halo_cell_id = NULL;

  cs_halo_t *h = g->_halo;

  MPI_Status status;
  MPI_Comm comm = cs_glob_mpi_comm;

  const int n_transforms = h->n_transforms;
  /* Arbitrary but deterministic tag shared by sender and receiver */
  static const int tag = 'a'+'p'+'p'+'e'+'n'+'d'+'_'+'h';

  /* Remove send indexes, which will be rebuilt */

  h->n_send_elts[0] = 0;
  h->n_send_elts[1] = 0;
  CS_FREE_HD(h->send_list);
  CS_FREE_HD(h->send_index);
  BFT_FREE(h->send_perio_lst);

  if (g->merge_sub_size == 0) {
    _empty_halo(h);
    return;
  }

  /* Adjust numbering: map each neighbor rank to the root rank of
     the merge group it will belong to after merging */

  for (rank_id = 0; rank_id < h->n_c_domains; rank_id++) {
    int init_rank_id = h->c_domain_rank[rank_id];
    h->c_domain_rank[rank_id]
      = init_rank_id - (init_rank_id % g->next_merge_stride);
  }

  counts[0] = h->n_c_domains;
  counts[1] = h->n_local_elts;
  counts[2] = h->n_elts[0];

  /* Exchange counters needed for concatenation */

  if (g->merge_sub_rank == 0) {
    BFT_MALLOC(recv_count, g->merge_sub_size*3, int);
    for (ii = 0; ii < 3; ii++)
      recv_count[ii] = counts[ii];
    for (rank_id = 1; rank_id < g->merge_sub_size; rank_id++) {
      int dist_rank = g->merge_sub_root + g->merge_stride*rank_id;
      MPI_Recv(recv_count + 3*rank_id, 3, MPI_INT, dist_rank,
               tag, comm, &status);
      for (ii = 0; ii < 3; ii++)
        counts[ii] += recv_count[rank_id*3 + ii];
    }
  }
  else
    MPI_Send(counts, 3, MPI_INT, g->merge_sub_root, tag, comm);

  /* In case of periodic transforms, transpose perio list
     so as to have blocks of fixed n_transforms size, easier
     to work with for append + merge operations */

  if (n_transforms > 0) {
    cs_lnum_t *perio_list_tr;
    BFT_MALLOC(perio_list_tr, h->n_c_domains*n_transforms*4, cs_lnum_t);
    for (rank_id = 0; rank_id < h->n_c_domains; rank_id++) {
      for (int tr_id = 0; tr_id < n_transforms; tr_id++) {
        for (int k = 0; k < 4; k++)
          perio_list_tr[n_transforms*4*rank_id + 4*tr_id + k]
            = h->perio_lst[h->n_c_domains*4*tr_id + 4*rank_id + k];
      }
    }
    memcpy(h->perio_lst,
           perio_list_tr,
           h->n_c_domains*n_transforms*4*sizeof(cs_lnum_t));
    BFT_FREE(perio_list_tr);
  }

  /* Reallocate arrays for receiving rank and append data */

  if (g->merge_sub_rank == 0) {

    BFT_MALLOC(new_src_cell_id, counts[2], cs_lnum_t);
    for (ii = g->n_rows, jj = 0; ii < g->n_cols_ext; ii++, jj++)
      new_src_cell_id[jj] = new_cell_id[ii];

    BFT_REALLOC(h->c_domain_rank, counts[0], int);
    BFT_REALLOC(h->index, counts[0]*2 + 1, cs_lnum_t);
    BFT_REALLOC(h->perio_lst, counts[0]*n_transforms*4, cs_lnum_t);

    for (rank_id = 1; rank_id < g->merge_sub_size; rank_id++) {

      int n_c_domains_r = recv_count[rank_id*3 + 0];
      cs_lnum_t n_recv = recv_count[rank_id*3 + 2];

      cs_lnum_t index_shift = h->index[2*h->n_c_domains];
      int dist_rank = g->merge_sub_root + g->merge_stride*rank_id;

      /* Receive order must mirror the sends in the else-branch below */

      MPI_Recv(h->c_domain_rank + h->n_c_domains, n_c_domains_r,
               MPI_INT, dist_rank, tag, comm, &status);
      MPI_Recv(new_src_cell_id + h->n_elts[0], n_recv,
               CS_MPI_LNUM, dist_rank, tag, comm, &status);
      MPI_Recv(h->index + h->n_c_domains*2, n_c_domains_r*2+1,
               CS_MPI_LNUM, dist_rank, tag, comm, &status);

      /* Shift appended indexes so they continue the local ones */

      for (ii = 0, jj = h->n_c_domains*2;
           ii < n_c_domains_r*2+1;
           ii++, jj++)
        h->index[jj] += index_shift;

      if (n_transforms > 0) {
        MPI_Recv(h->perio_lst + h->n_c_domains*n_transforms*4,
                 n_c_domains_r*n_transforms*4,
                 CS_MPI_LNUM, dist_rank, tag, comm, &status);
        /* Only start positions (every 2nd value of the transposed
           [start, count] pairs) need the shift */
        for (ii = 0, jj = h->n_c_domains*n_transforms*4;
             ii < n_c_domains_r*n_transforms*4;
             ii+=2, jj+=2)
          h->perio_lst[jj] += index_shift;
      }

      /* Update halo sizes */

      h->n_local_elts += recv_count[rank_id*3 + 1];
      h->n_c_domains += n_c_domains_r;
      h->n_elts[0] += n_recv;
      h->n_elts[1] = h->n_elts[0];
    }

  }
  else if (g->merge_sub_size > 1) {

    BFT_MALLOC(new_src_cell_id, h->n_elts[0], cs_lnum_t);
    for (ii = g->n_rows, jj = 0; ii < g->n_cols_ext; ii++, jj++)
      new_src_cell_id[jj] = new_cell_id[ii];

    MPI_Send(h->c_domain_rank, h->n_c_domains, MPI_INT,
             g->merge_sub_root, tag, comm);
    MPI_Send(new_src_cell_id, h->n_elts[0], CS_MPI_LNUM,
             g->merge_sub_root, tag, comm);
    MPI_Send(h->index, h->n_c_domains*2+1, CS_MPI_LNUM,
             g->merge_sub_root, tag, comm);

    if (n_transforms > 0)
      MPI_Send(h->perio_lst, h->n_c_domains*n_transforms*4,
               CS_MPI_LNUM, g->merge_sub_root, tag, comm);

    _empty_halo(h);
  }

  /* Cleanup halo and set pointer for coarsening (sub_root) ranks */

  if (h != NULL) {

    /* In case of periodic transforms, transpose perio list back to its
       standard order */

    if (n_transforms > 0) {
      cs_lnum_t *perio_list_tr;
      BFT_MALLOC(perio_list_tr, h->n_c_domains*n_transforms*4, cs_lnum_t);
      for (int tr_id = 0; tr_id < n_transforms; tr_id++) {
        for (rank_id = 0; rank_id < h->n_c_domains; rank_id++) {
          for (int k = 0; k < 4; k++)
            perio_list_tr[h->n_c_domains*4*tr_id + 4*rank_id + k]
              = h->perio_lst[n_transforms*4*rank_id + 4*tr_id + k];
        }
      }
      memcpy(h->perio_lst,
             perio_list_tr,
             h->n_c_domains*n_transforms*4*sizeof(cs_lnum_t));
      BFT_FREE(perio_list_tr);
    }

    /* Now merge data */

    BFT_MALLOC(new_halo_cell_id, h->n_elts[0], cs_lnum_t);

    _merge_halo_data(h,
                     cs_glob_rank_id,
                     counts[1],
                     new_src_cell_id,
                     new_halo_cell_id);

    _rebuild_halo_send_lists(h, new_src_cell_id);

  }

  if (new_src_cell_id != NULL)
    BFT_FREE(new_src_cell_id);

  g->halo = h;
  g->_halo = h;

  /* Finally, update halo section of cell renumbering array */

  if (g->merge_sub_rank == 0) {

    cs_lnum_t n_send = recv_count[2];
    cs_lnum_t send_shift = n_send;

    for (ii = 0; ii < n_send; ii++)
      new_cell_id[g->n_rows + ii] = new_halo_cell_id[ii];

    /* Scatter each contributing rank's slice of the merged renumbering */

    for (rank_id = 1; rank_id < g->merge_sub_size; rank_id++) {
      int dist_rank = g->merge_sub_root + g->merge_stride*rank_id;
      n_send = recv_count[rank_id*3 + 2];
      MPI_Send(new_halo_cell_id + send_shift, n_send, CS_MPI_LNUM,
               dist_rank, tag, comm);
      send_shift += n_send;
    }

    BFT_FREE(recv_count);
  }
  else if (g->merge_sub_size > 1)
    MPI_Recv(new_cell_id + g->n_rows, g->n_cols_ext - g->n_rows,
             CS_MPI_LNUM, g->merge_sub_root, tag, comm, &status);

  BFT_FREE(new_halo_cell_id);
}
1891
1892 /*----------------------------------------------------------------------------
1893 * Append cell arrays and counts for grid grouping and merging.
1894 *
1895 * parameters:
1896 * g <-> pointer to grid structure being merged
1897 *----------------------------------------------------------------------------*/
1898
static void
_append_cell_data(cs_grid_t  *g)
{
  int rank_id;

  MPI_Status status;
  MPI_Comm comm = cs_glob_mpi_comm;

  /* Arbitrary but deterministic tag shared by sender and receiver */
  static const int tag = 'a'+'p'+'p'+'e'+'n'+'d'+'_'+'c';

  /* Reallocate arrays for receiving rank and append data */

  if (g->merge_sub_rank == 0) {

    /* merge_cell_idx gives each sub-rank's position in the merged
       cell numbering; the last entry is the merged row count */

    g->n_rows = g->merge_cell_idx[g->merge_sub_size];
    g->n_cols_ext = g->n_rows + g->halo->n_elts[0];

    /* Geometric quantities are only maintained when relaxation is used */

    if (g->relaxation > 0) {
      BFT_REALLOC(g->_cell_cen, g->n_cols_ext * 3, cs_real_t);
      BFT_REALLOC(g->_cell_vol, g->n_cols_ext, cs_real_t);
    }
    BFT_REALLOC(g->_da, g->n_cols_ext*g->db_size[3], cs_real_t);

    for (rank_id = 1; rank_id < g->merge_sub_size; rank_id++) {

      cs_lnum_t n_recv = (  g->merge_cell_idx[rank_id+1]
                          - g->merge_cell_idx[rank_id]);
      int dist_rank = g->merge_sub_root + g->merge_stride*rank_id;

      /* Receive order must mirror the sends in the else-branch below */

      if (g->relaxation > 0) {
        MPI_Recv(g->_cell_cen + g->merge_cell_idx[rank_id]*3, n_recv*3,
                 CS_MPI_REAL, dist_rank, tag, comm, &status);

        MPI_Recv(g->_cell_vol + g->merge_cell_idx[rank_id], n_recv,
                 CS_MPI_REAL, dist_rank, tag, comm, &status);
      }

      MPI_Recv(g->_da + g->merge_cell_idx[rank_id]*g->db_size[3],
               n_recv*g->db_size[3],
               CS_MPI_REAL, dist_rank, tag, comm, &status);

    }

  }
  else {
    /* Non-root ranks send their data to the merge root, then release
       it locally; their grid becomes empty */

    if (g->relaxation > 0) {
      MPI_Send(g->_cell_cen, g->n_rows*3, CS_MPI_REAL, g->merge_sub_root,
               tag, comm);
      BFT_FREE(g->_cell_cen);

      MPI_Send(g->_cell_vol, g->n_rows, CS_MPI_REAL, g->merge_sub_root,
               tag, comm);
      BFT_FREE(g->_cell_vol);
    }

    MPI_Send(g->_da, g->n_rows*g->db_size[3], CS_MPI_REAL,
             g->merge_sub_root, tag, comm);
    BFT_FREE(g->_da);

    g->n_rows = 0;
    g->n_cols_ext = 0;
  }

  /* Re-point const views at the (possibly reallocated) private arrays */

  if (g->relaxation > 0) {
    g->cell_cen = g->_cell_cen;
    g->cell_vol = g->_cell_vol;
  }

  g->da = g->_da;
}
1969
1970 /*----------------------------------------------------------------------------
1971 * Synchronize cell array values for grid grouping and merging.
1972 *
1973 * parameters:
1974 * g <-> pointer to grid structure being merged
1975 *----------------------------------------------------------------------------*/
1976
1977 static void
_sync_merged_cell_data(cs_grid_t * g)1978 _sync_merged_cell_data(cs_grid_t *g)
1979 {
1980 if (g->halo != NULL) {
1981
1982 if (g->relaxation > 0) {
1983 cs_halo_sync_var_strided(g->halo, CS_HALO_STANDARD, g->_cell_cen, 3);
1984 if (g->halo->n_transforms > 0)
1985 cs_halo_perio_sync_coords(g->halo, CS_HALO_STANDARD, g->_cell_cen);
1986
1987 cs_halo_sync_var(g->halo, CS_HALO_STANDARD, g->_cell_vol);
1988 }
1989
1990 cs_halo_sync_var_strided(g->halo, CS_HALO_STANDARD,
1991 g->_da, g->db_size[3]);
1992
1993 }
1994 }
1995
/*----------------------------------------------------------------------------
 * Append face arrays and counts for grid grouping and merging.
 *
 * parameters:
 *   g         <-> pointer to grid structure being merged
 *   n_faces   <-- number of faces to append
 *   face_list <-> list of local faces to append (face arrays are
 *                 compacted in place on sender ranks)
 *----------------------------------------------------------------------------*/
2006
static void
_append_face_data(cs_grid_t   *g,
                  cs_lnum_t    n_faces,
                  cs_lnum_t   *face_list)
{
  int rank_id;

  cs_lnum_t *recv_count = NULL;

  MPI_Status status;
  MPI_Comm comm = cs_glob_mpi_comm;

  /* Arbitrary but deterministic tag shared by sender and receiver */
  static const int tag = 'a'+'p'+'p'+'e'+'n'+'d'+'_'+'f';

  /* Exchange counters needed for concatenation */

  if (g->merge_sub_rank == 0) {
    BFT_MALLOC(recv_count, g->merge_sub_size, cs_lnum_t);
    recv_count[0] = g->n_faces;
    for (rank_id = 1; rank_id < g->merge_sub_size; rank_id++) {
      int dist_rank = g->merge_sub_root + g->merge_stride*rank_id;
      MPI_Recv(recv_count + rank_id, 1, CS_MPI_LNUM, dist_rank,
               tag, comm, &status);
    }
  }
  else
    MPI_Send(&n_faces, 1, CS_MPI_LNUM, g->merge_sub_root, tag, comm);

  /* Reallocate arrays for receiving rank and append data */

  /* Face coarsening info is no longer needed once grids are merged */

  BFT_FREE(g->coarse_face);

  if (g->merge_sub_rank == 0) {

    cs_lnum_t n_faces_tot = 0;

    for (rank_id = 0; rank_id < g->merge_sub_size; rank_id++)
      n_faces_tot += recv_count[rank_id];

    BFT_REALLOC(g->_face_cell, n_faces_tot, cs_lnum_2_t);

    /* Extra-diagonal coefficients: 1 value per face if symmetric,
       2 otherwise */

    if (g->symmetric == true)
      BFT_REALLOC(g->_xa, n_faces_tot, cs_real_t);
    else
      BFT_REALLOC(g->_xa, n_faces_tot*2, cs_real_t);

    if (g->relaxation > 0) {

      BFT_REALLOC(g->_face_normal, n_faces_tot*3, cs_real_t);

      BFT_REALLOC(g->_xa0, n_faces_tot, cs_real_t);
      BFT_REALLOC(g->xa0ij, n_faces_tot*3, cs_real_t);

    }

    g->n_faces = recv_count[0];

    for (rank_id = 1; rank_id < g->merge_sub_size; rank_id++) {

      cs_lnum_t n_recv = recv_count[rank_id];
      int dist_rank = g->merge_sub_root + g->merge_stride*rank_id;

      /* Receive order must mirror the sends in the else-branch below */

      MPI_Recv(g->_face_cell + g->n_faces, n_recv*2,
               CS_MPI_LNUM, dist_rank, tag, comm, &status);

      if (g->symmetric == true)
        MPI_Recv(g->_xa + g->n_faces, n_recv,
                 CS_MPI_REAL, dist_rank, tag, comm, &status);
      else
        MPI_Recv(g->_xa + g->n_faces*2, n_recv*2,
                 CS_MPI_REAL, dist_rank, tag, comm, &status);

      if (g->relaxation > 0) {

        MPI_Recv(g->_face_normal + g->n_faces*3, n_recv*3,
                 CS_MPI_REAL, dist_rank, tag, comm, &status);

        MPI_Recv(g->_xa0 + g->n_faces, n_recv,
                 CS_MPI_REAL, dist_rank, tag, comm, &status);

        MPI_Recv(g->xa0ij + g->n_faces*3, n_recv*3,
                 CS_MPI_REAL, dist_rank, tag, comm, &status);

      }

      g->n_faces += recv_count[rank_id];
    }

    BFT_FREE(recv_count);

  }
  else {

    cs_lnum_t face_id = 0;

    /* Filter face connectivity then send it: compact the arrays
       in place so only the faces listed in face_list (in that
       order) are transmitted. Safe because face_id <= p_face_id. */

    for (face_id = 0; face_id < n_faces; face_id++) {
      cs_lnum_t p_face_id = face_list[face_id];
      g->_face_cell[face_id][0] = g->_face_cell[p_face_id][0];
      g->_face_cell[face_id][1] = g->_face_cell[p_face_id][1];
      if (g->symmetric == true)
        g->_xa[face_id] = g->_xa[p_face_id];
      else {
        g->_xa[face_id*2] = g->_xa[p_face_id*2];
        g->_xa[face_id*2+1] = g->_xa[p_face_id*2+1];
      }
      if (g->relaxation > 0) {
        g->_face_normal[face_id*3] = g->_face_normal[p_face_id*3];
        g->_face_normal[face_id*3 + 1] = g->_face_normal[p_face_id*3 + 1];
        g->_face_normal[face_id*3 + 2] = g->_face_normal[p_face_id*3 + 2];
        g->_xa0[face_id] = g->_xa0[p_face_id];
        g->xa0ij[face_id*3] = g->xa0ij[p_face_id*3];
        g->xa0ij[face_id*3 + 1] = g->xa0ij[p_face_id*3 + 1];
        g->xa0ij[face_id*3 + 2] = g->xa0ij[p_face_id*3 + 2];
      }
    }

    MPI_Send(g->_face_cell, n_faces*2, CS_MPI_LNUM,
             g->merge_sub_root, tag, comm);
    BFT_FREE(g->_face_cell);

    if (g->symmetric == true)
      MPI_Send(g->_xa, n_faces, CS_MPI_REAL,
               g->merge_sub_root, tag, comm);
    else
      MPI_Send(g->_xa, n_faces*2, CS_MPI_REAL,
               g->merge_sub_root, tag, comm);
    BFT_FREE(g->_xa);

    if (g->relaxation > 0) {
      MPI_Send(g->_face_normal, n_faces*3, CS_MPI_REAL,
               g->merge_sub_root, tag, comm);
      BFT_FREE(g->_face_normal);

      MPI_Send(g->_xa0, n_faces, CS_MPI_REAL,
               g->merge_sub_root, tag, comm);
      BFT_FREE(g->_xa0);

      MPI_Send(g->xa0ij, n_faces*3, CS_MPI_REAL,
               g->merge_sub_root, tag, comm);
      BFT_FREE(g->xa0ij);
    }

    g->n_faces = 0;
  }

  /* Re-point const views at the (possibly reallocated) private arrays */

  g->face_cell = (const cs_lnum_2_t *)(g->_face_cell);
  g->xa = g->_xa;
  if (g->relaxation > 0) {
    g->face_normal = g->_face_normal;
    g->xa0 = g->_xa0;
  }
}
2161
2162 /*----------------------------------------------------------------------------
2163 * Merge grids on several ranks to a grid on a single rank
2164 *
2165 * The coarse grid must previously have been initialized with _coarse_init()
2166 * and its coarsening indicator determined (at least for the local cells;
2167 * it is extended to halo cells here if necessary).
2168 *
2169 * - Periodic faces are not handled yet
2170 *
2171 * parameters:
2172 * g <-- Pointer to grid structure
2173 * merge_stride <-- Associated merge stride
2174 * verbosity <-- verbosity level
2175 *----------------------------------------------------------------------------*/
2176
static void
_merge_grids(cs_grid_t  *g,
             int         merge_stride,
             int         verbosity)
{
  int i, rank_id, t_id;
  cs_lnum_t j, face_id;
  int base_rank = cs_glob_rank_id;
  cs_lnum_t cell_shift = 0;   /* shift of local cell ids in merged numbering */
  cs_lnum_t n_faces = 0;      /* number of faces this rank contributes */
  cs_lnum_t *new_cell_id = NULL, *face_list = NULL;
  bool *halo_cell_flag = NULL;
  MPI_Comm comm = cs_glob_mpi_comm;
  MPI_Status status;

  /* Deterministic message tag shared by all send/receive pairs below */
  static const int tag = 'm'+'e'+'r'+'g'+'e';

  /* A stride below 2 means no merging is requested */
  if (merge_stride < 2)
    return;

  /* Determine rank in merged group */

  g->merge_sub_size = merge_stride;
  g->merge_stride = g->next_merge_stride;
  g->next_merge_stride *= merge_stride;

  /* Ranks not aligned on the current stride do not carry grid data at
     this level, so they do not participate in the merge */
  if (base_rank % g->merge_stride != 0) {
    g->merge_sub_size = 0;
    g->merge_sub_root = -1;
    g->merge_sub_rank = -1;
  }
  else {
    int merge_rank = base_rank / g->merge_stride;
    int merge_size = (cs_glob_n_ranks/g->merge_stride);
    int merge_sub_root = (merge_rank / merge_stride) * merge_stride;
    if (cs_glob_n_ranks % g->merge_stride > 0)
      merge_size += 1;
    g->merge_sub_root = merge_sub_root * g->merge_stride;
    g->merge_sub_rank = merge_rank % merge_stride;
    /* The last merge set may contain fewer than merge_stride members */
    if (merge_sub_root + g->merge_sub_size > merge_size)
      g->merge_sub_size = merge_size - merge_sub_root;
  }

  /* Update effective rank count and communicator for the merged grid
     (rounding up when ranks do not divide evenly) */
  if (g->next_merge_stride > 1) {
    if (g->n_ranks % merge_stride)
      g->n_ranks = (g->n_ranks/merge_stride) + 1;
    else
      g->n_ranks = (g->n_ranks/merge_stride);
    g->comm = cs_base_get_rank_step_comm(g->next_merge_stride);
  }

  if (verbosity > 2) {
    bft_printf("\n"
               " merging level %2d grid:\n"
               " merge_stride = %d; new ranks = %d\n",
               g->level, g->merge_stride, g->n_ranks);
    if (verbosity > 3)
      bft_printf(" sub_root = %d; sub_rank = %d; sub_size = %d\n",
                 g->merge_sub_root, g->merge_sub_rank, g->merge_sub_size);
  }

  /* Compute number of coarse cells in new grid;
     local cell numbers will be shifted by cell_shift in the merged grid */

  if (g->merge_sub_size > 1) {

    /* Root gathers per-rank row counts and builds a prefix-sum index */
    if (g->merge_sub_rank == 0) {
      BFT_MALLOC(g->merge_cell_idx, g->merge_sub_size + 1, cs_lnum_t);
      g->merge_cell_idx[0] = 0; g->merge_cell_idx[1] = g->n_rows;
      for (i = 1; i < g->merge_sub_size; i++) {
        cs_lnum_t recv_val;
        int dist_rank = g->merge_sub_root + g->merge_stride*i;
        MPI_Recv(&recv_val, 1, CS_MPI_LNUM, dist_rank, tag, comm, &status);
        g->merge_cell_idx[i + 1] = recv_val + g->merge_cell_idx[i];
      }
    }
    else {
      cs_lnum_t send_val = g->n_rows;
      MPI_Send(&send_val, 1, CS_MPI_LNUM, g->merge_sub_root, tag, comm);
    }

    /* Now return computed info to grids that will be merged */

    if (g->merge_sub_rank == 0) {
      for (i = 1; i < g->merge_sub_size; i++) {
        cs_lnum_t send_val;
        send_val = g->merge_cell_idx[i];
        MPI_Send(&send_val, 1, CS_MPI_LNUM,
                 g->merge_sub_root + g->merge_stride*i, tag, comm);
      }
    }
    else
      MPI_Recv(&cell_shift, 1, CS_MPI_LNUM,
               g->merge_sub_root, tag, comm, &status);
  }

  /* Compute and exchange new cell numbers */

  BFT_MALLOC(new_cell_id, g->n_cols_ext, cs_lnum_t);
  for (j = 0; j < g->n_rows; j++)
    new_cell_id[j] = cell_shift + j;
  for (j = g->n_rows; j < g->n_cols_ext; j++)
    new_cell_id[j] = -1;   /* ghost values filled by the halo sync below */

  cs_halo_sync_untyped(g->halo,
                       CS_HALO_STANDARD,
                       sizeof(cs_lnum_t),
                       new_cell_id);

  /* Now build face filter list (before halo is modified) */

  if (g->merge_sub_size > 1 && g->merge_sub_rank > 0 && g->n_faces > 0) {

    cs_lnum_t n_ghost_cells = g->n_cols_ext - g->n_rows;

    /* Mark which faces should be merged: to avoid duplicates, a face
       connected to a cell on a lower rank in the same merge set is
       discarded, as it has already been accounted for by that rank. */

    BFT_MALLOC(face_list, g->n_faces, cs_lnum_t);
    BFT_MALLOC(halo_cell_flag, n_ghost_cells, bool);
    for (j = 0; j < n_ghost_cells; j++)
      halo_cell_flag[j] = false;

    /* Flag ghost cells whose owning rank keeps the shared faces;
       for ranks in the same merge set below us, only periodic-transform
       entries are flagged (NOTE(review): assumes perio_lst entries here
       index ghost cells relative to n_rows — confirm with halo layout). */
    for (i = 0; i < g->halo->n_c_domains; i++) {
      rank_id = g->halo->c_domain_rank[i];
      if (rank_id >= g->merge_sub_root && rank_id < cs_glob_rank_id) {
        for (t_id = 0; t_id < g->halo->n_transforms; t_id++) {
          int t_shift = 4 * g->halo->n_c_domains * t_id;
          cs_lnum_t t_start = g->halo->perio_lst[t_shift + 4*i];
          cs_lnum_t t_end = t_start + g->halo->perio_lst[t_shift + 4*i + 1];
          for (j = t_start; j < t_end; j++)
            halo_cell_flag[j] = true;
        }
      }
      else {
        for (j = g->halo->index[2*i]; j < g->halo->index[2*i+2]; j++)
          halo_cell_flag[j] = true;
      }
    }

    /* Keep a face unless it touches an unflagged ghost cell
       (i.e. a cell handled by a lower rank in the merge set) */
    for (face_id = 0; face_id < g->n_faces; face_id++) {
      bool use_face = true;
      cs_lnum_t ii = g->face_cell[face_id][0] - g->n_rows + 1;
      cs_lnum_t jj = g->face_cell[face_id][1] - g->n_rows + 1;
      if (ii > 0) {
        if (halo_cell_flag[ii - 1] == false)
          use_face = false;
      }
      else if (jj > 0) {
        if (halo_cell_flag[jj - 1] == false)
          use_face = false;
      }
      if (use_face == true)
        face_list[n_faces++] = face_id;
    }

    BFT_FREE(halo_cell_flag);
  }

  /* Append and merge halos */

  _append_halos(g, new_cell_id);

  /* Update face ->cells connectivity */

  for (face_id = 0; face_id < g->n_faces; face_id++) {
    cs_lnum_t ii = g->face_cell[face_id][0];
    cs_lnum_t jj = g->face_cell[face_id][1];
    assert(ii != jj && new_cell_id[ii] != new_cell_id[jj]);
    g->_face_cell[face_id][0] = new_cell_id[ii];
    g->_face_cell[face_id][1] = new_cell_id[jj];
  }

  BFT_FREE(new_cell_id);

  /* Merge cell and face data */

  if (g->merge_sub_size > 1) {
    _append_cell_data(g);
    _append_face_data(g, n_faces, face_list);
  }
  _sync_merged_cell_data(g);

  BFT_FREE(face_list);

  if (verbosity > 3)
    bft_printf(" merged to %ld (from %ld) rows\n\n",
               (long)g->n_rows, (long)g->n_elts_r[0]);
}
2367
2368 /*----------------------------------------------------------------------------
2369 * Scatter coarse cell integer data in case of merged grids
2370 *
2371 * parameters:
2372 * g <-- Grid structure
2373 * num <-> Variable defined on merged grid cells in,
2374 * defined on scattered to grid cells out
2375 *----------------------------------------------------------------------------*/
2376
2377 static void
_scatter_row_int(const cs_grid_t * g,int * num)2378 _scatter_row_int(const cs_grid_t *g,
2379 int *num)
2380 {
2381 assert(g != NULL);
2382
2383 /* If grid merging has taken place, scatter coarse data */
2384
2385 if (g->merge_sub_size > 1) {
2386
2387 MPI_Comm comm = cs_glob_mpi_comm;
2388 static const int tag = 'p'+'r'+'o'+'l'+'o'+'n'+'g';
2389
2390 /* Append data */
2391
2392 if (g->merge_sub_rank == 0) {
2393 int rank_id;
2394 assert(cs_glob_rank_id == g->merge_sub_root);
2395 for (rank_id = 1; rank_id < g->merge_sub_size; rank_id++) {
2396 cs_lnum_t n_send = ( g->merge_cell_idx[rank_id+1]
2397 - g->merge_cell_idx[rank_id]);
2398 int dist_rank = g->merge_sub_root + g->merge_stride*rank_id;
2399 MPI_Send(num + g->merge_cell_idx[rank_id], n_send, CS_MPI_LNUM,
2400 dist_rank, tag, comm);
2401 }
2402 }
2403 else {
2404 MPI_Status status;
2405 MPI_Recv(num, g->n_elts_r[0], CS_MPI_LNUM,
2406 g->merge_sub_root, tag, comm, &status);
2407 }
2408 }
2409
2410 }
2411
/*----------------------------------------------------------------------------
 * Scatter coarse cell local number (cs_lnum_t) data in case of merged grids
 *
 * parameters:
 *   g               <-- Grid structure
 *   num             <-> Variable defined on merged grid cells in,
 *                       defined on scattered (pre-merge) grid cells out
 *----------------------------------------------------------------------------*/
2420
2421 static void
_scatter_row_num(const cs_grid_t * g,cs_lnum_t * num)2422 _scatter_row_num(const cs_grid_t *g,
2423 cs_lnum_t *num)
2424 {
2425 assert(g != NULL);
2426
2427 /* If grid merging has taken place, scatter coarse data */
2428
2429 if (g->merge_sub_size > 1) {
2430
2431 MPI_Comm comm = cs_glob_mpi_comm;
2432 static const int tag = 'p'+'r'+'o'+'l'+'o'+'n'+'g';
2433
2434 /* Append data */
2435
2436 if (g->merge_sub_rank == 0) {
2437 int rank_id;
2438 assert(cs_glob_rank_id == g->merge_sub_root);
2439 for (rank_id = 1; rank_id < g->merge_sub_size; rank_id++) {
2440 cs_lnum_t n_send = ( g->merge_cell_idx[rank_id+1]
2441 - g->merge_cell_idx[rank_id]);
2442 int dist_rank = g->merge_sub_root + g->merge_stride*rank_id;
2443 MPI_Send(num + g->merge_cell_idx[rank_id], n_send, CS_MPI_LNUM,
2444 dist_rank, tag, comm);
2445 }
2446 }
2447 else {
2448 MPI_Status status;
2449 MPI_Recv(num, g->n_elts_r[0], CS_MPI_LNUM,
2450 g->merge_sub_root, tag, comm, &status);
2451 }
2452 }
2453
2454 }
2455
2456 #endif /* defined(HAVE_MPI) */
2457
2458 /*----------------------------------------------------------------------------
2459 * Remove an element from a list (queue) of elements listed by adjacency size.
2460 *
2461 * parameters:
2462 * s <-> structure associated with graph traversal and update
2463 * a_m_e <-- number of adjacencies considered for this element
2464 * elt_id <-- id of element to remove
2465 *----------------------------------------------------------------------------*/
2466
static inline void
_graph_m_ptr_remove_m(cs_graph_m_ptr_t  *s,
                      cs_lnum_t          a_m_e,
                      cs_lnum_t          elt_id)
{
  cs_lnum_t _m = a_m_e;   /* bucket (adjacency count) the element lives in */

  /* Unlink elt_id from the doubly-linked list of bucket _m */

  cs_lnum_t s_p = s->prev[elt_id];
  cs_lnum_t s_n = s->next[elt_id];
  if (s_p > -1) { /* not head of list */
    s->next[s_p] = s_n;
    if (s_n > -1)
      s->prev[s_n] = s_p;
  }
  else { /* head of list */
    assert(s->m_head[_m] == elt_id);
    s->m_head[_m] = s_n;
    if (s_n > -1)
      s->prev[s_n] = -1;
    /* If the bucket became empty and was a bound, tighten m_min/m_max
       so traversal never starts at an empty bucket */
    if (s->m_head[_m] < 0) { /* Update min and max m */
      if (_m == s->m_min) {
        /* advance m_min to the next non-empty bucket (or m_max) */
        while (s->m_min < s->m_max && s->m_head[s->m_min] < 0)
          s->m_min++;
      }
      else if (_m == s->m_max) {
        /* retreat m_max to the previous non-empty bucket (or m_min) */
        s->m_max -= 1;
        while (s->m_max > s->m_min) {
          if (s->m_head[s->m_max] > -1)
            break;
          else
            s->m_max--;
        }
      }
    }
  }

  /* Leave the removed element fully detached */
  s->prev[elt_id] = -1;
  s->next[elt_id] = -1;
}
2506
2507 /*----------------------------------------------------------------------------
2508 * Insert an element at the head of a list (queue) of elements listed
2509 * by adjacency size.
2510 *
2511 * Elements inserted through this function must have been removed from
2512 * a list with higher adjacency, so a_m_e < s->m_max.
2513 *
2514 * parameters:
2515 * s <-> structure associated with graph traversal and update
2516 * a_m_e <-> number of adjacencies considered for this element
2517 * elt_id <-- id of element to remove
2518 *----------------------------------------------------------------------------*/
2519
2520 static inline void
_graph_m_ptr_insert_m(cs_graph_m_ptr_t * s,cs_lnum_t a_m_e,cs_lnum_t elt_id)2521 _graph_m_ptr_insert_m(cs_graph_m_ptr_t *s,
2522 cs_lnum_t a_m_e,
2523 cs_lnum_t elt_id)
2524 {
2525 cs_lnum_t _m = a_m_e;
2526
2527 cs_lnum_t s_p = s->m_head[_m];
2528 if (s_p > -1) { /* list not empty */
2529 assert(s->prev[s_p] == -1);
2530 s->prev[s_p] = elt_id;
2531 s->next[elt_id] = s_p;
2532 }
2533 else { /* Update min and max m */
2534 if (_m < s->m_min)
2535 s->m_min = _m;
2536 else if (_m > s->m_max) {
2537 s->m_max = _m;
2538 if (s->m_head[s->m_min] < 0)
2539 s->m_min = _m;
2540 }
2541 assert(_m <= s->m_max);
2542 }
2543 s->m_head[_m] = elt_id;
2544 }
2545
2546 /*----------------------------------------------------------------------------*/
2547 /*!
2548 * \brief Apply one step of the pairwise aggregation algorithm for a
2549 * matrix expected to be an M-matrix.
2550 *
2551 * This assumes positive diagonal entries, and negative off-diagonal entries.
2552 * The matrix is provided in scalar MSR (modified CSR with separate diagonal)
2553 * form. To use block matrices with this function, their blocks must be
2554 * condensed to equivalent scalars.
2555 *
2556 * \param[in] f_n_rows number of rows in fine grid
2557 * \param[in] beta aggregation criterion
2558 * \param[in] dd_threshold diagonal dominance threshold; if > 0, ignore rows
2559 * whose diagonal dominance is above this threshold
2560 * \param[in] row_index matrix row index (separate diagonal)
2561 * \param[in] col_id matrix column ids
2562 * \param[in] d_val matrix diagonal values (scalar)
2563 * \param[in] x_val matrix extradiagonal values (scalar)
2564 * \param[out] f_c_row fine to coarse rows mapping
2565 *
2566 * \return local number of resulting coarse rows
2567 */
2568 /*----------------------------------------------------------------------------*/
2569
2570 static cs_lnum_t
_pairwise_msr(cs_lnum_t f_n_rows,const cs_real_t beta,const cs_real_t dd_threshold,const cs_lnum_t row_index[restrict],const cs_lnum_t col_id[restrict],const cs_real_t d_val[restrict],const cs_real_t x_val[restrict],cs_lnum_t * f_c_row)2571 _pairwise_msr(cs_lnum_t f_n_rows,
2572 const cs_real_t beta,
2573 const cs_real_t dd_threshold,
2574 const cs_lnum_t row_index[restrict],
2575 const cs_lnum_t col_id[restrict],
2576 const cs_real_t d_val[restrict],
2577 const cs_real_t x_val[restrict],
2578 cs_lnum_t *f_c_row)
2579 {
2580 cs_lnum_t c_n_rows = 0;
2581
2582 /* Mark all elements of fine to coarse rows as uninitialized */
2583 for (cs_lnum_t ii = 0; ii < f_n_rows; ii++)
2584 f_c_row[ii] = -2;
2585
2586 /* Allocate working arrays */
2587
2588 short int *a_m; /* active m for row */
2589 cs_real_t *a_max; /* max per line */
2590
2591 BFT_MALLOC(a_m, f_n_rows, short int);
2592 BFT_MALLOC(a_max, f_n_rows, cs_real_t);
2593
2594 /* Computation of the maximum over line ii and test if the line ii is
2595 * ignored. Be careful that the sum has to be the sum of the
2596 * absolute value of every extra-diagonal coefficient, but the maximum is only
2597 * on the negative coefficient. */
2598
2599 cs_lnum_t m_max = -1;
2600
2601 cs_lnum_t n_remain = f_n_rows;
2602
2603 if (dd_threshold > 0) {
2604
2605 for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
2606
2607 cs_real_t sum = 0.0;
2608
2609 cs_lnum_t s_id = row_index[ii];
2610 cs_lnum_t e_id = row_index[ii+1];
2611 a_max[ii] = 0.0;
2612
2613 for (cs_lnum_t jj = s_id; jj < e_id; jj++) {
2614 cs_real_t xv = x_val[jj];
2615 sum += CS_ABS(xv);
2616 if (xv < 0)
2617 a_max[ii] = CS_MAX(a_max[ii], -xv);
2618 }
2619
2620 /* Check if the line seems ignored or not. */
2621 if (d_val[ii] > dd_threshold * sum) {
2622 a_m[ii] = -1;
2623 n_remain -= 1;
2624 f_c_row[ii] = -1;
2625 }
2626 else {
2627 a_m[ii] = 0;
2628 for (cs_lnum_t jj = e_id-1; jj >= s_id; jj--) {
2629 if (col_id[jj] < f_n_rows && x_val[jj] < beta*sum)
2630 a_m[ii] += 1;
2631 }
2632 }
2633
2634 if (m_max < a_m[ii])
2635 m_max = a_m[ii];
2636
2637 }
2638
2639 }
2640 else { /* variant with no diagonal dominance check */
2641
2642 for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
2643
2644 cs_lnum_t s_id = row_index[ii];
2645 cs_lnum_t e_id = row_index[ii+1];
2646 a_max[ii] = 0.0;
2647
2648 for (cs_lnum_t jj = s_id; jj < e_id; jj++) {
2649 cs_real_t xv = x_val[jj];
2650 if (xv < 0)
2651 a_max[ii] = CS_MAX(a_max[ii], -xv);
2652 }
2653
2654 a_m[ii] = 0;
2655 for (cs_lnum_t jj = e_id-1; jj >= s_id; jj--) {
2656 if (col_id[jj] < f_n_rows)
2657 a_m[ii] += 1;
2658 }
2659
2660 if (m_max < a_m[ii])
2661 m_max = a_m[ii];
2662
2663 }
2664
2665 }
2666
2667 if (m_max < 0)
2668 return 0;
2669
2670 /* Build pointers to lists of rows by a_m
2671 (to allow access to row with lowest m) */
2672
2673 cs_graph_m_ptr_t s;
2674
2675 s.m_min = 0;
2676 s.m_max = m_max;
2677
2678 BFT_MALLOC(s.m_head, s.m_max+1, cs_lnum_t);
2679 BFT_MALLOC(s.next, f_n_rows*2, cs_lnum_t);
2680 s.prev = s.next + f_n_rows;
2681
2682 for (cs_lnum_t ii = 0; ii < s.m_max+1; ii++)
2683 s.m_head[ii] = -1;
2684 for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
2685 s.next[ii] = -1;
2686 s.prev[ii] = -1;
2687 }
2688
2689 for (cs_lnum_t ii = f_n_rows-1; ii >= 0; ii--) {
2690 cs_lnum_t _m = a_m[ii];
2691 if (_m >= 0) {
2692 cs_lnum_t prev_head = s.m_head[_m];
2693 s.m_head[_m] = ii;
2694 s.next[ii] = prev_head;
2695 if (prev_head > -1)
2696 s.prev[prev_head] = ii;
2697 }
2698 }
2699
2700 /* Now build pairs */
2701
2702 while (s.m_min < s.m_max) {
2703 if (s.m_head[s.m_min] < 0)
2704 s.m_min++;
2705 else
2706 break;
2707 }
2708
2709 while (s.m_min > -1) {
2710
2711 /* Select remaining ii with minimal a_m */
2712
2713 cs_lnum_t ii = s.m_head[s.m_min];
2714 assert(ii > -1);
2715
2716 cs_lnum_t gg[2] = {ii, -1};
2717
2718 /* Select remaining jj such that aij = min_over_k_aik */
2719
2720 cs_lnum_t s_id = row_index[ii];
2721 cs_lnum_t e_id = row_index[ii+1];
2722
2723 f_c_row[ii] = c_n_rows; /* Add i to "pair" in all cases */
2724
2725 if (e_id > s_id) {
2726
2727 cs_lnum_t jj = -1;
2728 cs_real_t _a_min = HUGE_VAL;
2729 for (cs_lnum_t kk_idx = s_id; kk_idx < e_id; kk_idx++) {
2730 cs_lnum_t kk = col_id[kk_idx];
2731 if (kk < f_n_rows && f_c_row[kk] == -2) { /* not aggregated yet */
2732 cs_real_t xv = x_val[kk_idx];
2733 if (xv < _a_min) {
2734 _a_min = xv;
2735 jj = kk;
2736 }
2737 }
2738 }
2739
2740 /* keep jj only if within threshold */
2741
2742 if (_a_min >= -beta*a_max[ii])
2743 jj = -1;
2744 else {
2745 f_c_row[jj] = c_n_rows; /* Add jj to "pair" */
2746 gg[1] = jj;
2747 }
2748
2749 }
2750
2751 c_n_rows++;
2752
2753 /* Now update search set */
2754
2755 for (int ip = 0; ip < 2; ip++) {
2756 cs_lnum_t i = gg[ip];
2757 if (i > -1) {
2758 cs_lnum_t _m = a_m[i];
2759 assert(_m > -1);
2760 _graph_m_ptr_remove_m(&s, _m, i);
2761 a_m[i] = -1;
2762 cs_lnum_t _s_id = row_index[i];
2763 cs_lnum_t _e_id = row_index[i+1];
2764 for (cs_lnum_t k = _e_id-1; k >= _s_id; k--) {
2765 cs_lnum_t j = col_id[k];
2766 if (j >= f_n_rows)
2767 continue;
2768 _m = a_m[j];
2769 if (_m >= 0) {
2770 _graph_m_ptr_remove_m(&s, _m, j);
2771 if (_m > 0)
2772 _graph_m_ptr_insert_m(&s, _m-1, j);
2773 a_m[j] = _m - 1;
2774 }
2775 }
2776 n_remain--;
2777 }
2778 }
2779 if (s.m_min >= s.m_max) { /* check if list has become empty */
2780 if (s.m_head[s.m_min] < 0) {
2781 s.m_min = -1;
2782 s.m_max = -1;
2783 }
2784 }
2785
2786 }
2787
2788 /* We might have remaining cells */
2789
2790 for (int ii = 0; ii < f_n_rows; ii++) {
2791 if (f_c_row[ii] < -1)
2792 f_c_row[ii] = c_n_rows++;
2793 }
2794
2795 /* Free working arrays */
2796
2797 BFT_FREE(s.next);
2798 BFT_FREE(s.m_head);
2799 BFT_FREE(a_max);
2800 BFT_FREE(a_m);
2801
2802 return c_n_rows;
2803 }
2804
2805 /*----------------------------------------------------------------------------
2806 * Build a coarse grid level from the previous level using an
2807 * automatic criterion and pairwise aggregation variant 1,
2808 * with a matrix in MSR format.
2809 *
2810 * parameters:
2811 * f <-- Fine grid structure
2812 * verbosity <-- Verbosity level
2813 * f_c_row --> Fine row -> coarse row connectivity
2814 *----------------------------------------------------------------------------*/
2815
2816 static void
_automatic_aggregation_pw_msr(const cs_grid_t * f,int verbosity,cs_lnum_t * f_c_row)2817 _automatic_aggregation_pw_msr(const cs_grid_t *f,
2818 int verbosity,
2819 cs_lnum_t *f_c_row)
2820 {
2821 const cs_real_t beta = 0.25;
2822
2823 const cs_real_t dd_threshold = (f->level == 0) ? _dd_threshold_pw : -1;
2824
2825 const cs_lnum_t f_n_rows = f->n_rows;
2826
2827 /* Access matrix MSR vectors */
2828
2829 const cs_lnum_t *row_index, *col_id;
2830 const cs_real_t *d_val, *x_val;
2831 cs_real_t *_d_val = NULL, *_x_val = NULL;
2832
2833 cs_matrix_get_msr_arrays(f->matrix,
2834 &row_index,
2835 &col_id,
2836 &d_val,
2837 &x_val);
2838
2839 const cs_lnum_t *db_size = f->db_size;
2840 const cs_lnum_t *eb_size = f->eb_size;
2841
2842 if (db_size[0] > 1) {
2843 BFT_MALLOC(_d_val, f_n_rows, cs_real_t);
2844 _reduce_block(f_n_rows, db_size, d_val, _d_val);
2845 d_val = _d_val;
2846 }
2847
2848 if (eb_size[0] > 1) {
2849 cs_lnum_t f_n_enz = row_index[f_n_rows];
2850 BFT_MALLOC(_x_val, f_n_enz, cs_real_t);
2851 _reduce_block(f_n_enz, eb_size, x_val, _x_val);
2852 x_val = _x_val;
2853 }
2854
2855 if (verbosity > 3)
2856 bft_printf("\n %s: beta %5.3e; diag_dominance_threshold: %5.3e\n",
2857 __func__, beta, dd_threshold);
2858
2859 _pairwise_msr(f_n_rows,
2860 beta,
2861 dd_threshold,
2862 row_index,
2863 col_id,
2864 d_val,
2865 x_val,
2866 f_c_row);
2867
2868 /* Free working arrays */
2869
2870 BFT_FREE(_d_val);
2871 BFT_FREE(_x_val);
2872 }
2873
2874 /*----------------------------------------------------------------------------
2875 * Build a coarse grid level from the previous level using an
2876 * automatic criterion and pairwise aggregation variant 1,
2877 * with a matrix in native format.
2878 *
2879 * parameters:
2880 * f <-- Fine grid structure
2881 * max_aggregation <-- Max fine rows per coarse row
2882 * verbosity <-- Verbosity level
2883 * f_c_row --> Fine row -> coarse row connectivity
2884 *----------------------------------------------------------------------------*/
2885
static void
_automatic_aggregation_mx_native(const cs_grid_t  *f,
                                 cs_lnum_t         max_aggregation,
                                 int               verbosity,
                                 cs_lnum_t        *f_c_row)
{
  const cs_lnum_t f_n_rows = f->n_rows;

  /* Algorithm parameters */
  const cs_real_t beta = 0.25; /* 0.5 for HHO */
  const int ncoarse = 8;       /* target fine/coarse size ratio */
  int npass_max = 10;

  int _max_aggregation = 1, npass = 0;
  cs_lnum_t aggr_count = f_n_rows;   /* rows not yet aggregated */
  cs_lnum_t c_n_rows = 0;            /* coarse rows created so far */

  cs_lnum_t *c_aggr_count = NULL;    /* members per coarse aggregate */
  bool *penalize = NULL;             /* rows excluded from aggregation */
  cs_real_t *maxi = NULL;            /* per-row max of -x over negative x */

  /* Access matrix arrays (native edge-based format here) */

  cs_lnum_t n_edges = 0;
  const cs_lnum_2_t *edges;
  const cs_real_t *d_val, *x_val;
  cs_real_t *_d_val = NULL, *_x_val = NULL;

  bool symmetric = true;
  cs_lnum_t isym = 2;   /* values per edge: 1 if symmetric, 2 otherwise */

  cs_matrix_get_native_arrays(f->matrix,
                              &symmetric,
                              &n_edges,
                              &edges,
                              &d_val,
                              &x_val);

  if (f->symmetric)
    isym = 1;

  const cs_lnum_t *db_size = f->db_size;
  const cs_lnum_t *eb_size = f->eb_size;

  /* Condense block values to equivalent scalars if needed */

  if (db_size[0] > 1) {
    BFT_MALLOC(_d_val, f_n_rows, cs_real_t);
    _reduce_block(f_n_rows, db_size, d_val, _d_val);
    d_val = _d_val;
  }

  if (eb_size[0] > 1) {
    BFT_MALLOC(_x_val, n_edges*isym, cs_real_t);
    _reduce_block(n_edges*isym, eb_size, x_val, _x_val);
    x_val = _x_val;
  }

  /* Allocate working arrays */

  BFT_MALLOC(c_aggr_count, f_n_rows, cs_lnum_t);
  BFT_MALLOC(maxi, f_n_rows, cs_real_t);
  BFT_MALLOC(penalize, f_n_rows, bool);

  for (cs_lnum_t ii = 0; ii < f_n_rows; ii++){
    c_aggr_count[ii] = 1;
    penalize[ii] = false;
  }

  /* Computation of the maximum over line ii and test if the line ii is
   * penalized. Be careful that the sum has to be the sum of the
   * absolute value of every extra-diagonal coefficient, but the maximum
   * is only on the negative coefficient. */

  cs_real_t *sum;
  BFT_MALLOC(sum, f_n_rows, cs_real_t);

  for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
    sum[ii] = 0;
    maxi[ii] = 0.0;
  }

  /* Accumulate per-row sums/maxima edge by edge; xv0 and xv1 are the
     two directional values of the edge (identical when isym == 1) */
  for (cs_lnum_t e_id = 0; e_id < n_edges; e_id++) {
    cs_lnum_t ii = edges[e_id][0];
    cs_lnum_t jj = edges[e_id][1];
    cs_real_t xv0 = x_val[e_id*isym];
    cs_real_t xv1 = x_val[(e_id+1)*isym-1];
    if (ii < f_n_rows) {
      sum[ii] += CS_ABS(xv0);
      if (xv0 < 0)
        maxi[ii] = CS_MAX(maxi[ii], -xv0);
    }
    if (jj < f_n_rows) {
      sum[jj] += CS_ABS(xv1);
      if (xv1 < 0)
        maxi[jj] = CS_MAX(maxi[jj], -xv1);
    }
  }

  /* Check if the line seems penalized or not
     (diagonal dominance filtering on the finest level only). */
  if (f->level == 0) {
    for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
      if (d_val[ii] > _penalization_threshold * sum[ii])
        penalize[ii] = true;
    }
  }
  BFT_FREE(sum);

  /* Passes: aggregate greedily, allowing slightly larger aggregates
     on each successive pass (up to max_aggregation) */

  if (verbosity > 3)
    bft_printf("\n %s:\n", __func__);

  do {

    npass++;
    _max_aggregation++;
    _max_aggregation = CS_MIN(_max_aggregation, max_aggregation);

    /* Pairwise aggregation */

    for (cs_lnum_t e_id = 0; e_id < n_edges; e_id++) {

      cs_lnum_t ii = edges[e_id][0];
      cs_lnum_t jj = edges[e_id][1];

      /* Exclude rows on parallel or periodic boundary, so as not to */
      /* coarsen the grid across those boundaries (which would change */
      /* the communication pattern and require a more complex algorithm). */

      /* ii or jj is candidate to aggregation only if it is not penalized */

      if (   ii >= f_n_rows || penalize[ii]
          || jj >= f_n_rows || penalize[jj])
        continue;

      /* For non-symmetric matrices, use the larger of the two
         directional coefficients of the edge */
      cs_real_t xv = x_val[e_id*isym];

      if (isym == 2)
        xv = CS_MAX(xv, x_val[e_id*2 + 1]);

      /* Test if ii and jj are strongly negatively coupled and at */
      /* least one of them is not already in an aggregate. */

      if (   xv < -beta*maxi[ii]
          && (f_c_row[ii] < 0 || f_c_row[jj] < 0)) {

        /* Either join the partner's existing aggregate (if not full),
           or create a new two-member aggregate */

        if (f_c_row[ii] > -1 && f_c_row[jj] < 0 ) {
          if (c_aggr_count[f_c_row[ii]] < _max_aggregation +1) {
            f_c_row[jj] = f_c_row[ii];
            c_aggr_count[f_c_row[ii]] += 1;
          }
        }
        else if (f_c_row[ii] < 0 && f_c_row[jj] > -1) {
          if (c_aggr_count[f_c_row[jj]] < _max_aggregation +1) {
            f_c_row[ii] = f_c_row[jj];
            c_aggr_count[f_c_row[jj]] += 1;
          }
        }
        else if (f_c_row[ii] < 0 && f_c_row[jj] < 0) {
          f_c_row[ii] = c_n_rows;
          f_c_row[jj] = c_n_rows;
          c_aggr_count[c_n_rows] += 1;
          c_n_rows++;
        }
      }

    }

    /* Check the number of coarse rows created */
    aggr_count = 0;
    for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
      if (f_c_row[ii] < 0)
        aggr_count++;
    }

    /* Additional passes if aggregation is insufficient
       (stop early once the projected coarsening ratio is reached) */
    if (aggr_count == 0 || (c_n_rows + aggr_count)*ncoarse < f_n_rows)
      npass_max = npass;

  } while (npass < npass_max); /* Loop on passes */

  /* Finish assembly: rows that are not diagonally dominant and not in an
   * aggregate form their own aggregate */
  for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
    if (!penalize[ii] && f_c_row[ii] < 0) {
      f_c_row[ii] = c_n_rows;
      c_n_rows++;
    }
  }

  /* Free working arrays */

  BFT_FREE(_d_val);
  BFT_FREE(_x_val);
  BFT_FREE(c_aggr_count);
  BFT_FREE(maxi);
  BFT_FREE(penalize);
}
3083
3084 /*----------------------------------------------------------------------------
3085 * Build a coarse grid level from the previous level using an
3086 * automatic criterion and pairwise aggregation variant 1,
3087 * with a matrix in MSR format.
3088 *
3089 * parameters:
3090 * f <-- Fine grid structure
3091 * max_aggregation <-- Max fine rows per coarse row
3092 * verbosity <-- Verbosity level
3093 * f_c_row --> Fine row -> coarse row connectivity
3094 *----------------------------------------------------------------------------*/
3095
3096 static void
_automatic_aggregation_mx_msr(const cs_grid_t * f,cs_lnum_t max_aggregation,int verbosity,cs_lnum_t * f_c_row)3097 _automatic_aggregation_mx_msr(const cs_grid_t *f,
3098 cs_lnum_t max_aggregation,
3099 int verbosity,
3100 cs_lnum_t *f_c_row)
3101 {
3102 const cs_lnum_t f_n_rows = f->n_rows;
3103
3104 int npass_max = 10;
3105 int _max_aggregation = 1, npass = 0;
3106 cs_lnum_t aggr_count = f_n_rows;
3107 cs_lnum_t c_n_rows = 0;
3108
3109 cs_lnum_t *c_aggr_count = NULL;
3110 bool *penalize = NULL;
3111 cs_real_t *maxi = NULL;
3112
3113 /* Algorithm parameters */
3114 const cs_real_t beta = 0.25; /* 0.5 for HHO */
3115 const int ncoarse = 8;
3116 const cs_real_t p_test = (f->level == 0) ? 1. : -1;
3117
3118 if (verbosity > 3)
3119 bft_printf("\n %s: npass_max: %d; n_coarse: %d;"
3120 " beta %5.3e; pena_thd: %5.3e, p_test: %g\n",
3121 __func__, npass_max, ncoarse, beta, _penalization_threshold,
3122 p_test);
3123
3124 /* Access matrix MSR vectors */
3125
3126 const cs_lnum_t *row_index, *col_id;
3127 const cs_real_t *d_val, *x_val;
3128 cs_real_t *_d_val = NULL, *_x_val = NULL;
3129
3130 cs_matrix_get_msr_arrays(f->matrix,
3131 &row_index,
3132 &col_id,
3133 &d_val,
3134 &x_val);
3135
3136 const cs_lnum_t *db_size = f->db_size;
3137 const cs_lnum_t *eb_size = f->eb_size;
3138
3139 if (db_size[0] > 1) {
3140 BFT_MALLOC(_d_val, f_n_rows, cs_real_t);
3141 _reduce_block(f_n_rows, db_size, d_val, _d_val);
3142 d_val = _d_val;
3143 }
3144
3145 if (eb_size[0] > 1) {
3146 cs_lnum_t f_n_enz = row_index[f_n_rows];
3147 BFT_MALLOC(_x_val, f_n_enz, cs_real_t);
3148 _reduce_block(f_n_enz, eb_size, x_val, _x_val);
3149 x_val = _x_val;
3150 }
3151
3152 /* Allocate working arrays */
3153
3154 BFT_MALLOC(c_aggr_count, f_n_rows, cs_lnum_t);
3155 BFT_MALLOC(maxi, f_n_rows, cs_real_t);
3156 BFT_MALLOC(penalize, f_n_rows, bool);
3157
3158 # pragma omp parallel if (f_n_rows > CS_THR_MIN)
3159 {
3160 # pragma omp for
3161 for (cs_lnum_t ii = 0; ii < f_n_rows; ii++){
3162 c_aggr_count[ii] = 1;
3163 penalize[ii] = false;
3164 }
3165
3166 /* Computation of the maximum over line ii and test if the line ii is
3167 * penalized. Be careful that the sum has to be the sum of the absolute
3168 * value of every extra-diagonal coefficient, but the maximum is only on the
3169 * negative coefficient. */
3170
3171 # pragma omp for
3172 for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
3173
3174 maxi[ii] = 0.0;
3175
3176 cs_real_t sum = 0.0;
3177 for (cs_lnum_t jj = row_index[ii]; jj < row_index[ii+1]; jj++) {
3178
3179 const cs_real_t xv = x_val[jj];
3180 if (xv < 0) {
3181 sum -= xv;
3182 maxi[ii] = CS_MAX(maxi[ii], -xv);
3183 }
3184 else {
3185 sum += xv;
3186 }
3187 }
3188
3189 /* Check if the line seems penalized or not. */
3190 if (d_val[ii]*p_test > _penalization_threshold * sum)
3191 penalize[ii] = true;
3192
3193 } /* Loop on f_n_rows */
3194
3195 } /* OpenMP block */
3196
3197 /* Passes */
3198
3199 do {
3200
3201 npass++;
3202 _max_aggregation++;
3203 _max_aggregation = CS_MIN(_max_aggregation, max_aggregation);
3204
3205 /* Pairwise aggregation */
3206
3207 for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
3208
3209 /* ii is candidate to aggregation only if it is not penalized */
3210
3211 if (penalize[ii])
3212 continue;
3213
3214 const cs_real_t row_criterion = -beta*maxi[ii];
3215
3216 for (cs_lnum_t jidx = row_index[ii]; jidx < row_index[ii+1]; jidx++) {
3217
3218 cs_lnum_t jj = col_id[jidx];
3219
3220 /* Exclude rows on parallel or periodic boundary, so as not to */
3221 /* coarsen the grid across those boundaries (which would change */
3222 /* the communication pattern and require a more complex algorithm). */
3223
3224 if (jj < f_n_rows) {
3225 if (!penalize[jj]) {
3226
3227 /* Test if ii and jj are strongly negatively coupled and at */
3228 /* least one of them is not already in an aggregate. */
3229
3230 if ( x_val[jidx] < row_criterion
3231 && (f_c_row[ii] < 0 || f_c_row[jj] < 0)) {
3232
3233 if (f_c_row[ii] > -1 && f_c_row[jj] < 0 ) {
3234 if (c_aggr_count[f_c_row[ii]] < _max_aggregation +1) {
3235 f_c_row[jj] = f_c_row[ii];
3236 c_aggr_count[f_c_row[ii]] += 1;
3237 }
3238 }
3239 else if (f_c_row[ii] < 0 && f_c_row[jj] > -1) {
3240 if (c_aggr_count[f_c_row[jj]] < _max_aggregation +1) {
3241 f_c_row[ii] = f_c_row[jj];
3242 c_aggr_count[f_c_row[jj]] += 1;
3243 }
3244 }
3245 else if (f_c_row[ii] < 0 && f_c_row[jj] < 0) {
3246 f_c_row[ii] = c_n_rows;
3247 f_c_row[jj] = c_n_rows;
3248 c_aggr_count[c_n_rows] += 1;
3249 c_n_rows++;
3250 }
3251 }
3252
3253 } /* Column is not penalized */
3254 } /* The current rank is owner of the column */
3255
3256 } /* Loop on columns */
3257
3258 } /* Loop on rows */
3259
3260 /* Check the number of coarse rows created */
3261 aggr_count = 0;
3262 for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
3263 if (f_c_row[ii] < 0)
3264 aggr_count++;
3265 }
3266
3267 /* Additional passes if aggregation is insufficient */
3268 if (aggr_count == 0 || (c_n_rows + aggr_count)*ncoarse < f_n_rows)
3269 npass_max = npass;
3270
3271 } while (npass < npass_max); /* Loop on passes */
3272
3273 /* Finish assembly: rows that are not diagonally dominant and not in an
3274 * aggregate form their own aggregate */
3275 for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
3276 if (!penalize[ii] && f_c_row[ii] < 0) {
3277 f_c_row[ii] = c_n_rows;
3278 c_n_rows++;
3279 }
3280 }
3281
3282 /* Free working arrays */
3283
3284 BFT_FREE(_d_val);
3285 BFT_FREE(_x_val);
3286 BFT_FREE(c_aggr_count);
3287 BFT_FREE(maxi);
3288 BFT_FREE(penalize);
3289 }
3290
3291 /*----------------------------------------------------------------------------
3292 * Build a coarse grid level from the previous level using
3293 * an automatic criterion, using the face to cells adjacency.
3294 *
3295 * parameters:
3296 * f <-- Fine grid structure
3297 * coarsening_type <-- Coarsening type
3298 * max_aggregation <-- Max fine cells per coarse cell
3299 * relaxation_parameter <-- P0/P1 relaxation factor
3300 * verbosity <-- Verbosity level
3301 * f_c_cell --> Fine cell -> coarse cell connectivity
3302 *----------------------------------------------------------------------------*/
3303
3304 static void
_automatic_aggregation_fc(const cs_grid_t * f,cs_grid_coarsening_t coarsening_type,cs_lnum_t max_aggregation,double relaxation_parameter,int verbosity,cs_lnum_t * f_c_cell)3305 _automatic_aggregation_fc(const cs_grid_t *f,
3306 cs_grid_coarsening_t coarsening_type,
3307 cs_lnum_t max_aggregation,
3308 double relaxation_parameter,
3309 int verbosity,
3310 cs_lnum_t *f_c_cell)
3311 {
3312 cs_lnum_t n_faces;
3313
3314 cs_lnum_t isym = 2;
3315 int ncoarse = 8, npass_max = 10, inc_nei = 1;
3316 int _max_aggregation = 1, npass = 0;
3317
3318 cs_lnum_t f_n_cells = f->n_rows;
3319 cs_lnum_t f_n_cells_ext = f->n_cols_ext;
3320 cs_lnum_t f_n_faces = f->n_faces;
3321
3322 cs_lnum_t aggr_count = f_n_cells;
3323
3324 cs_lnum_t r_n_faces = f_n_faces;
3325 cs_lnum_t c_n_cells = 0;
3326
3327 cs_real_t epsilon = 1.e-6;
3328
3329 cs_lnum_t *c_cardinality = NULL, *f_c_face = NULL;
3330 cs_lnum_t *merge_flag = NULL, *c_aggr_count = NULL;
3331 cs_lnum_t *i_work_array = NULL;
3332
3333 const cs_lnum_t *db_size = f->db_size;
3334 const cs_lnum_t *eb_size = f->eb_size;
3335 const cs_lnum_2_t *f_face_cells = f->face_cell;
3336 const cs_real_t *f_da = f->da;
3337 const cs_real_t *f_xa = f->xa;
3338
3339 /* Reduce block to scalar equivalents */
3340
3341 const cs_real_t *_f_da = f_da;
3342 const cs_real_t *_f_xa = f_xa;
3343
3344 cs_real_t *s_da = NULL, *s_xa = NULL;
3345
3346 if (db_size[0] > 1) {
3347 BFT_MALLOC(s_da, f_n_cells, cs_real_t);
3348 _reduce_block(f_n_cells, db_size, f_da, s_da);
3349 _f_da = s_da;
3350 }
3351
3352 if (eb_size[0] > 1) {
3353 BFT_MALLOC(s_xa, f_n_faces*isym, cs_real_t);
3354 _reduce_block(f_n_faces*isym, db_size, f_xa, s_xa);
3355 _f_xa = s_xa;
3356 }
3357
3358 /* Allocate working arrays */
3359
3360 BFT_MALLOC(i_work_array, f_n_cells_ext*2 + f_n_faces*3, cs_lnum_t);
3361
3362 c_cardinality = i_work_array;
3363 c_aggr_count = i_work_array + f_n_cells_ext;
3364 f_c_face = i_work_array + 2*f_n_cells_ext; /* fine face -> coarse face */
3365 merge_flag = i_work_array + 2*f_n_cells_ext + f_n_faces;
3366
3367 /* Initialization */
3368
3369 if (f->symmetric == true)
3370 isym = 1;
3371
3372 # pragma omp parallel for if(f_n_cells_ext > CS_THR_MIN)
3373 for (cs_lnum_t ii = 0; ii < f_n_cells_ext; ii++) {
3374 c_cardinality[ii] = -1;
3375 f_c_cell[ii] = -1;
3376 c_aggr_count[ii] = 1;
3377 }
3378
3379 # pragma omp parallel for if(f_n_faces > CS_THR_MIN)
3380 for (cs_lnum_t face_id = 0; face_id < f_n_faces; face_id++) {
3381 merge_flag[face_id] = face_id +1;
3382 f_c_face[face_id] = 0;
3383 }
3384
3385 /* Compute cardinality (number of neighbors for each cell -1) */
3386
3387 for (cs_lnum_t face_id = 0; face_id < f_n_faces; face_id++) {
3388 cs_lnum_t ii = f_face_cells[face_id][0];
3389 cs_lnum_t jj = f_face_cells[face_id][1];
3390
3391 c_cardinality[ii] += 1;
3392 c_cardinality[jj] += 1;
3393 }
3394
3395 /* Passes */
3396
3397 if (verbosity > 3)
3398 bft_printf("\n %s:\n", __func__);
3399
3400 do {
3401
3402 npass++;
3403 n_faces = r_n_faces;
3404 _max_aggregation++;
3405 _max_aggregation = CS_MIN(_max_aggregation, max_aggregation);
3406
3407 # pragma omp parallel for if(n_faces > CS_THR_MIN)
3408 for (cs_lnum_t face_id = 0; face_id < n_faces; face_id++) {
3409 f_c_face[face_id] = merge_flag[face_id];
3410 merge_flag[face_id] = 0;
3411 }
3412
3413 if (n_faces < f_n_faces) {
3414 # pragma omp parallel for if(f_n_faces > CS_THR_MIN)
3415 for (cs_lnum_t face_id = n_faces; face_id < f_n_faces; face_id++) {
3416 merge_flag[face_id] = 0;
3417 f_c_face[face_id] = 0;
3418 }
3419 }
3420
3421 if (verbosity > 3)
3422 bft_printf(" pass %3d; r_n_faces = %10ld; aggr_count = %10ld\n",
3423 npass, (long)r_n_faces, (long)aggr_count);
3424
3425 /* Increment number of neighbors */
3426
3427 # pragma omp parallel for if(f_n_cells > CS_THR_MIN)
3428 for (cs_lnum_t ii = 0; ii < f_n_cells; ii++)
3429 c_cardinality[ii] += inc_nei;
3430
3431 /* Initialize non-eliminated faces */
3432 r_n_faces = 0;
3433
3434 /* Loop on non-eliminated faces */
3435
3436 cs_real_t ag_mult = 1.;
3437 cs_real_t ag_threshold = 1. - epsilon;
3438 if (coarsening_type == CS_GRID_COARSENING_CONV_DIFF_DX) {
3439 ag_mult = -1.;
3440 ag_threshold = - (1. - epsilon) * pow(relaxation_parameter, npass);
3441 // ag_threshold = (1. - epsilon) * pow(relaxation_parameter, npass);
3442 }
3443
3444 for (cs_lnum_t face_id = 0; face_id < n_faces; face_id++) {
3445
3446 cs_lnum_t c_face = f_c_face[face_id] -1;
3447
3448 cs_lnum_t ii = f_face_cells[c_face][0];
3449 cs_lnum_t jj = f_face_cells[c_face][1];
3450
3451 /* Exclude faces on parallel or periodic boundary, so as not to */
3452 /* coarsen the grid across those boundaries (which would change */
3453 /* the communication pattern and require a more complex algorithm). */
3454
3455 if ( (ii < f_n_cells && jj < f_n_cells)
3456 && (f_c_cell[ii] < 0 || f_c_cell[jj] < 0)) {
3457
3458 cs_lnum_t count = 0;
3459
3460 cs_lnum_t ix0 = c_face*isym, ix1 = (c_face +1)*isym -1;
3461
3462 cs_real_t f_da0_da1 = (_f_da[ii] * _f_da[jj])
3463 / (c_cardinality[ii]*c_cardinality[jj]);
3464
3465 cs_real_t aggr_crit;
3466
3467 if (coarsening_type == CS_GRID_COARSENING_CONV_DIFF_DX) {
3468 cs_real_t f_xa0 = CS_MAX(-_f_xa[ix0], 1.e-15);
3469 cs_real_t f_xa1 = CS_MAX(-_f_xa[ix1], 1.e-15);
3470 aggr_crit = CS_MAX(f_xa0, f_xa1)
3471 / CS_MAX(sqrt(f_da0_da1), 1.e-15);
3472 }
3473 else {
3474 cs_real_t f_xa0_xa1 = _f_xa[ix0] * _f_xa[ix1];
3475 /* TODO: replace this test, or adimensionalize it */
3476 f_xa0_xa1 = CS_MAX(f_xa0_xa1, 1.e-30);
3477
3478 aggr_crit = f_da0_da1 / f_xa0_xa1;
3479 }
3480
3481 if (ag_mult*aggr_crit < ag_threshold) {
3482
3483 if (f_c_cell[ii] > -1 && f_c_cell[jj] < 0 ) {
3484 if (c_aggr_count[f_c_cell[ii]] < _max_aggregation +1) {
3485 f_c_cell[jj] = f_c_cell[ii];
3486 c_aggr_count[f_c_cell[ii]] += 1;
3487 count++;
3488 }
3489 }
3490 else if (f_c_cell[ii] < 0 && f_c_cell[jj] > -1) {
3491 if (c_aggr_count[f_c_cell[jj]] < _max_aggregation +1) {
3492 f_c_cell[ii] = f_c_cell[jj];
3493 c_aggr_count[f_c_cell[jj]] += 1;
3494 count++;
3495 }
3496 }
3497 else if (f_c_cell[ii] < 0 && f_c_cell[jj] < 0) {
3498 f_c_cell[ii] = c_n_cells;
3499 f_c_cell[jj] = c_n_cells;
3500 c_aggr_count[c_n_cells] += 1;
3501 c_n_cells++;
3502 count++;
3503 }
3504 }
3505
3506 if (count == 0 && (f_c_cell[ii] < 0 || f_c_cell[jj] < 0)) {
3507 merge_flag[r_n_faces] = c_face +1;
3508 r_n_faces++;
3509 }
3510
3511 }
3512
3513 }
3514
3515 /* Check the number of coarse cells created */
3516 aggr_count = 0;
3517 # pragma omp parallel for reduction(+:aggr_count) if(f_n_cells > CS_THR_MIN)
3518 for (cs_lnum_t i = 0; i < f_n_cells; i++) {
3519 if (f_c_cell[i] < 0)
3520 aggr_count++;
3521 }
3522
3523 /* Additional passes if aggregation is insufficient */
3524
3525 if ( aggr_count == 0 || (c_n_cells + aggr_count)*ncoarse < f_n_cells
3526 || r_n_faces == 0)
3527 npass_max = npass;
3528
3529 } while (npass < npass_max); /* Loop on passes */
3530
3531 BFT_FREE(s_da);
3532 BFT_FREE(s_xa);
3533
3534 /* Finish assembly */
3535 for (cs_lnum_t i = 0; i < f_n_cells; i++) {
3536 if (f_c_cell[i] < 0) {
3537 f_c_cell[i] = c_n_cells;
3538 c_n_cells++;
3539 }
3540 }
3541
3542 /* Free working arrays */
3543
3544 BFT_FREE(i_work_array);
3545 }
3546
3547 /*----------------------------------------------------------------------------
3548 * Compute volume and center of coarse cells.
3549 *
3550 * parameters:
3551 * fine_grid <-- Fine grid structure
3552 * coarse_grid <-> Coarse grid structure
3553 *----------------------------------------------------------------------------*/
3554
3555 static void
_compute_coarse_cell_quantities(const cs_grid_t * fine_grid,cs_grid_t * coarse_grid)3556 _compute_coarse_cell_quantities(const cs_grid_t *fine_grid,
3557 cs_grid_t *coarse_grid)
3558 {
3559 cs_lnum_t ic, ii;
3560
3561 cs_lnum_t f_n_cells = fine_grid->n_rows;
3562
3563 cs_lnum_t c_n_cells = coarse_grid->n_rows;
3564 cs_lnum_t c_n_cells_ext = coarse_grid->n_cols_ext;
3565
3566 cs_lnum_t *c_coarse_row = coarse_grid->coarse_row;
3567
3568 cs_real_t *c_cell_vol = coarse_grid->_cell_vol;
3569 cs_real_t *c_cell_cen = coarse_grid->_cell_cen;
3570
3571 const cs_real_t *f_cell_vol = fine_grid->cell_vol;
3572 const cs_real_t *f_cell_cen = fine_grid->cell_cen;
3573
3574 /* Compute volume and center of coarse cells */
3575
3576 # pragma omp parallel for if(c_n_cells_ext > CS_THR_MIN)
3577 for (ic = 0; ic < c_n_cells_ext; ic++) {
3578 c_cell_vol[ic] = 0.;
3579 c_cell_cen[3*ic] = 0.;
3580 c_cell_cen[3*ic +1] = 0.;
3581 c_cell_cen[3*ic +2] = 0.;
3582 }
3583
3584 for (ii = 0; ii < f_n_cells; ii++) {
3585 ic = c_coarse_row[ii];
3586 c_cell_vol[ic] += f_cell_vol[ii];
3587 c_cell_cen[3*ic] += f_cell_vol[ii]*f_cell_cen[3*ii];
3588 c_cell_cen[3*ic +1] += f_cell_vol[ii]*f_cell_cen[3*ii +1];
3589 c_cell_cen[3*ic +2] += f_cell_vol[ii]*f_cell_cen[3*ii +2];
3590 }
3591
3592 # pragma omp parallel for if(c_n_cells > CS_THR_MIN)
3593 for (ic = 0; ic < c_n_cells; ic++) {
3594 c_cell_cen[3*ic] /= c_cell_vol[ic];
3595 c_cell_cen[3*ic +1] /= c_cell_vol[ic];
3596 c_cell_cen[3*ic +2] /= c_cell_vol[ic];
3597 }
3598 }
3599
3600 /*----------------------------------------------------------------------------
3601 * Verification for matrix quantities.
3602 *
3603 * parameters:
3604 * g <-- pointer to grid structure
3605 *----------------------------------------------------------------------------*/
3606
3607 static void
_verify_matrix(const cs_grid_t * g)3608 _verify_matrix(const cs_grid_t *g)
3609 {
3610 const cs_lnum_t n_cols = cs_matrix_get_n_columns(g->matrix);
3611 const cs_lnum_t n_rows = cs_matrix_get_n_rows(g->matrix);
3612 const cs_lnum_t *db_size = g->db_size;
3613
3614 cs_real_t *val;
3615 BFT_MALLOC(val, n_cols*db_size[1], cs_real_t);
3616
3617 /* Evaluate fine and coarse grids diagonal dominance */
3618
3619 cs_matrix_diag_dominance(g->matrix, val);
3620
3621 cs_real_t vmin = HUGE_VAL, vmax = -HUGE_VAL;
3622
3623 const cs_lnum_t n_ents = n_rows*db_size[1];
3624
3625 for (cs_lnum_t i = 0; i < n_ents; i++) {
3626 if (val[i] < vmin)
3627 vmin = val[i];
3628 else if (val[i] > vmax)
3629 vmax = val[i];
3630 }
3631
3632 BFT_FREE(val);
3633
3634 #if defined(HAVE_MPI)
3635 if (cs_glob_mpi_comm != MPI_COMM_NULL) {
3636 cs_real_t _vmin = vmin, _vmax = vmax;
3637 MPI_Allreduce(&_vmin, &vmin, 1, CS_MPI_REAL, MPI_MIN, g->comm);
3638 MPI_Allreduce(&_vmax, &vmax, 1, CS_MPI_REAL, MPI_MAX, g->comm);
3639 }
3640 #endif
3641
3642 bft_printf(_(" grid level %2d diag. dominance: min = %12.5e\n"
3643 " max = %12.5e\n\n"),
3644 g->level, vmin, vmax);
3645 }
3646
3647 /*----------------------------------------------------------------------------
3648 * Verification for coarse quantities computed from fine quantities.
3649 *
3650 * parameters:
3651 * fine_grid <-- Fine grid structure
3652 * coarse_grid <-- Coarse grid structure
3653 * n_clips_min <-- number of clippings to minimum value
3654 * n_clips_max <-- number of clippings to maximum value
3655 * interp <-- 0 for no intepolation, > 0 for interpolation
3656 *----------------------------------------------------------------------------*/
3657
3658 static void
_verify_coarse_quantities(const cs_grid_t * fine_grid,const cs_grid_t * coarse_grid,cs_gnum_t n_clips_min,cs_gnum_t n_clips_max,int interp)3659 _verify_coarse_quantities(const cs_grid_t *fine_grid,
3660 const cs_grid_t *coarse_grid,
3661 cs_gnum_t n_clips_min,
3662 cs_gnum_t n_clips_max,
3663 int interp)
3664 {
3665 cs_lnum_t ic, jc, ii, jj, c_face, face_id;
3666
3667 int isym = 2;
3668
3669 cs_lnum_t f_n_cells = fine_grid->n_rows;
3670 cs_lnum_t f_n_cells_ext = fine_grid->n_cols_ext;
3671 cs_lnum_t f_n_faces = fine_grid->n_faces;
3672
3673 cs_lnum_t c_n_cells = coarse_grid->n_rows;
3674 cs_lnum_t c_n_cells_ext = coarse_grid->n_cols_ext;
3675 cs_lnum_t c_n_faces = coarse_grid->n_faces;
3676
3677 const cs_real_t *c_xa0 = coarse_grid->_xa0;
3678 const cs_real_t *c_xa = coarse_grid->_xa;
3679
3680 cs_real_t *w1 = NULL;
3681
3682 const cs_lnum_t *db_size = fine_grid->db_size;
3683
3684 const cs_lnum_2_t *f_face_cell = fine_grid->face_cell;
3685 const cs_lnum_2_t *c_face_cell = coarse_grid->face_cell;
3686
3687 const cs_real_t *f_xa = fine_grid->xa;
3688
3689 BFT_MALLOC(w1, f_n_cells_ext*db_size[3], cs_real_t);
3690
3691 if (fine_grid->symmetric == true)
3692 isym = 1;
3693
3694 #if defined(HAVE_MPI) && defined(HAVE_MPI_IN_PLACE)
3695 MPI_Comm comm = fine_grid->comm;
3696 if (comm != MPI_COMM_NULL) {
3697 cs_gnum_t n_clips[2] = {n_clips_min, n_clips_max};
3698 MPI_Allreduce(MPI_IN_PLACE, n_clips, 2, CS_MPI_GNUM, MPI_SUM, comm);
3699 n_clips_min = n_clips[0];
3700 n_clips_max = n_clips[0];
3701 }
3702 #endif
3703
3704 if (n_clips_min+n_clips_max > 0)
3705 bft_printf("\n %s:\n"
3706 " coarse_matrix < xag0 on %10llu faces\n"
3707 " > 0 on %10llu faces\n",
3708 __func__,
3709 (unsigned long long)n_clips_min,
3710 (unsigned long long)n_clips_max);
3711
3712 cs_real_t *w2, *w3, *w4;
3713 double anmin[2] = {HUGE_VAL, HUGE_VAL};
3714 double anmax[2] = {-HUGE_VAL, -HUGE_VAL};
3715
3716 BFT_MALLOC(w2, f_n_cells_ext*db_size[3], cs_real_t);
3717 BFT_MALLOC(w3, c_n_cells_ext*db_size[3], cs_real_t);
3718 BFT_MALLOC(w4, c_n_cells_ext*db_size[3], cs_real_t);
3719
3720 /* Evaluate anisotropy of fine and coarse grids */
3721
3722 for (ii = 0; ii < f_n_cells_ext; ii++) {
3723 w1[ii] = -HUGE_VAL;
3724 w2[ii] = HUGE_VAL;
3725 }
3726
3727 for (ic = 0; ic < c_n_cells_ext; ic++) {
3728 w3[ic] = -HUGE_VAL;
3729 w4[ic] = HUGE_VAL;
3730 }
3731
3732 for (face_id = 0; face_id < f_n_faces; face_id++) {
3733 ii = f_face_cell[face_id][0];
3734 jj = f_face_cell[face_id][1];
3735 w1[ii] = CS_MAX(fabs(f_xa[face_id*isym]), w1[ii]);
3736 w2[ii] = CS_MIN(fabs(f_xa[face_id*isym]), w2[ii]);
3737 w1[jj] = CS_MAX(fabs(f_xa[(face_id +1)*isym -1]), w1[jj]);
3738 w2[jj] = CS_MIN(fabs(f_xa[(face_id +1)*isym -1]), w2[jj]);
3739 }
3740
3741 for (c_face = 0; c_face < c_n_faces; c_face++) {
3742 ic = c_face_cell[c_face][0];
3743 jc = c_face_cell[c_face][1];
3744 w3[ic] = CS_MAX(fabs(c_xa[c_face*isym]), w3[ic]);
3745 w4[ic] = CS_MIN(fabs(c_xa[c_face*isym]), w4[ic]);
3746 w3[jc] = CS_MAX(fabs(c_xa[(c_face +1)*isym -1]), w3[jc]);
3747 w4[jc] = CS_MIN(fabs(c_xa[(c_face +1)*isym -1]), w4[jc]);
3748 }
3749
3750 for (ii = 0; ii < f_n_cells; ii++)
3751 w1[ii] = w2[ii] / w1[ii];
3752
3753 for (ic = 0; ic < c_n_cells; ic++)
3754 w3[ic] = w4[ic] / w3[ic];
3755
3756 anmin[0] = HUGE_VAL; anmin[1] = HUGE_VAL;
3757 anmax[0] = -HUGE_VAL; anmax[1] = -HUGE_VAL;
3758
3759 for (ii = 0; ii < f_n_cells; ii++) {
3760 if (w1[ii] < anmin[0])
3761 anmin[0] = w1[ii];
3762 else if (w1[ii] > anmax[0])
3763 anmax[0] = w1[ii];
3764 }
3765
3766 for (ic = 0; ic < c_n_cells; ic++) {
3767 if (w3[ic] < anmin[1])
3768 anmin[1] = w3[ic];
3769 else if (w3[ic] > anmax[1])
3770 anmax[1] = w3[ic];
3771 }
3772
3773 #if defined(HAVE_MPI) && defined(HAVE_MPI_IN_PLACE)
3774 if (comm != MPI_COMM_NULL) {
3775 MPI_Allreduce(MPI_IN_PLACE, anmin, 2, MPI_DOUBLE, MPI_MIN, comm);
3776 MPI_Allreduce(MPI_IN_PLACE, anmax, 2, MPI_DOUBLE, MPI_MAX, comm);
3777 }
3778 #endif
3779
3780 bft_printf(_(" fine grid anisotropy: min = %12.5e\n"
3781 " max = %12.5e\n"
3782 " coarse grid anisotropy: min = %12.5e\n"
3783 " max = %12.5e\n"),
3784 anmin[0], anmax[0], anmin[1], anmax[1]);
3785
3786 BFT_FREE(w2);
3787 BFT_FREE(w4);
3788
3789 if (interp == 1) {
3790 double rmin = HUGE_VAL, rmax = -HUGE_VAL;
3791 for (c_face = 0; c_face < c_n_faces; c_face++) {
3792 rmin = CS_MIN(rmin, c_xa[c_face*isym] / c_xa0[c_face]);
3793 rmax = CS_MAX(rmax, c_xa[c_face*isym] / c_xa0[c_face]);
3794 }
3795 #if defined(HAVE_MPI) && defined(HAVE_MPI_IN_PLACE)
3796 if (comm != MPI_COMM_NULL) {
3797 MPI_Allreduce(MPI_IN_PLACE, &rmin, 1, MPI_DOUBLE, MPI_MIN, comm);
3798 MPI_Allreduce(MPI_IN_PLACE, &rmax, 1, MPI_DOUBLE, MPI_MAX, comm);
3799 }
3800 #endif
3801 bft_printf(_(" minimum xag_p1 / xag_p0 = %12.5e\n"
3802 " maximum xag_p1 / xag_p0 = %12.5e\n"),
3803 rmin, rmax);
3804 }
3805
3806 BFT_FREE(w3);
3807 BFT_FREE(w1);
3808 }
3809
3810 /*----------------------------------------------------------------------------
3811 * Build coarse grid matrix in the MSR format given the 4 necessary arrays.
3812 *
3813 * parameters:
3814 * c <-> coarse grid structure
3815 * symmetric <-- symmetry flag
3816 * c_row_index <-- MSR row index (0 to n-1)
3817 * c_col_id <-- MSR column id (0 to n-1)
3818 * c_d_val <-- diagonal values
3819 * c_x_val <-- extradiagonal values
3820 *----------------------------------------------------------------------------*/
3821
static void
_build_coarse_matrix_msr(cs_grid_t *c,
                         bool       symmetric,
                         cs_lnum_t *c_row_index,
                         cs_lnum_t *c_col_id,
                         cs_real_t *c_d_val,
                         cs_real_t *c_x_val)
{
  /* Create the MSR structure; the "transfer" flag is true and the index
     arrays are passed by address, so ownership of c_row_index and c_col_id
     is handed to the structure (the caller must not free them afterwards). */

  cs_matrix_structure_t *ms
    = cs_matrix_structure_create_msr(CS_MATRIX_MSR,
                                     true, /* transfer */
                                     true, /* have_diag */
                                     c->n_rows,
                                     c->n_cols_ext,
                                     &c_row_index,
                                     &c_col_id,
                                     c->halo,
                                     NULL);

  c->matrix_struct = ms;

  /* Create the matrix on that structure; both the owned (_matrix) and
     shared (matrix) pointers of the grid reference the same object. */

  c->_matrix = cs_matrix_create(c->matrix_struct);
  c->matrix = c->_matrix;

  /* Re-query the structure's internal row index / column id arrays
     (which may differ from the transferred pointers), as
     cs_matrix_transfer_coefficients_msr expects them below. */

  const cs_lnum_t *_c_row_index, *_c_col_id;

  cs_matrix_get_msr_arrays(c->matrix,
                           &_c_row_index, &_c_col_id,
                           NULL, NULL);

  /* Transfer coefficient ownership: c_d_val and c_x_val are passed by
     address, so the matrix takes them over as well. */

  cs_matrix_transfer_coefficients_msr(c->_matrix,
                                      symmetric,
                                      NULL,
                                      NULL,
                                      _c_row_index,
                                      _c_col_id,
                                      &c_d_val,
                                      &c_x_val);
}
3861
3862 /*----------------------------------------------------------------------------
3863 * Build a coarse level from a finer level using face->cells connectivity
3864 *
3865 * parameters:
3866 * fine_grid <-- fine grid structure
3867 * coarse_grid <-> coarse grid structure
3868 * verbosity <-- verbosity level
3869 *----------------------------------------------------------------------------*/
3870
static void
_compute_coarse_quantities_native(const cs_grid_t  *fine_grid,
                                  cs_grid_t        *coarse_grid,
                                  int               verbosity)
{
  cs_lnum_t ic, jc, ii, jj, kk, face_id;

  cs_real_t dsigjg, dsxaij, agij;

  int isym = 2;  /* extra-diagonal stride: 2 if non-symmetric, 1 if symmetric */

  cs_lnum_t f_n_cells = fine_grid->n_rows;
  cs_lnum_t f_n_cells_ext = fine_grid->n_cols_ext;
  cs_lnum_t f_n_faces = fine_grid->n_faces;

  cs_lnum_t c_n_cells_ext = coarse_grid->n_cols_ext;
  cs_lnum_t c_n_faces = coarse_grid->n_faces;

  cs_lnum_t *c_coarse_row = coarse_grid->coarse_row;
  cs_lnum_t *c_coarse_face = coarse_grid->coarse_face;

  cs_gnum_t n_clips_min = 0, n_clips_max = 0;

  cs_real_t *f_xa0ij = fine_grid->xa0ij;

  cs_real_t *c_cell_cen = coarse_grid->_cell_cen;
  cs_real_t *c_face_normal = coarse_grid->_face_normal;

  cs_real_t *c_xa0 = coarse_grid->_xa0;
  cs_real_t *c_xa0ij = coarse_grid->xa0ij;
  cs_real_t *c_da = coarse_grid->_da;
  cs_real_t *c_xa = coarse_grid->_xa;

  const cs_lnum_t *db_size = fine_grid->db_size;

  const cs_lnum_2_t *f_face_cell = fine_grid->face_cell;
  const cs_lnum_2_t *c_face_cell = coarse_grid->face_cell;

  const cs_real_t *f_face_normal = fine_grid->face_normal;
  const cs_real_t *f_xa0 = fine_grid->xa0;
  const cs_real_t *f_da = fine_grid->da;
  const cs_real_t *f_xa = fine_grid->xa;

  const cs_real_t relax_param = coarse_grid->relaxation;

  if (fine_grid->symmetric == true)
    isym = 1;

  /* Finalize computation of matrix in c_da, c_xa */
  /* relax_param <= 0 : P0 restriction / P0 prolongation => c_xa = c_xa0 */
  /* relax_parm > 0 : P0 restriction / P1 prolongation => c_xa = c_xa0ij/icjc */

  /* Extradiagonal terms */

  cs_lnum_t c_n_vals = c_n_faces*isym;

# pragma omp parallel for if(c_n_vals*6 > CS_THR_MIN)
  for (cs_lnum_t c_val = 0; c_val < c_n_vals; c_val++) {
    c_xa[c_val] = 0.;
  }

  if (relax_param <= 0) {

    /* P0/P0 case: simply accumulate fine extra-diagonal coefficients on
       their coarse face.  c_coarse_face uses 1-based indices; the sign
       encodes orientation of the fine face relative to the coarse face
       (irrelevant here since both orientations accumulate identically). */

    if (isym == 1) {

      for (face_id = 0; face_id < f_n_faces; face_id++) {
        cs_lnum_t c_face = c_coarse_face[face_id];
        if (c_face > 0) {
          c_face = c_face - 1;
          c_xa[c_face] += f_xa[face_id];
        }
        else if (c_face < 0) {
          c_face = -c_face - 1;
          c_xa[c_face] += f_xa[face_id];
        }
      }

    }

    else if (isym == 2) {

      for (face_id = 0; face_id < f_n_faces; face_id++) {
        cs_lnum_t c_face = c_coarse_face[face_id];
        if (c_face > 0) {
          c_face = c_face - 1;
          c_xa[c_face*2] += f_xa[face_id*2];
          c_xa[c_face*2 + 1] += f_xa[face_id*2 + 1];
        }
        else if (c_face < 0) {
          c_face = -c_face - 1;
          c_xa[c_face*2] += f_xa[face_id*2];
          c_xa[c_face*2 + 1] += f_xa[face_id*2 + 1];
        }
      }

    }

  }

  else if (relax_param > 0) {

    /* P0 restriction of matrices, "interior" surface: */
    /* xag0(nfacg), surfag(3,nfacgl), xagxg0(2,nfacg) */

# pragma omp parallel for if(c_n_faces*6 > CS_THR_MIN)
    for (cs_lnum_t c_face = 0; c_face < c_n_faces; c_face++) {
      c_xa0[c_face] = 0.;
      c_face_normal[3*c_face] = 0.;
      c_face_normal[3*c_face +1] = 0.;
      c_face_normal[3*c_face +2] = 0.;
      c_xa0ij[3*c_face] = 0.;
      c_xa0ij[3*c_face +1] = 0.;
      c_xa0ij[3*c_face +2] = 0.;
    }

    /* Accumulate fine-face quantities on coarse faces; the sign of
       c_coarse_face gives the fine face's orientation relative to the
       coarse face, so vector quantities (normal, xa0ij) change sign
       while the scalar xa0 does not. */

    for (face_id = 0; face_id < f_n_faces; face_id++) {

      if (c_coarse_face[face_id] > 0 ) {
        cs_lnum_t c_face = c_coarse_face[face_id] -1;

        c_xa0[c_face] += f_xa0[face_id];
        c_face_normal[3*c_face] += f_face_normal[3*face_id];
        c_face_normal[3*c_face +1] += f_face_normal[3*face_id +1];
        c_face_normal[3*c_face +2] += f_face_normal[3*face_id +2];
        c_xa0ij[3*c_face] += f_xa0ij[3*face_id];
        c_xa0ij[3*c_face +1] += f_xa0ij[3*face_id +1];
        c_xa0ij[3*c_face +2] += f_xa0ij[3*face_id +2];
      }
      else if (c_coarse_face[face_id] < 0) {
        cs_lnum_t c_face = -c_coarse_face[face_id] -1;

        c_xa0[c_face] += f_xa0[face_id];
        c_face_normal[3*c_face] -= f_face_normal[3*face_id];
        c_face_normal[3*c_face +1] -= f_face_normal[3*face_id +1];
        c_face_normal[3*c_face +2] -= f_face_normal[3*face_id +2];
        c_xa0ij[3*c_face] -= f_xa0ij[3*face_id];
        c_xa0ij[3*c_face +1] -= f_xa0ij[3*face_id +1];
        c_xa0ij[3*c_face +2] -= f_xa0ij[3*face_id +2];
      }

    }

    /* Matrix initialized to c_xa0 */

    if (isym == 1) {
# pragma omp parallel for if(c_n_faces > CS_THR_MIN)
      for (cs_lnum_t c_face = 0; c_face < c_n_faces; c_face++)
        c_xa[c_face] = c_xa0[c_face];
    }
    else {
# pragma omp parallel for if(c_n_faces*2 > CS_THR_MIN)
      for (cs_lnum_t c_face = 0; c_face < c_n_faces; c_face++) {
        c_xa[2*c_face] = c_xa0[c_face];
        c_xa[2*c_face +1] = c_xa0[c_face];
      }
    }

    /* P1 interpolation: recompute each coarse extra-diagonal coefficient
       from the restricted geometric quantities, clipping to the P0 value
       when the result would lose diagonal dominance or change sign. */

    for (cs_lnum_t c_face = 0; c_face < c_n_faces; c_face++) {

      ic = c_face_cell[c_face][0];
      jc = c_face_cell[c_face][1];

      /* (cell_jc - cell_ic) . coarse face normal */
      dsigjg = ( c_cell_cen[3*jc]
                 - c_cell_cen[3*ic]) * c_face_normal[3*c_face]
             + ( c_cell_cen[3*jc +1]
                 - c_cell_cen[3*ic +1]) * c_face_normal[3*c_face +1]
             + ( c_cell_cen[3*jc +2]
                 - c_cell_cen[3*ic +2]) * c_face_normal[3*c_face +2];

      /* xa0ij . coarse face normal */
      dsxaij = c_xa0ij[3*c_face] * c_face_normal[3*c_face]
             + c_xa0ij[3*c_face +1] * c_face_normal[3*c_face +1]
             + c_xa0ij[3*c_face +2] * c_face_normal[3*c_face +2];

      if (fabs(dsigjg) > EPZERO) {

        agij = dsxaij/dsigjg;

        /* Standard */
        c_xa[c_face*isym] = agij;
        c_xa[(c_face +1)*isym -1] = agij;

        /* Clipped matrix */
        if (agij < c_xa0[c_face] || agij > 0.) {
          c_xa[c_face*isym] = c_xa0[c_face];
          c_xa[(c_face +1)*isym -1] = c_xa0[c_face];
          if (agij < c_xa0[c_face]) n_clips_min++;
          if (agij > 0.) n_clips_max++;
        }

      }
    }

    /* Possible P1 matrix / P0 matrix relaxation defined
       by the user in cs_user_parameters.c
       (using cs_multigrid_set_coarsening_options) */

    if (isym == 1) {
      for (cs_lnum_t c_face = 0; c_face < c_n_faces; c_face++)
        c_xa[c_face] = relax_param*c_xa[c_face]
                     + (1. - relax_param)*c_xa0[c_face];
    }
    else {
      for (cs_lnum_t c_face = 0; c_face < c_n_faces; c_face++) {
        c_xa[2*c_face] = relax_param * c_xa[2*c_face]
                       + (1. - relax_param) * c_xa0[c_face];
        c_xa[2*c_face +1] = relax_param * c_xa[2*c_face +1]
                          + (1. - relax_param) * c_xa0[c_face];
      }
    }

  } /* relax_param > 0 */

  /* Initialize non differential fine grid term saved in w1:
     w1 = fine diagonal plus the sum of the row's extra-diagonal
     coefficients, i.e. the part of the diagonal not balanced by
     the off-diagonal stencil. */

  cs_real_t *w1 = NULL;
  BFT_MALLOC(w1, f_n_cells_ext*db_size[3], cs_real_t);

  if (db_size[0] == 1) {
# pragma omp parallel for if(f_n_cells > CS_THR_MIN)
    for (ii = 0; ii < f_n_cells; ii++)
      w1[ii] = f_da[ii];
  }
  else {
# pragma omp parallel for private(jj, kk) if(f_n_cells > CS_THR_MIN)
    for (ii = 0; ii < f_n_cells; ii++) {
      for (jj = 0; jj < db_size[0]; jj++) {
        for (kk = 0; kk < db_size[0]; kk++)
          w1[ii*db_size[3] + db_size[2]*jj + kk]
            = f_da[ii*db_size[3] + db_size[2]*jj + kk];
      }
    }
  }
  /* Zero the halo part so accumulation below is well-defined. */
# pragma omp parallel for if(f_n_cells_ext - f_n_cells > CS_THR_MIN)
  for (ii = f_n_cells*db_size[3]; ii < f_n_cells_ext*db_size[3]; ii++)
    w1[ii] = 0.;

  for (face_id = 0; face_id < f_n_faces; face_id++) {
    ii = f_face_cell[face_id][0];
    jj = f_face_cell[face_id][1];
    for (kk = 0; kk < db_size[0]; kk++) {
      w1[ii*db_size[3] + db_size[2]*kk + kk] += f_xa[face_id*isym];
      w1[jj*db_size[3] + db_size[2]*kk + kk] += f_xa[(face_id +1)*isym -1];
    }
  }

  /* Diagonal term: restrict w1 to coarse rows, then subtract the coarse
     extra-diagonal coefficients so each coarse row keeps the same
     "non differential" balance as the fine rows it aggregates. */

# pragma omp parallel for if(c_n_cells_ext > CS_THR_MIN)
  for (ic = 0; ic < c_n_cells_ext*db_size[3]; ic++)
    c_da[ic] = 0.;

  if (db_size[0] == 1) {
    for (ii = 0; ii < f_n_cells; ii++) {
      ic = c_coarse_row[ii];
      if (ic > -1)
        c_da[ic] += w1[ii];
    }
  }
  else {
    for (ii = 0; ii < f_n_cells; ii++) {
      ic = c_coarse_row[ii];
      if (ic > -1) {
        for (jj = 0; jj < db_size[0]; jj++) {
          for (kk = 0; kk < db_size[0]; kk++)
            c_da[ic*db_size[3] + db_size[2]*jj + kk]
              += w1[ii*db_size[3] + db_size[2]*jj + kk];
        }
      }
    }
  }

  for (cs_lnum_t c_face = 0; c_face < c_n_faces; c_face++) {
    ic = c_face_cell[c_face][0];
    jc = c_face_cell[c_face][1];
    for (kk = 0; kk < db_size[0]; kk++) {
      c_da[ic*db_size[3] + db_size[2]*kk + kk] -= c_xa[c_face*isym];
      c_da[jc*db_size[3] + db_size[2]*kk + kk] -= c_xa[(c_face +1)*isym -1];
    }
  }

  BFT_FREE(w1);

  /* Optional verification */

  if (verbosity > 3) {
    int interp = (relax_param > 0) ? 1 : 0;
    _verify_coarse_quantities(fine_grid,
                              coarse_grid,
                              n_clips_min,
                              n_clips_max,
                              interp);
  }

}
4165
4166 /*----------------------------------------------------------------------------
4167 * Build a coarse level from a finer level for convection/diffusion case.
4168 *
4169 * parameters:
4170 * fine_grid <-- fine grid structure
4171 * coarse_grid <-> coarse grid structure
4172 * verbosity <-- verbosity level
4173 *----------------------------------------------------------------------------*/
4174
static void
_compute_coarse_quantities_conv_diff(const cs_grid_t  *fine_grid,
                                     cs_grid_t        *coarse_grid,
                                     int               verbosity)
{
  cs_lnum_t ic, jc, ii, jj, kk, c_face, face_id;

  cs_real_t dsigjg, dsxaij, agij;

  /* Fine grid dimensions */

  cs_lnum_t f_n_cells = fine_grid->n_rows;
  cs_lnum_t f_n_cells_ext = fine_grid->n_cols_ext;
  cs_lnum_t f_n_faces = fine_grid->n_faces;

  /* Coarse grid dimensions */

  cs_lnum_t c_n_cells_ext = coarse_grid->n_cols_ext;
  cs_lnum_t c_n_faces = coarse_grid->n_faces;

  cs_lnum_t *c_coarse_row = coarse_grid->coarse_row;
  cs_lnum_t *c_coarse_face = coarse_grid->coarse_face;

  /* Clipping counters, reported by _verify_coarse_quantities below */

  cs_gnum_t n_clips_min = 0, n_clips_max = 0;

  cs_real_t *f_xa0ij = fine_grid->xa0ij;

  cs_real_t *c_cell_cen = coarse_grid->_cell_cen;
  cs_real_t *c_face_normal = coarse_grid->_face_normal;

  cs_real_t *c_xa0 = coarse_grid->_xa0;
  cs_real_t *c_xa0_diff = coarse_grid->_xa0_diff;
  cs_real_t *c_xa0ij = coarse_grid->xa0ij;
  cs_real_t *c_da = coarse_grid->_da;
  cs_real_t *c_xa = coarse_grid->_xa;
  cs_real_t *c_da_conv = coarse_grid->_da_conv;
  cs_real_t *c_xa_conv = coarse_grid->_xa_conv;
  cs_real_t *c_da_diff = coarse_grid->_da_diff;
  cs_real_t *c_xa_diff = coarse_grid->_xa_diff;

  /* Work arrays: per-cell non-differential, convective and diffusive
     contributions to the diagonal */

  cs_real_t *w1 = NULL, *w1_conv = NULL, *w1_diff = NULL;

  const cs_lnum_t *db_size = fine_grid->db_size;

  const cs_lnum_2_t *f_face_cell = fine_grid->face_cell;
  const cs_lnum_2_t *c_face_cell = coarse_grid->face_cell;

  const cs_real_t *f_face_normal = fine_grid->face_normal;
  const cs_real_t *f_xa0 = fine_grid->xa0;
  const cs_real_t *f_da = fine_grid->da;
  const cs_real_t *f_da_conv = fine_grid->da_conv;
  const cs_real_t *f_xa_conv = fine_grid->xa_conv;
  const cs_real_t *f_xa0_diff = fine_grid->xa0_diff;
  const cs_real_t *f_da_diff = fine_grid->da_diff;
  const cs_real_t *f_xa_diff = fine_grid->xa_diff;

  BFT_MALLOC(w1, 2*f_n_cells_ext*db_size[3], cs_real_t);
  BFT_MALLOC(w1_conv, 2*f_n_cells_ext*db_size[3], cs_real_t);
  BFT_MALLOC(w1_diff, 2*f_n_cells_ext*db_size[3], cs_real_t);

  /* The convection/diffusion split is only used with non-symmetric
     matrices (2 extra-diagonal values per face, isym = 2 below) */

  assert(fine_grid->symmetric == false);

  /* P0 restriction of matrices, "interior" surface: */
  /* xag0(nfacg), surfag(3,nfacgl), xagxg0(2,nfacg) */

# pragma omp parallel for if(c_n_faces*6 > CS_THR_MIN)
  for (c_face = 0; c_face < c_n_faces; c_face++) {
    c_xa0[2*c_face] = 0.;
    c_xa0[2*c_face +1] = 0.;
    c_xa0_diff[c_face] = 0.;
    c_face_normal[3*c_face] = 0.;
    c_face_normal[3*c_face +1] = 0.;
    c_face_normal[3*c_face +2] = 0.;
    c_xa0ij[3*c_face] = 0.;
    c_xa0ij[3*c_face +1] = 0.;
    c_xa0ij[3*c_face +2] = 0.;
    c_xa_conv[2*c_face] = 0.;
    c_xa_conv[2*c_face +1] = 0.;
  }

  /* Accumulate fine-face contributions onto coarse faces.
     The sign of c_coarse_face encodes the fine face's orientation
     relative to the coarse face: negative means reversed, so the
     two extra-diagonal values are swapped and geometric terms negated. */

  for (face_id = 0; face_id < f_n_faces; face_id++) {

    if (c_coarse_face[face_id] > 0 ) {
      c_face = c_coarse_face[face_id] -1;

      c_xa0[2*c_face] += f_xa0[2*face_id];
      c_xa0[2*c_face +1] += f_xa0[2*face_id +1];
      c_xa_conv[2*c_face] += f_xa_conv[2*face_id];
      c_xa_conv[2*c_face +1] += f_xa_conv[2*face_id +1];
      c_xa0_diff[c_face] += f_xa0_diff[face_id];
      c_face_normal[3*c_face] += f_face_normal[3*face_id];
      c_face_normal[3*c_face +1] += f_face_normal[3*face_id +1];
      c_face_normal[3*c_face +2] += f_face_normal[3*face_id +2];
      c_xa0ij[3*c_face] += f_xa0ij[3*face_id];
      c_xa0ij[3*c_face +1] += f_xa0ij[3*face_id +1];
      c_xa0ij[3*c_face +2] += f_xa0ij[3*face_id +2];
    }
    else if (c_coarse_face[face_id] < 0) {
      c_face = -c_coarse_face[face_id] -1;

      c_xa0[2*c_face] += f_xa0[2*face_id +1];
      c_xa0[2*c_face +1] += f_xa0[2*face_id];
      c_xa_conv[2*c_face] += f_xa_conv[2*face_id +1];
      c_xa_conv[2*c_face +1] += f_xa_conv[2*face_id];
      c_xa0_diff[c_face] += f_xa0_diff[face_id];
      c_face_normal[3*c_face] -= f_face_normal[3*face_id];
      c_face_normal[3*c_face +1] -= f_face_normal[3*face_id +1];
      c_face_normal[3*c_face +2] -= f_face_normal[3*face_id +2];
      c_xa0ij[3*c_face] -= f_xa0ij[3*face_id];
      c_xa0ij[3*c_face +1] -= f_xa0ij[3*face_id +1];
      c_xa0ij[3*c_face +2] -= f_xa0ij[3*face_id +2];
    }

    /* c_coarse_face[face_id] == 0: face is interior to a coarse cell,
       so it contributes only to the diagonal (handled via w1* below) */

  }

  /* Finalize computation of matrix in c_da, c_xa */
  /* interp = 0 : P0 restriction / P0 prolongation => c_xa = c_xa0 */
  /* interp = 1 : P0 restriction / P1 prolongation => c_xa = c_xa0ij/icjc */

  /* Initialization */

  const cs_real_t relax_param = coarse_grid->relaxation;

  int interp = 1;

  /* Initialize non differential fine grid term saved in w1 */

  if (db_size[0] == 1) {
    /* Scalar case: split fine diagonal into convective, diffusive,
       and remaining (non-differential) parts */
# pragma omp parallel for if(f_n_cells > CS_THR_MIN)
    for (ii = 0; ii < f_n_cells; ii++) {
      w1_conv[ii] = f_da_conv[ii];
      w1_diff[ii] = f_da_diff[ii];
      w1[ii] = f_da[ii] - f_da_conv[ii] - f_da_diff[ii];
    }
  }
  else {
    /* Block case: same split, applied to each db_size[0] x db_size[0]
       diagonal block (db_size[2] = row stride, db_size[3] = block stride) */
# pragma omp parallel for private(jj, kk) if(f_n_cells > CS_THR_MIN)
    for (ii = 0; ii < f_n_cells; ii++) {
      for (jj = 0; jj < db_size[0]; jj++) {
        for (kk = 0; kk < db_size[0]; kk++) {
          w1_conv[ii*db_size[3] + db_size[2]*jj + kk]
            = f_da_conv[ii*db_size[3] + db_size[2]*jj + kk];
          w1_diff[ii*db_size[3] + db_size[2]*jj + kk]
            = f_da_diff[ii*db_size[3] + db_size[2]*jj + kk];
          w1[ii*db_size[3] + db_size[2]*jj + kk]
            = f_da[ii*db_size[3] + db_size[2]*jj + kk]
              - f_da_conv[ii*db_size[3] + db_size[2]*jj + kk]
              - f_da_diff[ii*db_size[3] + db_size[2]*jj + kk];
        }
      }
    }
  }

  /* Zero ghost-cell entries of w1.
     NOTE(review): w1_conv/w1_diff ghost entries are not zeroed here;
     this appears safe since only the first f_n_cells entries of those
     arrays are read below — confirm. */

# pragma omp parallel for if(f_n_cells_ext - f_n_cells > CS_THR_MIN)
  for (ii = f_n_cells*db_size[3]; ii < f_n_cells_ext*db_size[3]; ii++)
    w1[ii] = 0.;

  /* Add extra-diagonal face contributions to the per-cell
     convective/diffusive diagonal terms */

  if (db_size[0] == 1) {
    for (face_id = 0; face_id < f_n_faces; face_id++) {
      ii = f_face_cell[face_id][0];
      jj = f_face_cell[face_id][1];
      w1_conv[ii] += f_xa_conv[2*face_id];
      w1_conv[jj] += f_xa_conv[2*face_id +1];
      w1_diff[ii] += f_xa_diff[face_id];
      w1_diff[jj] += f_xa_diff[face_id];
    }
  }
  else {
    for (face_id = 0; face_id < f_n_faces; face_id++) {
      ii = f_face_cell[face_id][0];
      jj = f_face_cell[face_id][1];
      for (kk = 0; kk < db_size[0]; kk++) {
        /* Contribution added to each diagonal entry of the block */
        w1_conv[ii*db_size[3] + db_size[2]*kk + kk]
          += f_xa_conv[2*face_id];
        w1_conv[jj*db_size[3] + db_size[2]*kk + kk]
          += f_xa_conv[2*face_id +1];
        w1_diff[ii*db_size[3] + db_size[2]*kk + kk]
          += f_xa_diff[face_id];
        w1_diff[jj*db_size[3] + db_size[2]*kk + kk]
          += f_xa_diff[face_id];
      }
    }
  }

  /* Initialize coarse matrix storage on (c_da, c_xa) */

# pragma omp parallel for if(c_n_cells_ext > CS_THR_MIN)
  for (ic = 0; ic < c_n_cells_ext*db_size[3]; ic++) {
    c_da[ic] = 0.;
    c_da_conv[ic] = 0.;
    c_da_diff[ic] = 0.;
  }

  /* Extradiagonal terms */
  /* (symmetric matrices for now, even with non symmetric storage isym = 2) */

  /* Matrix initialized to c_xa0 (interp=0) */

# pragma omp parallel for if(c_n_faces*2 > CS_THR_MIN)
  for (c_face = 0; c_face < c_n_faces; c_face++) {
    c_xa[2*c_face] = c_xa0[2*c_face];
    c_xa[2*c_face +1] = c_xa0[2*c_face +1];
    c_xa_diff[c_face] = c_xa0_diff[c_face];
  }

  if (interp == 1) {
    for (c_face = 0; c_face < c_n_faces; c_face++) {

      ic = c_face_cell[c_face][0];
      jc = c_face_cell[c_face][1];

      /* dsigjg: (IJ . S), distance between coarse cell centers
         projected on the coarse face normal */

      dsigjg = (   c_cell_cen[3*jc]
                 - c_cell_cen[3*ic]) * c_face_normal[3*c_face]
             + (   c_cell_cen[3*jc +1]
                 - c_cell_cen[3*ic +1]) * c_face_normal[3*c_face +1]
             + (   c_cell_cen[3*jc +2]
                 - c_cell_cen[3*ic +2]) * c_face_normal[3*c_face +2];

      /* dsxaij: (xa0 . IJ . S) accumulated from fine faces */

      dsxaij =   c_xa0ij[3*c_face]    * c_face_normal[3*c_face]
               + c_xa0ij[3*c_face +1] * c_face_normal[3*c_face +1]
               + c_xa0ij[3*c_face +2] * c_face_normal[3*c_face +2];

      if (fabs(dsigjg) > EPZERO) {

        agij = dsxaij/dsigjg;

        /* Standard */
        c_xa_diff[c_face] = agij;

        /* Clipped matrix: keep the P0 value when the P1 interpolation
           falls outside [c_xa0_diff, 0] (diffusion coefficient must
           stay non-positive and no weaker than the P0 restriction) */
        if (agij < c_xa0_diff[c_face] || agij > 0.) {
          c_xa_diff[c_face] = c_xa0_diff[c_face];
          if (agij < c_xa0_diff[c_face]) n_clips_min++;
          if (agij > 0.) n_clips_max++;
        }

      }
    }

    /* Possible P1 matrix / P0 matrix relaxation defined
       by the user in cs_user_parameters.c
       (using cs_multigrid_set_coarsening_options) */
    for (c_face = 0; c_face < c_n_faces; c_face++) {
      c_xa_diff[c_face] =         relax_param  * c_xa_diff[c_face]
                          + (1. - relax_param) * c_xa0_diff[c_face];
    }

  } /* endif interp == 1 */

  if (interp != 0 && interp != 1)
    bft_error(__FILE__, __LINE__, 0, "interp incorrectly defined.");

  /* Diagonal term */

  if (db_size[0] == 1) {
    for (ii = 0; ii < f_n_cells; ii++) {
      ic = c_coarse_row[ii];
      c_da_conv[ic] += w1_conv[ii];
      c_da_diff[ic] += w1_diff[ii];
    }
  }
  else {
    for (ii = 0; ii < f_n_cells; ii++) {
      ic = c_coarse_row[ii];
      for (jj = 0; jj < db_size[0]; jj++) {
        for (kk = 0; kk < db_size[0]; kk++) {
          c_da_conv[ic*db_size[3] + db_size[2]*jj + kk]
            += w1_conv[ii*db_size[3] + db_size[2]*jj + kk];
          c_da_diff[ic*db_size[3] + db_size[2]*jj + kk]
            += w1_diff[ii*db_size[3] + db_size[2]*jj + kk];
        }
      }
    }
  }

  /* Subtract coarse extra-diagonal sums from the diagonal.
     NOTE(review): scalar (non-block) indexing here — this appears to
     assume db_size[0] == 1 for the conv/diff coupling terms; confirm. */

  for (c_face = 0; c_face < c_n_faces; c_face++) {
    ic = c_face_cell[c_face][0];
    jc = c_face_cell[c_face][1];
    c_da_conv[ic] -= c_xa_conv[2*c_face];
    c_da_conv[jc] -= c_xa_conv[2*c_face +1];
    c_da_diff[ic] -= c_xa_diff[c_face];
    c_da_diff[jc] -= c_xa_diff[c_face];
  }

  /* Convection/diffusion matrix */

  for (c_face = 0; c_face < c_n_faces; c_face++) {
    c_xa[2*c_face] = c_xa_conv[2*c_face] + c_xa_diff[c_face];
    c_xa[2*c_face +1] = c_xa_conv[2*c_face +1] + c_xa_diff[c_face];
  }

  /* Add non-differential part of the diagonal */

  if (db_size[0] == 1) {
    for (ii = 0; ii < f_n_cells; ii++) {
      ic = c_coarse_row[ii];
      c_da[ic] += w1[ii];
    }
  }
  else {
    for (ii = 0; ii < f_n_cells; ii++) {
      ic = c_coarse_row[ii];
      for (jj = 0; jj < db_size[0]; jj++) {
        for (kk = 0; kk < db_size[0]; kk++)
          c_da[ic*db_size[3] + db_size[2]*jj + kk]
            += w1[ii*db_size[3] + db_size[2]*jj + kk];
      }
    }
  }

  /* Recombine full diagonal: non-differential + convective + diffusive
     (scalar indexing; see NOTE above) */

  for (ic = 0; ic < c_n_cells_ext; ic++)
    c_da[ic] += c_da_conv[ic] + c_da_diff[ic];

  BFT_FREE(w1);
  BFT_FREE(w1_conv);
  BFT_FREE(w1_diff);

  /* Optional verification */

  if (verbosity > 3)
    _verify_coarse_quantities(fine_grid,
                              coarse_grid,
                              n_clips_min,
                              n_clips_max,
                              interp);

}
4496
4497 /*----------------------------------------------------------------------------
4498 * Build a coarse level from a finer level with an MSR matrix.
4499 *
4500 * parameters:
4501 * fine_grid <-- Fine grid structure
4502 * coarse_grid <-> Coarse grid structure
4503 *----------------------------------------------------------------------------*/
4504
static void
_compute_coarse_quantities_msr(const cs_grid_t  *fine_grid,
                               cs_grid_t        *coarse_grid)

{
  const cs_lnum_t *db_size = fine_grid->db_size;

  const cs_lnum_t f_n_rows = fine_grid->n_rows;

  const cs_lnum_t c_n_rows = coarse_grid->n_rows;
  const cs_lnum_t c_n_cols = coarse_grid->n_cols_ext;
  const cs_lnum_t *c_coarse_row = coarse_grid->coarse_row;

  /* Fine matrix in the MSR format */

  const cs_lnum_t *f_row_index, *f_col_id;
  const cs_real_t *f_d_val, *f_x_val;

  cs_matrix_get_msr_arrays(fine_grid->matrix,
                           &f_row_index,
                           &f_col_id,
                           &f_d_val,
                           &f_x_val);

  /* Coarse matrix elements in the MSR format */

  cs_lnum_t *restrict c_row_index, *restrict c_col_id;
  cs_real_t *restrict c_d_val, *restrict c_x_val;

  /* Diagonal elements
     ----------------- */

  BFT_MALLOC(c_d_val, c_n_rows*db_size[3], cs_real_t);

  for (cs_lnum_t i = 0; i < c_n_rows*db_size[3]; i++)
    c_d_val[i] = 0.0;

  /* Careful here, we exclude penalized rows
     from the aggregation process */

  if (db_size[0] == 1) {
    /* Scalar case: sum fine diagonal values into their coarse row */
    for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
      cs_lnum_t ic = c_coarse_row[ii];
      if (ic > -1 && ic < c_n_rows)
        c_d_val[ic] += f_d_val[ii];
    }
  }
  else {
    /* Block case: sum each db_size[0] x db_size[0] diagonal block
       (db_size[2] = row stride, db_size[3] = block stride) */
    for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
      cs_lnum_t ic = c_coarse_row[ii];
      if (ic > -1 && ic < c_n_rows) {
        for (cs_lnum_t jj = 0; jj < db_size[0]; jj++) {
          for (cs_lnum_t kk = 0; kk < db_size[0]; kk++)
            c_d_val[ic*db_size[3] + db_size[2]*jj + kk]
              += f_d_val[ii*db_size[3] + db_size[2]*jj + kk];
        }
      }
    }
  }

  /* Extradiagonal elements
     ---------------------- */

  BFT_MALLOC(c_row_index, c_n_rows+1, cs_lnum_t);

  /* Prepare to traverse fine rows by increasing associated coarse row */

  cs_lnum_t f_n_active_rows = 0;
  cs_lnum_t *f_row_id = NULL;
  {
    /* cf_row_idx: counting sort of fine row ids by coarse row */

    cs_lnum_t *cf_row_idx;
    BFT_MALLOC(cf_row_idx, c_n_rows+1, cs_lnum_t);

    for (cs_lnum_t i = 0; i <= c_n_rows; i++)
      cf_row_idx[i] = 0;

    /* Count fine rows per coarse row (penalized rows excluded) */

    for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
      cs_lnum_t i = c_coarse_row[ii];
      if (i > -1 && i < c_n_rows)
        cf_row_idx[i+1] += 1;
    }

    /* Counts to index */

    for (cs_lnum_t i = 0; i < c_n_rows; i++)
      cf_row_idx[i+1] += cf_row_idx[i];

    f_n_active_rows = cf_row_idx[c_n_rows];

    /* Scatter fine row ids; cf_row_idx is consumed (shifted) here */

    BFT_MALLOC(f_row_id, f_n_active_rows, cs_lnum_t);
    for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
      cs_lnum_t i = c_coarse_row[ii];
      if (i > -1 && i < c_n_rows) {
        f_row_id[cf_row_idx[i]] = ii;
        cf_row_idx[i] += 1;
      }
    }

    BFT_FREE(cf_row_idx);
  }

  /* Counting pass */

  {
    /* last_row[j] remembers the last coarse row i for which coarse
       column j was counted, so each (i, j) pair is counted once.
       This relies on traversing fine rows in increasing coarse-row
       order (via f_row_id). */

    cs_lnum_t *last_row;
    BFT_MALLOC(last_row, c_n_cols, cs_lnum_t);

    for (cs_lnum_t i = 0; i <= c_n_rows; i++)
      c_row_index[i] = 0;

    for (cs_lnum_t i = 0; i < c_n_cols; i++)
      last_row[i] = -1;

    for (cs_lnum_t ii_id = 0; ii_id < f_n_active_rows; ii_id++) {

      cs_lnum_t ii = f_row_id[ii_id];

      cs_lnum_t s_id = f_row_index[ii];
      cs_lnum_t e_id = f_row_index[ii+1];

      for (cs_lnum_t jj_ind = s_id; jj_ind < e_id; jj_ind++) {

        cs_lnum_t jj = f_col_id[jj_ind];

        cs_lnum_t i = c_coarse_row[ii];
        cs_lnum_t j = c_coarse_row[jj];

        assert(i > -1 && i < c_n_rows);

        /* Count each distinct off-diagonal (i, j) once; entries that
           coarsen onto the diagonal (i == j) are folded into c_d_val
           in the values pass */

        if (j > -1 && i != j && last_row[j] < i) {
          last_row[j] = i;
          c_row_index[i+1]++;
        }

      }

    }

    BFT_FREE(last_row);
  }

  /* Transform count to index */

  for (cs_lnum_t i = 0; i < c_n_rows; i++)
    c_row_index[i+1] += c_row_index[i];

  cs_lnum_t c_size = c_row_index[c_n_rows];

  BFT_MALLOC(c_x_val, c_size, cs_real_t);
  BFT_MALLOC(c_col_id, c_size, cs_lnum_t);

  /* Assignment pass */

  {
    for (cs_lnum_t i = 0; i < c_size; i++)
      c_col_id[i] = -1;

    cs_lnum_t *r_col_idx; /* col_idx for a given id in current row */
    BFT_MALLOC(r_col_idx, c_n_cols, cs_lnum_t);

    for (cs_lnum_t i = 0; i < c_n_cols; i++)
      r_col_idx[i] = -1;

    /* r_count tracks columns already assigned in the current coarse
       row; r_col_idx[j] < c_row_index[i] detects "not yet seen in
       row i" without resetting the whole array per row */

    cs_lnum_t i_prev = -1;
    cs_lnum_t r_count = 0;

    for (cs_lnum_t ii_id = 0; ii_id < f_n_active_rows; ii_id++) {

      cs_lnum_t ii = f_row_id[ii_id];

      cs_lnum_t i = c_coarse_row[ii];

      if (i_prev != i) {
        r_count = 0;
        i_prev = i;
      }

      assert(i > -1 && i < c_n_rows);

      for (cs_lnum_t jj_ind = f_row_index[ii];
           jj_ind < f_row_index[ii+1];
           jj_ind++) {

        cs_lnum_t jj = f_col_id[jj_ind];

        cs_lnum_t j = c_coarse_row[jj];
        if (j > -1 && i != j) {
          if (r_col_idx[j] < c_row_index[i]) {
            r_col_idx[j] = c_row_index[i] + r_count;
            c_col_id[r_col_idx[j]] = j;
            r_count++;
          }
        }
      }
    }

    BFT_FREE(r_col_idx);
  }

  BFT_FREE(f_row_id);

  /* Order column ids in case some algorithms expect it */

  cs_sort_indexed(c_n_rows, c_row_index, c_col_id);

  /* Values assignment pass */

  {
    for (cs_lnum_t i = 0; i < c_size; i++)
      c_x_val[i] = 0;

    for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {

      cs_lnum_t i = c_coarse_row[ii];

      if (i > -1 && i < c_n_rows) {

        for (cs_lnum_t jj_ind = f_row_index[ii];
             jj_ind < f_row_index[ii+1];
             jj_ind++) {

          cs_lnum_t jj = f_col_id[jj_ind];

          cs_lnum_t j = c_coarse_row[jj];

          if (j > -1) {

            if (i != j) {
              cs_lnum_t s_id = c_row_index[i];
              cs_lnum_t n_cols = c_row_index[i+1] - s_id;
              /* ids are sorted, so binary search possible */
              cs_lnum_t k = _l_id_binary_search(n_cols, j, c_col_id + s_id);
              c_x_val[k + s_id] += f_x_val[jj_ind];
            }
            else { /* i == j */
              /* Fine extra-diagonal entries coarsening onto the
                 diagonal are added to each diagonal entry of the
                 corresponding block */
              for (cs_lnum_t kk = 0; kk < db_size[0]; kk++) {
                /* diagonal terms only */
                c_d_val[i*db_size[3] + db_size[2]*kk + kk]
                  += f_x_val[jj_ind];
              }
            }

          }
        }

      }

    }

  }

  /* Transfer ownership of the assembled arrays to the coarse grid's
     MSR matrix */

  _build_coarse_matrix_msr(coarse_grid, fine_grid->symmetric,
                           c_row_index, c_col_id,
                           c_d_val, c_x_val);
}
4758
4759 /*----------------------------------------------------------------------------
4760 * Build edge-based matrix values from MSR matrix.
4761 *
4762 * parameters:
4763 * grid <-> grid structure
4764 *----------------------------------------------------------------------------*/
4765
4766 static void
_native_from_msr(cs_grid_t * g)4767 _native_from_msr(cs_grid_t *g)
4768
4769 {
4770 const cs_lnum_t db_stride = g->db_size[3];
4771 const cs_lnum_t eb_stride = g->eb_size[3];
4772
4773 const cs_lnum_t n_rows = g->n_rows;
4774 const cs_lnum_t n_cols_ext = g->n_cols_ext;
4775
4776 /* Matrix in the MSR format */
4777
4778 const cs_lnum_t *row_index, *col_id;
4779 const cs_real_t *d_val, *x_val;
4780
4781 cs_matrix_get_msr_arrays(g->matrix,
4782 &row_index,
4783 &col_id,
4784 &d_val,
4785 &x_val);
4786
4787 {
4788 BFT_REALLOC(g->_da, db_stride*n_cols_ext, cs_real_t);
4789 g->da = g->_da;
4790
4791 for (cs_lnum_t i = 0; i < n_rows; i++) {
4792 for (cs_lnum_t l = 0; l < eb_stride; l++)
4793 g->_da[i*db_stride + l] = d_val[i*db_stride + l];
4794 }
4795 }
4796
4797 if (g->symmetric) {
4798 BFT_REALLOC(g->_face_cell, row_index[n_rows], cs_lnum_2_t);
4799 BFT_REALLOC(g->_xa, eb_stride*row_index[n_rows], cs_real_t);
4800
4801 cs_lnum_t n_edges = 0;
4802
4803 for (cs_lnum_t i = 0; i < n_rows; i++) {
4804 cs_lnum_t s_id = row_index[i];
4805 cs_lnum_t e_id = row_index[i+1];
4806 for (cs_lnum_t j = s_id; j < e_id; j++) {
4807 cs_lnum_t k = col_id[j];
4808 if (k <= i)
4809 continue;
4810 g->_face_cell[n_edges][0] = i;
4811 g->_face_cell[n_edges][1] = k;
4812 for (cs_lnum_t l = 0; l < eb_stride; l++)
4813 g->_xa[n_edges*eb_stride + l] = x_val[j*eb_stride + l];
4814 n_edges += 1;
4815 }
4816 }
4817
4818 g->n_faces = n_edges;
4819 BFT_REALLOC(g->_face_cell, n_edges, cs_lnum_2_t);
4820 BFT_REALLOC(g->_xa, eb_stride*n_edges, cs_real_t);
4821 g->face_cell = (const cs_lnum_2_t *)(g->_face_cell);
4822 g->xa = g->_xa;
4823 }
4824 else
4825 bft_error(__FILE__, __LINE__, 0,
4826 "%s: currently only implemented for symmetric cases.",
4827 __func__);
4828
4829 /* Synchronize matrix diagonal values */
4830
4831 if (g->halo != NULL)
4832 cs_halo_sync_var_strided(g->halo, CS_HALO_STANDARD, g->_da, g->db_size[3]);
4833
4834 cs_matrix_destroy(&(g->_matrix));
4835 g->matrix = NULL;
4836 cs_matrix_structure_destroy(&(g->matrix_struct));
4837 }
4838
4839 /*----------------------------------------------------------------------------
4840 * Build MSR matrix from edge-based matrix values.
4841 *
4842 * parameters:
4843 * grid <-> grid structure
4844 *----------------------------------------------------------------------------*/
4845
4846 static void
_msr_from_native(cs_grid_t * g)4847 _msr_from_native(cs_grid_t *g)
4848
4849 {
4850 g->matrix_struct = cs_matrix_structure_create(CS_MATRIX_MSR,
4851 true,
4852 g->n_rows,
4853 g->n_cols_ext,
4854 g->n_faces,
4855 g->face_cell,
4856 g->halo,
4857 NULL);
4858
4859 g->_matrix = cs_matrix_create(g->matrix_struct);
4860
4861 cs_matrix_set_coefficients(g->_matrix,
4862 g->symmetric,
4863 g->db_size,
4864 g->eb_size,
4865 g->n_faces,
4866 g->face_cell,
4867 g->da,
4868 g->xa);
4869 }
4870
4871 /*----------------------------------------------------------------------------
4872 * Project coarse grid row numbers to parent grid.
4873 *
4874 * parameters:
4875 * c <-- coarse grid structure
4876 *----------------------------------------------------------------------------*/
4877
4878 static void
_project_coarse_row_to_parent(cs_grid_t * c)4879 _project_coarse_row_to_parent(cs_grid_t *c)
4880 {
4881 assert(c != NULL);
4882
4883 const cs_grid_t *f = c->parent;
4884
4885 assert(f != NULL);
4886
4887 if (f->parent != NULL) { /* level > 0*/
4888
4889 cs_lnum_t *c_coarse_row = c->coarse_row;
4890 cs_lnum_t *f_coarse_row = f->coarse_row;
4891
4892 cs_lnum_t f_n_cols = f->parent->n_cols_ext;
4893
4894 for (cs_lnum_t ii = 0; ii < f_n_cols; ii++) {
4895 cs_lnum_t ic = f_coarse_row[ii];
4896 if (ic >= 0)
4897 f_coarse_row[ii] = c_coarse_row[ic];
4898 }
4899
4900 }
4901 }
4902
4903 /*----------------------------------------------------------------------------
4904 * Build locally empty matrix.
4905 *
4906 * parameters:
4907 * c <-> coarse grid structure
4908 * matrix_type <-- matrix type
4909 *----------------------------------------------------------------------------*/
4910
4911 static void
_build_coarse_matrix_null(cs_grid_t * c,cs_matrix_type_t matrix_type)4912 _build_coarse_matrix_null(cs_grid_t *c,
4913 cs_matrix_type_t matrix_type)
4914 {
4915 cs_matrix_structure_t *ms
4916 = cs_matrix_structure_create(matrix_type,
4917 true,
4918 0,
4919 0,
4920 0,
4921 NULL,
4922 NULL,
4923 NULL);
4924
4925 c->matrix_struct = ms;
4926
4927 c->_matrix = cs_matrix_create(c->matrix_struct);
4928 c->matrix = c->_matrix;
4929
4930 cs_matrix_set_coefficients(c->_matrix,
4931 c->symmetric,
4932 c->db_size,
4933 c->eb_size,
4934 0,
4935 NULL,
4936 NULL,
4937 NULL);
4938 }
4939
4940 /*----------------------------------------------------------------------------
4941 * Compute fine row integer values from coarse row values
4942 *
4943 * parameters:
 * c <-- Coarse grid structure
4945 * f <-- Fine grid structure
4946 * c_num --> Variable (rank) defined on coarse grid rows
4947 * f_num <-- Variable (rank) defined on fine grid rows
4948 *----------------------------------------------------------------------------*/
4949
4950 static void
_prolong_row_int(const cs_grid_t * c,const cs_grid_t * f,int * c_num,int * f_num)4951 _prolong_row_int(const cs_grid_t *c,
4952 const cs_grid_t *f,
4953 int *c_num,
4954 int *f_num)
4955 {
4956 const cs_lnum_t *coarse_row;
4957 const int *_c_num = c_num;
4958
4959 cs_lnum_t f_n_rows = f->n_rows;
4960
4961 assert(f != NULL);
4962 assert(c != NULL);
4963 assert(c->coarse_row != NULL || f_n_rows == 0);
4964 assert(f_num != NULL);
4965 assert(c_num != NULL);
4966
4967 #if defined(HAVE_MPI)
4968 _scatter_row_int(c, c_num);
4969 #endif
4970
4971 /* Set fine values (possible penalization at first level) */
4972
4973 coarse_row = c->coarse_row;
4974
4975 # pragma omp parallel for if(f_n_rows > CS_THR_MIN)
4976 for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
4977 cs_lnum_t i = coarse_row[ii];
4978 if (i >= 0)
4979 f_num[ii] = _c_num[i];
4980 else
4981 f_num[ii] = -1;
4982 }
4983 }
4984
4985 /*============================================================================
4986 * Semi-private function definitions
4987 *
4988 * The following functions are intended to be used by the multigrid layer
4989 * (cs_multigrid.c), not directly by the user, so they are no more
4990 * documented than private static functions)
4991 *============================================================================*/
4992
4993 /*----------------------------------------------------------------------------
4994 * Create base grid by mapping from shared mesh values.
4995 *
4996 * Note that as arrays given as arguments are shared by the created grid
4997 * (which can only access them, not modify them), the grid should be
4998 * destroyed before those arrays.
4999 *
5000 * parameters:
5001 * n_faces <-- Local number of faces
5002 * db_size <-- Block sizes for diagonal, or NULL
 * eb_size <-- Block sizes for extra diagonal, or NULL
5004 * face_cell <-- Face -> cells connectivity
5005 * cell_cen <-- Cell center (size: 3.n_cells_ext)
5006 * cell_vol <-- Cell volume (size: n_cells_ext)
5007 * face_normal <-- Internal face normals (size: 3.n_faces)
5008 * a <-- Associated matrix
5009 * a_conv <-- Associated matrix (convection)
5010 * a_diff <-- Associated matrix (diffusion)
5011 *
5012 * returns:
5013 * base grid structure
5014 *----------------------------------------------------------------------------*/
5015
cs_grid_t *
cs_grid_create_from_shared(cs_lnum_t              n_faces,
                           const cs_lnum_t       *db_size,
                           const cs_lnum_t       *eb_size,
                           const cs_lnum_2_t     *face_cell,
                           const cs_real_t       *cell_cen,
                           const cs_real_t       *cell_vol,
                           const cs_real_t       *face_normal,
                           const cs_matrix_t     *a,
                           const cs_matrix_t     *a_conv,
                           const cs_matrix_t     *a_diff)
{
  cs_lnum_t ii;

  cs_grid_t *g = NULL;

  /* Create empty structure and map base data */

  g = _create_grid();

  g->level = 0;
  g->conv_diff = false;
  g->symmetric = cs_matrix_is_symmetric(a);

  /* Diagonal block sizes default to 1 when not given */

  if (db_size != NULL) {
    for (ii = 0; ii < 4; ii++)
      g->db_size[ii] = db_size[ii];
  }
  else {
    for (ii = 0; ii < 4; ii++)
      g->db_size[ii] = 1;
  }

  /* Extra-diagonal block sizes default to 1 when not given */

  if (eb_size != NULL) {
    for (ii = 0; ii < 4; ii++)
      g->eb_size[ii] = eb_size[ii];
  }
  else {
    for (ii = 0; ii < 4; ii++)
      g->eb_size[ii] = 1;
  }

  g->n_rows = cs_matrix_get_n_rows(a);
  g->n_cols_ext = cs_matrix_get_n_columns(a);
  g->n_faces = n_faces;
  g->n_g_rows = g->n_rows;

#if defined(HAVE_MPI)
  if (cs_glob_n_ranks > 1) {
    cs_gnum_t _n_rows = g->n_rows;
    MPI_Allreduce(&_n_rows, &(g->n_g_rows), 1, CS_MPI_GNUM, MPI_SUM,
                  cs_glob_mpi_comm);
  }
#endif

  if (cs_matrix_is_mapped_from_native(a))
    g->face_cell = face_cell;

  g->relaxation = 0;

  /* Shared mesh quantities (grid may read but not modify them) */

  g->cell_cen = cell_cen;
  g->cell_vol = cell_vol;
  g->face_normal = face_normal;

  g->halo = cs_matrix_get_halo(a);

  /* Set shared matrix coefficients */

  if (cs_matrix_is_mapped_from_native(a)) {
    g->da = cs_matrix_get_diagonal(a);
    g->xa= cs_matrix_get_extra_diagonal(a);
  }

  /* NOTE(review): when only one of a_conv/a_diff is non-NULL, both are
     still dereferenced below; the contract appears to be that callers
     pass both or neither — confirm. */

  if (a_conv != NULL || a_diff != NULL) {
    g->conv_diff = true;
    g->da_conv = cs_matrix_get_diagonal(a_conv);
    g->da_diff = cs_matrix_get_diagonal(a_diff);
    g->xa_conv = cs_matrix_get_extra_diagonal(a_conv);
    g->xa_diff = cs_matrix_get_extra_diagonal(a_diff);
  }

  if (g->face_cell != NULL) {

    /* Build symmetrized extra-diagonal terms if necessary,
       or point to existing terms if already symmetric */

    if (g->symmetric == true) {
      g->xa0 = g->xa;
      g->_xa0 = NULL;
    }
    else if (g->conv_diff) {
      g->xa0 = g->xa;
      g->_xa0 = NULL;
      g->xa0_diff = g->xa_diff;
      g->_xa0_diff = NULL;
    }
    else {
      /* Non-symmetric, no conv/diff split: average the two
         extra-diagonal values of each face */
      BFT_MALLOC(g->_xa0, n_faces, cs_real_t);
      for (cs_lnum_t face_id = 0; face_id < n_faces; face_id++)
        g->_xa0[face_id] = 0.5 * (g->xa[face_id*2] + g->xa[face_id*2+1]);
      g->xa0 = g->_xa0;
    }

    /* Compute multigrid-specific terms */

    BFT_MALLOC(g->xa0ij, n_faces*3, cs_real_t);

    const cs_real_t *restrict g_xa0 = g->xa0;
    if (g->conv_diff)
      g_xa0 = g->xa0_diff;

    /* xa0ij = xa0 * (JJ' - II') for each face (I, J):
       used by P1 prolongation on coarser levels */

# pragma omp parallel for if(n_faces > CS_THR_MIN)
    for (cs_lnum_t face_id = 0; face_id < n_faces; face_id++) {
      cs_lnum_t i0 = face_cell[face_id][0];
      cs_lnum_t i1 = face_cell[face_id][1];
      for (cs_lnum_t kk = 0; kk < 3; kk++) {
        g->xa0ij[face_id*3 + kk] =   g_xa0[face_id]
                                   * (  cell_cen[i1*3 + kk]
                                      - cell_cen[i0*3 + kk]);
      }
    }

  }

  g->matrix_struct = NULL;
  g->matrix = a;
  g->_matrix = NULL;

  return g;
}
5146
5147 /*----------------------------------------------------------------------------
5148 * Create base grid by mapping from parent (possibly shared) matrix.
5149 *
5150 * Note that as arrays given as arguments are shared by the created grid
5151 * (which can only access them, not modify them), the grid should be
5152 * destroyed before those arrays.
5153 *
5154 * parameters:
5155 * a <-- associated matrix
5156 * n_ranks <-- number of active ranks (<= 1 to restrict to local values)
5157 *
5158 * returns:
5159 * base grid structure
5160 *----------------------------------------------------------------------------*/
5161
5162 cs_grid_t *
cs_grid_create_from_parent(const cs_matrix_t * a,int n_ranks)5163 cs_grid_create_from_parent(const cs_matrix_t *a,
5164 int n_ranks)
5165 {
5166 cs_grid_t *g = NULL;
5167
5168 /* Create empty structure and map base data */
5169
5170 g = _create_grid();
5171
5172 bool local = true;
5173 const cs_halo_t *h = cs_matrix_get_halo(a);
5174 if (h != NULL) {
5175 local = false;
5176 if (h->n_c_domains == 1) {
5177 if (h->c_domain_rank[0] == cs_glob_rank_id)
5178 local = true;
5179 }
5180 }
5181
5182 if (n_ranks > 1 || local) {
5183 g->matrix = a;
5184 #if defined(HAVE_MPI)
5185 g->comm = cs_base_get_rank_step_comm(g->merge_stride);
5186 MPI_Comm_size(g->comm, &(g->n_ranks));
5187 #endif
5188 }
5189 else {
5190 g->_matrix = cs_matrix_create_by_local_restrict(a);
5191 g->matrix = g->_matrix;
5192 #if defined(HAVE_MPI)
5193 g->comm = cs_base_get_rank_step_comm(g->merge_stride);
5194 MPI_Comm_size(g->comm, &(g->n_ranks));
5195 #endif
5196 }
5197
5198 g->level = 0;
5199 g->symmetric = cs_matrix_is_symmetric(g->matrix);
5200
5201 const cs_lnum_t *db_size = cs_matrix_get_diag_block_size(g->matrix);
5202 const cs_lnum_t *eb_size = cs_matrix_get_extra_diag_block_size(g->matrix);
5203
5204 for (cs_lnum_t ii = 0; ii < 4; ii++)
5205 g->db_size[ii] = db_size[ii];
5206
5207 for (cs_lnum_t ii = 0; ii < 4; ii++)
5208 g->eb_size[ii] = eb_size[ii];
5209
5210 g->n_rows = cs_matrix_get_n_rows(g->matrix);
5211 g->n_cols_ext = cs_matrix_get_n_columns(g->matrix);
5212 g->halo = cs_matrix_get_halo(g->matrix);
5213
5214 g->n_g_rows = g->n_rows;
5215
5216 #if defined(HAVE_MPI)
5217 if (g->halo != NULL && g->comm != MPI_COMM_NULL) {
5218 cs_gnum_t _g_n_rows = g->n_rows;
5219 MPI_Allreduce(&_g_n_rows, &(g->n_g_rows), 1, CS_MPI_GNUM, MPI_SUM, g->comm);
5220 }
5221 #endif
5222
5223 return g;
5224 }
5225
5226 /*----------------------------------------------------------------------------
5227 * Destroy a grid structure.
5228 *
5229 * parameters:
5230 * grid <-> Pointer to grid structure pointer
5231 *----------------------------------------------------------------------------*/
5232
5233 void
cs_grid_destroy(cs_grid_t ** grid)5234 cs_grid_destroy(cs_grid_t **grid)
5235 {
5236 if (grid != NULL && *grid != NULL) {
5237
5238 cs_grid_t *g = *grid;
5239 cs_grid_free_quantities(g);
5240
5241 BFT_FREE(g->_face_cell);
5242
5243 BFT_FREE(g->coarse_row);
5244
5245 if (g->_halo != NULL)
5246 cs_halo_destroy(&(g->_halo));
5247
5248 BFT_FREE(g->_da);
5249 BFT_FREE(g->_xa);
5250
5251 cs_matrix_destroy(&(g->_matrix));
5252 cs_matrix_structure_destroy(&(g->matrix_struct));
5253
5254 #if defined(HAVE_MPI)
5255 BFT_FREE(g->merge_cell_idx);
5256 #endif
5257
5258 BFT_FREE(*grid);
5259 }
5260 }
5261
5262 /*----------------------------------------------------------------------------
5263 * Free a grid structure's associated quantities.
5264 *
5265 * The quantities required to compute a coarser grid with relaxation from a
5266 * given grid are not needed after that stage, so may be freed.
5267 *
5268 * parameters:
5269 * g <-> Pointer to grid structure
5270 *----------------------------------------------------------------------------*/
5271
5272 void
cs_grid_free_quantities(cs_grid_t * g)5273 cs_grid_free_quantities(cs_grid_t *g)
5274 {
5275 assert(g != NULL);
5276
5277 if (cs_matrix_get_type(g->matrix) != CS_MATRIX_NATIVE) {
5278 BFT_FREE(g->_face_cell);
5279 g->face_cell = NULL;
5280 BFT_FREE(g->_xa);
5281 g->_xa = NULL;
5282 if (cs_matrix_get_type(g->matrix) != CS_MATRIX_MSR) {
5283 BFT_FREE(g->_da);
5284 g->xa = NULL;
5285 }
5286 }
5287
5288 BFT_FREE(g->coarse_face);
5289
5290 BFT_FREE(g->_cell_cen);
5291 BFT_FREE(g->_cell_vol);
5292 BFT_FREE(g->_face_normal);
5293
5294 BFT_FREE(g->_da_conv);
5295 BFT_FREE(g->_da_diff);
5296 BFT_FREE(g->_xa_conv);
5297 BFT_FREE(g->_xa_diff);
5298 BFT_FREE(g->_xa0);
5299 BFT_FREE(g->_xa0_diff);
5300
5301 BFT_FREE(g->xa0ij);
5302 }
5303
5304 /*----------------------------------------------------------------------------
5305 * Get grid information.
5306 *
5307 * parameters:
5308 * g <-- Grid structure
5309 * level --> Level in multigrid hierarchy (or NULL)
5310 * symmetric --> Symmetric matrix coefficients indicator (or NULL)
5311 * db_size --> Size of the diagonal block (or NULL)
5312 * eb_size --> Size of the extra diagonal block (or NULL)
5313 * n_ranks --> number of ranks with data (or NULL)
5314 * n_rows --> Number of local rows (or NULL)
5315 * n_cols_ext --> Number of columns including ghosts (or NULL)
5316 * n_entries --> Number of entries (or NULL)
5317 * n_g_rows --> Number of global rows (or NULL)
5318 *----------------------------------------------------------------------------*/
5319
5320 void
cs_grid_get_info(const cs_grid_t * g,int * level,bool * symmetric,cs_lnum_t * db_size,cs_lnum_t * eb_size,int * n_ranks,cs_lnum_t * n_rows,cs_lnum_t * n_cols_ext,cs_lnum_t * n_entries,cs_gnum_t * n_g_rows)5321 cs_grid_get_info(const cs_grid_t *g,
5322 int *level,
5323 bool *symmetric,
5324 cs_lnum_t *db_size,
5325 cs_lnum_t *eb_size,
5326 int *n_ranks,
5327 cs_lnum_t *n_rows,
5328 cs_lnum_t *n_cols_ext,
5329 cs_lnum_t *n_entries,
5330 cs_gnum_t *n_g_rows)
5331 {
5332 assert(g != NULL);
5333
5334 if (level != NULL)
5335 *level = g->level;
5336
5337 if (symmetric != NULL)
5338 *symmetric = g->symmetric;
5339
5340 if (db_size != NULL) {
5341 db_size[0] = g->db_size[0];
5342 db_size[1] = g->db_size[1];
5343 db_size[2] = g->db_size[2];
5344 db_size[3] = g->db_size[3];
5345 }
5346
5347 if (eb_size != NULL) {
5348 eb_size[0] = g->eb_size[0];
5349 eb_size[1] = g->eb_size[1];
5350 eb_size[2] = g->eb_size[2];
5351 eb_size[3] = g->eb_size[3];
5352 }
5353
5354 if (n_ranks != NULL) {
5355 #if defined(HAVE_MPI)
5356 *n_ranks = g->n_ranks;
5357 #else
5358 *n_ranks = 1;
5359 #endif
5360 }
5361
5362 if (n_rows != NULL)
5363 *n_rows = g->n_rows;
5364 if (n_cols_ext != NULL)
5365 *n_cols_ext = g->n_cols_ext;
5366 assert(g->n_rows <= g->n_cols_ext);
5367 if (n_entries != NULL) {
5368 if (g->matrix != NULL)
5369 *n_entries = cs_matrix_get_n_entries(g->matrix);
5370 else
5371 *n_entries = 0;
5372 }
5373
5374 if (n_g_rows != NULL)
5375 *n_g_rows = g->n_g_rows;
5376 }
5377
5378 /*----------------------------------------------------------------------------
5379 * Get number of rows corresponding to a grid.
5380 *
5381 * parameters:
5382 * g <-- Grid structure
5383 *
5384 * returns:
5385 * number of rows of grid structure
5386 *----------------------------------------------------------------------------*/
5387
5388 cs_lnum_t
cs_grid_get_n_rows(const cs_grid_t * g)5389 cs_grid_get_n_rows(const cs_grid_t *g)
5390 {
5391 assert(g != NULL);
5392
5393 return g->n_rows;
5394 }
5395
5396 /*----------------------------------------------------------------------------
5397 * Get number of extended (local + ghost) columns corresponding to a grid.
5398 *
5399 * parameters:
5400 * g <-- Grid structure
5401 *
5402 * returns:
5403 * number of extended rows of grid structure
5404 *----------------------------------------------------------------------------*/
5405
5406 cs_lnum_t
cs_grid_get_n_cols_ext(const cs_grid_t * g)5407 cs_grid_get_n_cols_ext(const cs_grid_t *g)
5408 {
5409 assert(g != NULL);
5410
5411 return g->n_cols_ext;
5412 }
5413
5414 /*----------------------------------------------------------------------------
5415 * Get maximum number of extended (local + ghost) columns corresponding to
5416 * a grid, both with and without merging between ranks
5417 *
5418 * parameters:
5419 * g <-- Grid structure
5420 *
5421 * returns:
5422 * maximum number of extended cells of grid structure, with or without
5423 * merging
5424 *----------------------------------------------------------------------------*/
5425
5426 cs_lnum_t
cs_grid_get_n_cols_max(const cs_grid_t * g)5427 cs_grid_get_n_cols_max(const cs_grid_t *g)
5428 {
5429 cs_lnum_t retval = 0;
5430
5431 if (g != NULL)
5432 retval = CS_MAX(g->n_cols_ext, g->n_elts_r[1]);
5433
5434 return retval;
5435 }
5436
5437 /*----------------------------------------------------------------------------
5438 * Get global number of rows corresponding to a grid.
5439 *
5440 * parameters:
5441 * g <-- Grid structure
5442 *
5443 * returns:
5444 * global number of rows of grid structure
5445 *----------------------------------------------------------------------------*/
5446
5447 cs_gnum_t
cs_grid_get_n_g_rows(const cs_grid_t * g)5448 cs_grid_get_n_g_rows(const cs_grid_t *g)
5449 {
5450 assert(g != NULL);
5451
5452 return g->n_g_rows;
5453 }
5454
5455 /*----------------------------------------------------------------------------
5456 * Get grid's associated matrix information.
5457 *
5458 * parameters:
5459 * g <-- Grid structure
5460 *
5461 * returns:
5462 * pointer to matrix structure
5463 *----------------------------------------------------------------------------*/
5464
5465 const cs_matrix_t *
cs_grid_get_matrix(const cs_grid_t * g)5466 cs_grid_get_matrix(const cs_grid_t *g)
5467 {
5468 const cs_matrix_t *m = NULL;
5469
5470 assert(g != NULL);
5471
5472 m = g->matrix;
5473
5474 return m;
5475 }
5476
5477 #if defined(HAVE_MPI)
5478
5479 /*----------------------------------------------------------------------------
5480 * Get the MPI subcommunicator for a given grid.
5481 *
5482 * parameters:
5483 * g <-- Grid structure
5484 *
5485 * returns:
5486 * MPI communicator
5487 *----------------------------------------------------------------------------*/
5488
5489 MPI_Comm
cs_grid_get_comm(const cs_grid_t * g)5490 cs_grid_get_comm(const cs_grid_t *g)
5491 {
5492 assert(g != NULL);
5493
5494 return g->comm;
5495 }
5496
5497 /*----------------------------------------------------------------------------
5498 * Get the MPI subcommunicator for a given merge stride.
5499 *
5500 * parameters:
5501 * parent <-- parent MPI communicator
5502 * merge_stride <-- associated merge stride
5503 *
5504 * returns:
5505 * MPI communicator
5506 *----------------------------------------------------------------------------*/
5507
5508 MPI_Comm
cs_grid_get_comm_merge(MPI_Comm parent,int merge_stride)5509 cs_grid_get_comm_merge(MPI_Comm parent,
5510 int merge_stride)
5511 {
5512 MPI_Comm comm = MPI_COMM_NULL;
5513
5514 if (parent != MPI_COMM_NULL) {
5515 int size;
5516 MPI_Comm_size(parent, &size);
5517 int rank_step = cs_glob_n_ranks / size;
5518 if (cs_glob_n_ranks %size > 0)
5519 rank_step += 1;
5520 rank_step *= merge_stride;
5521 comm = cs_base_get_rank_step_comm(rank_step);
5522 }
5523
5524 return comm;
5525 }
5526
5527 #endif
5528
5529 /*----------------------------------------------------------------------------
5530 * Create coarse grid from fine grid.
5531 *
5532 * parameters:
5533 * f <-- Fine grid structure
5534 * coarsening_type <-- Coarsening criteria type
5535 * aggregation_limit <-- Maximum allowed fine rows per coarse rows
5536 * verbosity <-- Verbosity level
5537 * merge_stride <-- Associated merge stride
5538 * merge_rows_mean_threshold <-- mean number of rows under which
5539 * merging should be applied
5540 * merge_rows_glob_threshold <-- global number of rows under which
5541 * merging should be applied
5542 * relaxation_parameter <-- P0/P1 relaxation factor
5543 *
5544 * returns:
5545 * coarse grid structure
5546 *----------------------------------------------------------------------------*/
5547
cs_grid_t *
cs_grid_coarsen(const cs_grid_t  *f,
                int               coarsening_type,
                int               aggregation_limit,
                int               verbosity,
                int               merge_stride,
                int               merge_rows_mean_threshold,
                cs_gnum_t         merge_rows_glob_threshold,
                double            relaxation_parameter)
{
  int recurse = 0;  /* set to 2 when a pairwise pass cannot reach the
                       aggregation limit and coarsening must recurse */

  cs_lnum_t isym = 2;  /* extra-diagonal values per face; 1 if symmetric */
  bool conv_diff = f->conv_diff;

  /* By default, always use MSR structure, as it usually provides the
     best performance, and is required for the hybrid Gauss-Seidel-Jacobi
     smoothers. In multithreaded case, we also prefer to use a matrix
     structure allowing threading without a specific renumbering, as
     structures are rebuilt often (so only CSR and MSR can be considered) */

  cs_matrix_type_t fine_matrix_type = cs_matrix_get_type(f->matrix);
  cs_matrix_type_t coarse_matrix_type = CS_MATRIX_MSR;

  cs_matrix_variant_t *coarse_mv = NULL;

  cs_grid_t *c = NULL;

  const cs_lnum_t *db_size = f->db_size;

  assert(f != NULL);

  /* Initialization */

  c = _coarse_init(f);

  if (f->symmetric == true)
    isym = 1;

  /* P0/P1 relaxation needs face->cell adjacency; disable otherwise */

  c->relaxation = relaxation_parameter;
  if (f->face_cell == NULL && c->relaxation > 0)
    c->relaxation = 0;

  /* Ensure default is available */

  if (coarsening_type == CS_GRID_COARSENING_DEFAULT) {
    if (f->face_cell != NULL) {
      if (f->conv_diff == false)
        coarsening_type = CS_GRID_COARSENING_SPD_DX;
      else
        coarsening_type = CS_GRID_COARSENING_CONV_DIFF_DX;
    }
    else
      coarsening_type = CS_GRID_COARSENING_SPD_MX;
  }
  else if (coarsening_type == CS_GRID_COARSENING_SPD_DX) {
    /* closest alternative */
    if (f->face_cell == NULL)
      coarsening_type = CS_GRID_COARSENING_SPD_MX;
  }

  else if (coarsening_type == CS_GRID_COARSENING_SPD_PW) {
    /* closest alternative */
    if (fine_matrix_type == CS_MATRIX_NATIVE)
      coarsening_type = CS_GRID_COARSENING_SPD_MX;
  }

  else if (coarsening_type == CS_GRID_COARSENING_CONV_DIFF_DX) {
    /* closest alternative */
    if (f->face_cell == NULL)
      coarsening_type = CS_GRID_COARSENING_SPD_MX;
  }

  /* Determine fine->coarse cell connectivity (aggregation) */

  if (   coarsening_type == CS_GRID_COARSENING_SPD_DX
      || coarsening_type == CS_GRID_COARSENING_CONV_DIFF_DX) {
    if (f->face_cell != NULL)
      _automatic_aggregation_fc(f,
                                coarsening_type,
                                aggregation_limit,
                                c->relaxation,
                                verbosity,
                                c->coarse_row);
  }
  else if (coarsening_type == CS_GRID_COARSENING_SPD_MX) {
    switch (fine_matrix_type) {
    case CS_MATRIX_NATIVE:
      _automatic_aggregation_mx_native(f, aggregation_limit, verbosity,
                                       c->coarse_row);
      break;
    case CS_MATRIX_MSR:
      _automatic_aggregation_mx_msr(f, aggregation_limit, verbosity,
                                    c->coarse_row);
      break;
    default:
      bft_error(__FILE__, __LINE__, 0,
                _("%s: selected coarsening type (%s)\n"
                  "not available for %s structured matrices."),
                __func__, cs_grid_coarsening_type_name[coarsening_type],
                _(cs_matrix_get_type_name(f->matrix)));
    }
  }
  else if (coarsening_type == CS_GRID_COARSENING_SPD_PW) {
    switch (fine_matrix_type) {
    case CS_MATRIX_MSR:
      _automatic_aggregation_pw_msr(f, verbosity, c->coarse_row);
      /* a pairwise pass only halves the rows, so recurse below if a
         larger aggregate size is requested */
      if (aggregation_limit > 2)
        recurse = 2;
      break;
    default:
      bft_error(__FILE__, __LINE__, 0,
                _("%s: selected coarsening type (%s)\n"
                  "not available for %s structured matrices."),
                __func__, cs_grid_coarsening_type_name[coarsening_type],
                _(cs_matrix_get_type_name(f->matrix)));
    }
  }

  _coarsen(f, c);

  if (verbosity > 3)
    _aggregation_stats_log(f, c, verbosity);

  /* Coarse diagonal and extra-diagonal coefficient arrays */

  BFT_MALLOC(c->_da, c->n_cols_ext * c->db_size[3], cs_real_t);
  c->da = c->_da;

  BFT_MALLOC(c->_xa, c->n_faces*isym, cs_real_t);
  c->xa = c->_xa;

  if (  (fine_matrix_type == CS_MATRIX_NATIVE || f->face_cell != NULL)
      && c->relaxation > 0) {

    /* Allocate permanent arrays in coarse grid */

    BFT_MALLOC(c->_cell_cen, c->n_cols_ext*3, cs_real_t);
    c->cell_cen = c->_cell_cen;

    BFT_MALLOC(c->_cell_vol, c->n_cols_ext, cs_real_t);
    c->cell_vol = c->_cell_vol;

    BFT_MALLOC(c->_face_normal, c->n_faces*3, cs_real_t);
    c->face_normal = c->_face_normal;

    if (conv_diff) {
      BFT_MALLOC(c->_da_conv, c->n_cols_ext * c->db_size[3], cs_real_t);
      c->da_conv = c->_da_conv;
      BFT_MALLOC(c->_da_diff, c->n_cols_ext * c->db_size[3], cs_real_t);
      c->da_diff = c->_da_diff;
      BFT_MALLOC(c->_xa_conv, c->n_faces*2, cs_real_t);
      c->xa_conv = c->_xa_conv;
      BFT_MALLOC(c->_xa_diff, c->n_faces, cs_real_t);
      c->xa_diff = c->_xa_diff;
    }

    /* We could have xa0 point to xa if symmetric, but this would require
       caution in CRSTGR to avoid overwriting. */

    BFT_MALLOC(c->_xa0, c->n_faces*isym, cs_real_t);
    c->xa0 = c->_xa0;

    if (conv_diff) {
      BFT_MALLOC(c->_xa0_diff, c->n_faces, cs_real_t);
      c->xa0_diff = c->_xa0_diff;
    }

    BFT_MALLOC(c->xa0ij, c->n_faces*3, cs_real_t);

    /* Matrix-related data */

    _compute_coarse_cell_quantities(f, c);

    /* Synchronize grid's geometric quantities */

    if (c->halo != NULL) {

      cs_halo_sync_var_strided(c->halo, CS_HALO_STANDARD, c->_cell_cen, 3);
      if (c->halo->n_transforms > 0)
        cs_halo_perio_sync_coords(c->halo, CS_HALO_STANDARD, c->_cell_cen);

      cs_halo_sync_var(c->halo, CS_HALO_STANDARD, c->_cell_vol);

    }

  }

  if (fine_matrix_type == CS_MATRIX_MSR && c->relaxation <= 0) {

    _compute_coarse_quantities_msr(f, c);

    /* Merge grids if we are below the threshold */
#if defined(HAVE_MPI)
    if (merge_stride > 1 && c->n_ranks > 1 && recurse == 0) {
      cs_gnum_t _n_ranks = c->n_ranks;
      cs_gnum_t _n_mean_g_rows = c->n_g_rows / _n_ranks;
      if (   _n_mean_g_rows < (cs_gnum_t)merge_rows_mean_threshold
          || c->n_g_rows < merge_rows_glob_threshold) {
        /* merging operates on the native format, so round-trip
           MSR -> native -> merge -> MSR */
        _native_from_msr(c);
        _merge_grids(c, merge_stride, verbosity);
        _msr_from_native(c);
      }
    }
#endif

  }

  else if (f->face_cell != NULL) {

    if (conv_diff)
      _compute_coarse_quantities_conv_diff(f, c, verbosity);
    else
      _compute_coarse_quantities_native(f, c, verbosity);

    /* Synchronize matrix's geometric quantities */

    if (c->halo != NULL)
      cs_halo_sync_var_strided(c->halo, CS_HALO_STANDARD, c->_da, db_size[3]);

    /* Merge grids if we are below the threshold */

#if defined(HAVE_MPI)
    if (merge_stride > 1 && c->n_ranks > 1 && recurse == 0) {
      cs_gnum_t _n_ranks = c->n_ranks;
      cs_gnum_t _n_mean_g_rows = c->n_g_rows / _n_ranks;
      if (   _n_mean_g_rows < (cs_gnum_t)merge_rows_mean_threshold
          || c->n_g_rows < merge_rows_glob_threshold)
        _merge_grids(c, merge_stride, verbosity);
    }
#endif

    c->matrix_struct = cs_matrix_structure_create(coarse_matrix_type,
                                                  true,
                                                  c->n_rows,
                                                  c->n_cols_ext,
                                                  c->n_faces,
                                                  c->face_cell,
                                                  c->halo,
                                                  NULL);

    c->_matrix = cs_matrix_create(c->matrix_struct);

    cs_matrix_set_coefficients(c->_matrix,
                               c->symmetric,
                               c->db_size,
                               c->eb_size,
                               c->n_faces,
                               c->face_cell,
                               c->da,
                               c->xa);

    c->matrix = c->_matrix;

    /* Apply tuning if needed */

    if (_grid_tune_max_level > 0) {

      cs_matrix_fill_type_t mft
        = cs_matrix_get_fill_type(f->symmetric,
                                  f->db_size,
                                  f->eb_size);

      if (_grid_tune_max_level > f->level) {
        int k = CS_MATRIX_N_FILL_TYPES*(f->level) + mft;
        coarse_mv = _grid_tune_variant[k];

        /* Create tuned variant upon first pass for this level and
           fill type */

        if (   coarse_mv == NULL
            && _grid_tune_max_fill_level[mft] > f->level) {

          cs_log_printf(CS_LOG_PERFORMANCE,
                        _("\n"
                          "Tuning for coarse matrices of level %d and type: %s\n"
                          "==========================\n"),
                        f->level + 1, cs_matrix_fill_type_name[mft]);

          int n_min_products;
          double t_measure;

          cs_matrix_get_tuning_runs(&n_min_products, &t_measure);

          coarse_mv = cs_matrix_variant_tuned(c->matrix,
                                              1,
                                              n_min_products,
                                              t_measure);

          _grid_tune_variant[k] = coarse_mv;

          if (_grid_tune_max_fill_level[mft] == f->level + 1) {
            cs_log_printf(CS_LOG_PERFORMANCE, "\n");
            cs_log_separator(CS_LOG_PERFORMANCE);
          }
        }

      }

    }

    if (coarse_mv != NULL)
      cs_matrix_variant_apply(c->_matrix, coarse_mv);
  }

  /* Empty local grid (all rows merged away): build placeholder matrix */

  if (c->matrix == NULL) {
    assert(c->n_rows == 0);
    _build_coarse_matrix_null(c, coarse_matrix_type);
  }

  /* Recurse if necessary */

  if (recurse > 1) {

    /* Build coarser grid from coarse grid */
    cs_grid_t *cc = cs_grid_coarsen(c,
                                    coarsening_type,
                                    aggregation_limit / recurse,
                                    verbosity,
                                    merge_stride,
                                    merge_rows_mean_threshold,
                                    merge_rows_glob_threshold,
                                    relaxation_parameter);

    /* Project coarsening so cc maps directly from f, bypassing c */

    _project_coarse_row_to_parent(cc);
    BFT_FREE(cc->coarse_row);
    cc->coarse_row = c->coarse_row;
    c->coarse_row = NULL;

    if (c->face_cell != NULL) {
      BFT_FREE(cc->coarse_face);
      BFT_FREE(cc->_face_cell);
      _coarsen_faces(f,
                     cc->coarse_row,
                     cc->n_rows,
                     &(cc->n_faces),
                     &(cc->coarse_face),
                     &(cc->_face_cell));
      cc->face_cell = (const cs_lnum_2_t *)(cc->_face_cell);
    }

    /* Keep coarsest grid only: cc replaces c as f's direct child */

    cc->level -=1;
    cc->parent = f;

    assert(cc->parent == c->parent);

    cs_grid_destroy(&c);
    c = cc;
  }

  /* Optional verification */

  if (verbosity > 3) {
    if (f->level == 0)
      _verify_matrix(f);
    _verify_matrix(c);
  }

  /* Return new (coarse) grid */

  return c;
}
5912
5913 /*----------------------------------------------------------------------------
5914 * Create coarse grid with only one row per rank from fine grid.
5915 *
5916 * parameters:
5917 * f <-- Fine grid structure
5918 * merge_stride <-- Associated merge stride
5919 * verbosity <-- Verbosity level
5920 *
5921 * returns:
5922 * coarse grid structure
5923 *----------------------------------------------------------------------------*/
5924
cs_grid_t *
cs_grid_coarsen_to_single(const cs_grid_t  *f,
                          int               merge_stride,
                          int               verbosity)
{
  cs_lnum_t isym = 2;  /* extra-diagonal values per face; 1 if symmetric */

  /* By default, always use MSR structure, as it often seems to provide the
     best performance, and is required for the hybrid Gauss-Seidel-Jacobi
     smoothers. In multithreaded case, we also prefer to use a matrix
     structure allowing threading without a specific renumbering, as
     structures are rebuilt often (so only CSR and MSR can be considered) */

  cs_matrix_type_t fine_matrix_type = cs_matrix_get_type(f->matrix);

  cs_grid_t *c = NULL;

  const cs_lnum_t *db_size = f->db_size;

  assert(f != NULL);

  /* Initialization */

  c = _coarse_init(f);

  if (f->symmetric == true)
    isym = 1;

  c->relaxation = 0;

  /* All to a single row: every fine row aggregates into coarse row 0 */

  for (cs_lnum_t i = 0; i < f->n_rows; i++)
    c->coarse_row[i] = 0;

  _coarsen(f, c);

  if (verbosity > 3)
    _aggregation_stats_log(f, c, verbosity);

  if (fine_matrix_type == CS_MATRIX_MSR) {
    _compute_coarse_quantities_msr(f, c);

#if defined(HAVE_MPI)
    /* merging operates on the native format, so round-trip
       MSR -> native -> merge -> MSR */
    if (c->n_ranks > 1 && merge_stride > 1) {
      _native_from_msr(c);
      _merge_grids(c, merge_stride, verbosity);
      _msr_from_native(c);
    }
#endif
  }

  else if (f->face_cell != NULL) {

    BFT_MALLOC(c->_da, c->n_cols_ext * c->db_size[3], cs_real_t);
    c->da = c->_da;

    BFT_MALLOC(c->_xa, c->n_faces*isym, cs_real_t);
    c->xa = c->_xa;

    _compute_coarse_quantities_native(f, c, verbosity);

    /* Synchronize matrix's geometric quantities */

    if (c->halo != NULL)
      cs_halo_sync_var_strided(c->halo, CS_HALO_STANDARD, c->_da, db_size[3]);

    /* Merge grids if we are below the threshold */

#if defined(HAVE_MPI)
    if (c->n_ranks > 1 && merge_stride > 1)
      _merge_grids(c, merge_stride, verbosity);
#endif

    _msr_from_native(c);
  }

  /* NOTE(review): assumes c->_matrix was built by one of the two branches
     above (_compute_coarse_quantities_msr or _msr_from_native); if neither
     applies, c->matrix stays NULL — confirm callers guarantee this. */

  c->matrix = c->_matrix;

  /* Optional verification */

  if (verbosity > 3) {
    if (f->level == 0)
      _verify_matrix(f);
    _verify_matrix(c);
  }

  /* Return new (coarse) grid */

  return c;
}
6016
6017 /*----------------------------------------------------------------------------
6018 * Compute coarse row variable values from fine row values
6019 *
6020 * parameters:
6021 * f <-- Fine grid structure
 *   c      <-- Coarse grid structure
6023 * f_var <-- Variable defined on fine grid rows
6024 * c_var --> Variable defined on coarse grid rows
6025 *----------------------------------------------------------------------------*/
6026
void
cs_grid_restrict_row_var(const cs_grid_t  *f,
                         const cs_grid_t  *c,
                         const cs_real_t  *f_var,
                         cs_real_t        *c_var)
{
  cs_lnum_t f_n_rows = f->n_rows;
  cs_lnum_t c_n_cols_ext = c->n_elts_r[1];  /* pre-merge receive size */

  const cs_lnum_t *coarse_row;
  const cs_lnum_t *db_size = f->db_size;

  assert(f != NULL);
  assert(c != NULL);
  assert(c->coarse_row != NULL || f_n_rows == 0);
  assert(f_var != NULL || f_n_rows == 0);
  assert(c_var != NULL || c_n_cols_ext == 0);

  /* Set coarse values: zero-initialize, then sum fine contributions
     into each coarse row (restriction is a simple sum over aggregates) */

  coarse_row = c->coarse_row;

  cs_lnum_t _c_n_cols_ext = c_n_cols_ext*db_size[0];

# pragma omp parallel for  if(_c_n_cols_ext > CS_THR_MIN)
  for (cs_lnum_t ii = 0; ii < _c_n_cols_ext; ii++)
    c_var[ii] = 0.;

  if (f->level == 0) { /* possible penalization at first level */

    /* penalized rows have coarse_row < 0 and are skipped */

    if (db_size[0] == 1) {
      for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
        cs_lnum_t i = coarse_row[ii];
        if (i >= 0)
          c_var[i] += f_var[ii];
      }
    }
    else {
      for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
        cs_lnum_t i = coarse_row[ii];
        if (i >= 0) {
          for (cs_lnum_t j = 0; j < db_size[0]; j++)
            c_var[i*db_size[1]+j] += f_var[ii*db_size[1]+j];
        }
      }
    }
  }

  else {
    /* at coarser levels every fine row has a valid coarse row */
    if (db_size[0] == 1) {
      for (cs_lnum_t ii = 0; ii < f_n_rows; ii++)
        c_var[coarse_row[ii]] += f_var[ii];
    }
    else {
      for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
        cs_lnum_t i = coarse_row[ii];
        for (cs_lnum_t j = 0; j < db_size[0]; j++)
          c_var[i*db_size[1]+j] += f_var[ii*db_size[1]+j];
      }
    }
  }

#if defined(HAVE_MPI)

  /* If grid merging has taken place, gather coarse data */

  if (c->merge_sub_size > 1) {

    MPI_Comm  comm = cs_glob_mpi_comm;
    static const int tag = 'r'+'e'+'s'+'t'+'r'+'i'+'c'+'t';

    /* Append data: merged-away ranks send their local block to the
       sub-root, which places it at its merge_cell_idx offset */

    if (c->merge_sub_rank == 0) {
      int rank_id;
      MPI_Status status;
      assert(cs_glob_rank_id == c->merge_sub_root);
      for (rank_id = 1; rank_id < c->merge_sub_size; rank_id++) {
        cs_lnum_t n_recv = (  c->merge_cell_idx[rank_id+1]
                            - c->merge_cell_idx[rank_id]);
        int dist_rank = c->merge_sub_root + c->merge_stride*rank_id;
        MPI_Recv(c_var + c->merge_cell_idx[rank_id]*db_size[1],
                 n_recv*db_size[1], CS_MPI_REAL, dist_rank, tag, comm, &status);
      }
    }
    else
      MPI_Send(c_var, c->n_elts_r[0]*db_size[1], CS_MPI_REAL,
               c->merge_sub_root, tag, comm);
  }

#endif /* defined(HAVE_MPI) */
}
6119
6120 /*----------------------------------------------------------------------------
6121 * Compute fine row variable values from coarse row values
6122 *
6123 * parameters:
 *   c      <-- Coarse grid structure
6125 * f <-- Fine grid structure
6126 * c_var <-- Variable defined on coarse grid rows
6127 * f_var --> Variable defined on fine grid rows
6128 *----------------------------------------------------------------------------*/
6129
void
cs_grid_prolong_row_var(const cs_grid_t  *c,
                        const cs_grid_t  *f,
                        cs_real_t        *c_var,
                        cs_real_t        *f_var)
{
  const cs_lnum_t *coarse_row;
  const cs_real_t *_c_var = c_var;

  const cs_lnum_t *db_size = f->db_size;

  cs_lnum_t f_n_rows = f->n_rows;

  assert(f != NULL);
  assert(c != NULL);
  assert(c->coarse_row != NULL || f_n_rows == 0);
  assert(f_var != NULL);
  assert(c_var != NULL);

#if defined(HAVE_MPI)

  /* If grid merging has taken place, scatter coarse data
     (inverse of the gather done in cs_grid_restrict_row_var) */

  if (c->merge_sub_size > 1) {

    MPI_Comm  comm = cs_glob_mpi_comm;
    static const int tag = 'p'+'r'+'o'+'l'+'o'+'n'+'g';

    /* Append data */

    if (c->merge_sub_rank == 0) {
      int rank_id;
      assert(cs_glob_rank_id == c->merge_sub_root);
      for (rank_id = 1; rank_id < c->merge_sub_size; rank_id++) {
        cs_lnum_t n_send = (  c->merge_cell_idx[rank_id+1]
                            - c->merge_cell_idx[rank_id]);
        int dist_rank = c->merge_sub_root + c->merge_stride*rank_id;
        MPI_Send(c_var + c->merge_cell_idx[rank_id]*db_size[1],
                 n_send*db_size[1], CS_MPI_REAL, dist_rank, tag, comm);
      }
    }
    else {
      MPI_Status status;
      MPI_Recv(c_var, c->n_elts_r[0]*db_size[1], CS_MPI_REAL,
               c->merge_sub_root, tag, comm, &status);
    }
  }

#endif /* defined(HAVE_MPI) */

  /* Set fine values (possible penalization at first level):
     each fine row takes the value of its coarse aggregate */

  coarse_row = c->coarse_row;

  /* NOTE(review): the pragmas below use "omp parallel" without "for",
     which would have every thread execute the whole loop (all writing
     identical values) — likely intended as "parallel for"; confirm
     against upstream. */

  if (f->level == 0) {

    if (db_size[0] == 1) {
#     pragma omp parallel if(f_n_rows > CS_THR_MIN)
      for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
        cs_lnum_t ic = coarse_row[ii];
        if (ic >= 0) {
          f_var[ii] = _c_var[ic];
        }
        else
          f_var[ii] = 0;  /* penalized row: no coarse counterpart */
      }
    }
    else {
#     pragma omp parallel if(f_n_rows > CS_THR_MIN)
      for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
        cs_lnum_t ic = coarse_row[ii];
        if (ic >= 0) {
          for (cs_lnum_t i = 0; i < db_size[0]; i++)
            f_var[ii*db_size[1]+i] = _c_var[ic*db_size[1]+i];
        }
        else {
          for (cs_lnum_t i = 0; i < db_size[0]; i++)
            f_var[ii*db_size[1]+i] = 0;
        }
      }
    }

  }

  else { /* coarser levels, no need for penalization tests */

    if (db_size[0] == 1) {
#     pragma omp parallel if(f_n_rows > CS_THR_MIN)
      for (cs_lnum_t ii = 0; ii < f_n_rows; ii++)
        f_var[ii] = _c_var[coarse_row[ii]];
    }
    else {
#     pragma omp parallel if(f_n_rows > CS_THR_MIN)
      for (cs_lnum_t ii = 0; ii < f_n_rows; ii++) {
        cs_lnum_t ic = coarse_row[ii];
        for (cs_lnum_t i = 0; i < db_size[0]; i++)
          f_var[ii*db_size[1]+i] = _c_var[ic*db_size[1]+i];
      }
    }

  }
}
6232
6233 /*----------------------------------------------------------------------------
6234 * Project coarse grid row numbers to base grid.
6235 *
6236 * If a global coarse grid row number is larger than max_num, its
6237 * value modulo max_num is used.
6238 *
6239 * parameters:
6240 * g <-- Grid structure
6241 * n_base_rows <-- Number of rows in base grid
6242 * max_num <-- Values of c_row_num = global_num % max_num
6243 * c_row_num --> Global coarse row number (modulo max_num)
6244 *----------------------------------------------------------------------------*/
6245
void
cs_grid_project_row_num(const cs_grid_t  *g,
                        cs_lnum_t         n_base_rows,
                        int               max_num,
                        int               c_row_num[])
{
  cs_lnum_t ii = 0;
  cs_gnum_t base_shift = 1;   /* 1-based global numbering start */
  cs_gnum_t _max_num = max_num;
  cs_lnum_t n_max_rows = 0;
  cs_lnum_t *tmp_num_1 = NULL, *tmp_num_2 = NULL;
  const cs_grid_t *_g = g;

  assert(g != NULL);
  assert(c_row_num != NULL);

  /* Initialize array: sized for the largest grid in the hierarchy,
     since we project through every level down to the base */

  n_max_rows = g->n_rows;
  for (_g = g; _g != NULL; _g = _g->parent) {
    if (_g->n_rows > n_max_rows)
      n_max_rows = _g->n_rows;
  }

  BFT_MALLOC(tmp_num_1, n_max_rows, cs_lnum_t);

  /* Compute local base starting row number in parallel mode */

#if defined(HAVE_MPI)
  if (g->comm != MPI_COMM_NULL) {
    cs_gnum_t local_shift = g->n_rows;
    cs_gnum_t global_shift = 0;
    /* inclusive prefix sum of row counts over ranks */
    MPI_Scan(&local_shift, &global_shift, 1, CS_MPI_GNUM, MPI_SUM,
             g->comm);
    base_shift = 1 + global_shift - g->n_rows;
  }
#endif

  /* Global row numbers, reduced modulo max_num */

  for (ii = 0; ii < g->n_rows; ii++)
    tmp_num_1[ii] = (cs_gnum_t)(ii + base_shift) % _max_num;

  if (g->level > 0) {

    /* Allocate temporary arrays */

    BFT_MALLOC(tmp_num_2, n_max_rows, cs_lnum_t);
    for (cs_lnum_t i = 0; i < n_max_rows; i++)
      tmp_num_2[i] = -1; /* singleton */

    /* Walk up the hierarchy, prolonging the numbering one level at
       a time until the base grid is reached */

    for (_g = g; _g->level > 0; _g = _g->parent) {

      cs_lnum_t n_parent_rows = _g->parent->n_rows;

#if defined(HAVE_MPI)
      _scatter_row_num(_g, tmp_num_1);
#endif /* defined(HAVE_MPI) */

      for (ii = 0; ii < n_parent_rows; ii++) {
        cs_lnum_t ic = _g->coarse_row[ii];
        if (ic >= 0)
          tmp_num_2[ii] = tmp_num_1[ic];
      }

      for (ii = 0; ii < n_parent_rows; ii++)
        tmp_num_1[ii] = tmp_num_2[ii];

    }

    assert(_g->level == 0);
    assert(_g->n_rows == n_base_rows);

    /* Free temporary arrays */

    BFT_FREE(tmp_num_2);
  }

  /* NOTE(review): memcpy copies n_base_rows*sizeof(int) bytes from a
     cs_lnum_t buffer — assumes sizeof(cs_lnum_t) == sizeof(int); confirm
     for builds with 64-bit local numbering. */

  memcpy(c_row_num, tmp_num_1, n_base_rows*sizeof(int));

  BFT_FREE(tmp_num_1);
}
6326
6327 /*----------------------------------------------------------------------------
6328 * Project coarse grid row rank to base grid.
6329 *
6330 * parameters:
6331 * g <-- Grid structure
6332 * n_base_rows <-- Number of rows in base grid
6333 * f_row_rank --> Global coarse row rank projected to fine rows
6334 *----------------------------------------------------------------------------*/
6335
void
cs_grid_project_row_rank(const cs_grid_t  *g,
                         cs_lnum_t         n_base_rows,
                         int               f_row_rank[])
{
  cs_lnum_t ii;
  cs_lnum_t n_max_rows = 0;
  int *tmp_rank_1 = NULL, *tmp_rank_2 = NULL;
  const cs_grid_t *_g = g;

  assert(g != NULL);
  assert(f_row_rank != NULL || g->n_rows == 0);

  /* Size working array for the largest grid in the hierarchy */

  n_max_rows = g->n_rows;
  for (_g = g; _g != NULL; _g = _g->parent) {
    if (_g->n_rows > n_max_rows)
      n_max_rows = _g->n_rows;
  }

  BFT_MALLOC(tmp_rank_1, n_max_rows, int);

  /* Every local coarse row is owned by this rank */

  for (ii = 0; ii < g->n_rows; ii++)
    tmp_rank_1[ii] = cs_glob_rank_id;

  /* Project to finer levels */

  if (g->level > 0) {

    /* Allocate temporary arrays */

    BFT_MALLOC(tmp_rank_2, n_max_rows, int);

    for (_g = g; _g->level > 0; _g = _g->parent) {

      cs_lnum_t n_parent_rows = _g->parent->n_rows;

      /* prolong (scatter) rank ids one level toward the base grid */

      _prolong_row_int(_g,
                       _g->parent,
                       tmp_rank_1,
                       tmp_rank_2);

      for (ii = 0; ii < n_parent_rows; ii++)
        tmp_rank_1[ii] = tmp_rank_2[ii];

    }

    assert(_g->level == 0);
    assert(_g->n_rows == n_base_rows);

    /* Free temporary arrays */

    BFT_FREE(tmp_rank_2);
  }

  memcpy(f_row_rank, tmp_rank_1, n_base_rows*sizeof(int));

  BFT_FREE(tmp_rank_1);
}
6394
6395 /*----------------------------------------------------------------------------
6396 * Project variable from coarse grid to base grid
6397 *
6398 * parameters:
6399 * g <-- Grid structure
6400 * n_base_rows <-- Number of rows in base grid
6401 * c_var <-- Variable on coarse grid
6402 * f_var --> Variable projected to fine grid
6403 *----------------------------------------------------------------------------*/
6404
6405 void
cs_grid_project_var(const cs_grid_t * g,cs_lnum_t n_base_rows,const cs_real_t c_var[],cs_real_t f_var[])6406 cs_grid_project_var(const cs_grid_t *g,
6407 cs_lnum_t n_base_rows,
6408 const cs_real_t c_var[],
6409 cs_real_t f_var[])
6410 {
6411 cs_lnum_t ii;
6412 int i;
6413 cs_lnum_t n_max_rows = 0;
6414 cs_real_t *tmp_var_1 = NULL, *tmp_var_2 = NULL;
6415 const cs_grid_t *_g = g;
6416
6417 const cs_lnum_t *db_size = g->db_size;
6418
6419 assert(g != NULL);
6420 assert(c_var != NULL || g->n_rows == 0);
6421 assert(f_var != NULL);
6422
6423 n_max_rows = g->n_rows;
6424 for (_g = g; _g != NULL; _g = _g->parent) {
6425 if (_g->n_rows > n_max_rows)
6426 n_max_rows = _g->n_rows;
6427 }
6428
6429 BFT_MALLOC(tmp_var_1, n_max_rows*db_size[1], cs_real_t);
6430 memcpy(tmp_var_1, c_var, g->n_rows*db_size[1]*sizeof(cs_real_t));
6431
6432 /* Project to finer levels */
6433
6434 if (g->level > 0) {
6435
6436 /* Allocate temporary arrays */
6437
6438 BFT_MALLOC(tmp_var_2, n_max_rows*db_size[1], cs_real_t);
6439
6440 for (_g = g; _g->level > 0; _g = _g->parent) {
6441
6442 cs_lnum_t n_parent_rows = _g->parent->n_rows;
6443
6444 cs_grid_prolong_row_var(_g,
6445 _g->parent,
6446 tmp_var_1,
6447 tmp_var_2);
6448
6449 for (ii = 0; ii < n_parent_rows; ii++)
6450 for (i = 0; i < db_size[0]; i++)
6451 tmp_var_1[ii*db_size[1]+i] = tmp_var_2[ii*db_size[1]+i];
6452
6453 }
6454
6455 assert(_g->level == 0);
6456 assert(_g->n_rows == n_base_rows);
6457
6458 /* Free temporary arrays */
6459
6460 BFT_FREE(tmp_var_2);
6461 }
6462
6463 memcpy(f_var, tmp_var_1, n_base_rows*db_size[1]*sizeof(cs_real_t));
6464
6465 BFT_FREE(tmp_var_1);
6466 }
6467
6468 /*----------------------------------------------------------------------------
6469 * Compute diagonal dominance metric and project it to base grid
6470 *
6471 * parameters:
6472 * g <-- Grid structure
6473 * n_base_rows <-- Number of rows in base grid
6474 * diag_dom --> Diagonal dominance metric (on fine grid)
6475 *----------------------------------------------------------------------------*/
6476
6477 void
cs_grid_project_diag_dom(const cs_grid_t * g,cs_lnum_t n_base_rows,cs_real_t diag_dom[])6478 cs_grid_project_diag_dom(const cs_grid_t *g,
6479 cs_lnum_t n_base_rows,
6480 cs_real_t diag_dom[])
6481 {
6482 cs_real_t *dd = NULL;
6483 const cs_lnum_t *db_size = g->db_size;
6484
6485 assert(g != NULL);
6486 assert(diag_dom != NULL);
6487
6488 if (g->level == 0)
6489 dd = diag_dom;
6490 else
6491 BFT_MALLOC(dd, g->n_cols_ext*db_size[3], cs_real_t);
6492
6493 /* Compute coarse diagonal dominance */
6494
6495 cs_matrix_diag_dominance(g->matrix, dd);
6496
6497 /* Now project to finer levels */
6498
6499 if (dd != diag_dom) {
6500 cs_grid_project_var(g, n_base_rows, dd, diag_dom);
6501 BFT_FREE(dd);
6502 }
6503 }
6504
6505 /*----------------------------------------------------------------------------
6506 * Finalize global info related to multigrid solvers
6507 *----------------------------------------------------------------------------*/
6508
6509 void
cs_grid_finalize(void)6510 cs_grid_finalize(void)
6511 {
6512 if (_grid_tune_max_level > 0) {
6513
6514 for (int i = 0; i < _grid_tune_max_level; i++) {
6515 for (int j = 0; j < CS_MATRIX_N_FILL_TYPES; j++) {
6516 int k = CS_MATRIX_N_FILL_TYPES*i + j;
6517 if (_grid_tune_variant[k] != NULL)
6518 cs_matrix_variant_destroy(&(_grid_tune_variant[k]));
6519 }
6520 }
6521
6522 BFT_FREE(_grid_tune_variant);
6523 BFT_FREE(_grid_tune_max_fill_level);
6524
6525 _grid_tune_max_level = 0;
6526 }
6527 }
6528
6529 /*----------------------------------------------------------------------------
6530 * Dump grid structure
6531 *
6532 * parameters:
6533 * g <-- grid structure that should be dumped
6534 *----------------------------------------------------------------------------*/
6535
6536 void
cs_grid_dump(const cs_grid_t * g)6537 cs_grid_dump(const cs_grid_t *g)
6538 {
6539 cs_lnum_t i;
6540
6541 if (g == NULL) {
6542 bft_printf("\n\n grid: null\n");
6543 return;
6544 }
6545 bft_printf("\n"
6546 " grid: %p\n"
6547 " level: %d (parent: %p)\n"
6548 " n_rows: %d\n"
6549 " n_cols_ext: %d\n"
6550 " n_faces: %d\n"
6551 " n_g_cells: %d\n"
6552 " n_elts_r: [%d, %d]\n",
6553 (const void *)g, g->level, (const void *)(g->parent),
6554 (int)(g->n_rows), (int)(g->n_cols_ext),
6555 (int)(g->n_faces), (int)(g->n_g_rows),
6556 (int)(g->n_elts_r[0]), (int)(g->n_elts_r[1]));
6557
6558 #if defined(HAVE_MPI)
6559
6560 bft_printf("\n"
6561 " merge_sub_root: %d\n"
6562 " merge_sub_rank: %d\n"
6563 " merge_sub_size: %d\n"
6564 " merge_stride: %d\n"
6565 " next_merge_stride: %d\n"
6566 " n_ranks: %d\n",
6567 g->merge_sub_root, g->merge_sub_rank, g->merge_sub_size,
6568 g->merge_stride, g->next_merge_stride, g->n_ranks);
6569
6570 if (g->merge_cell_idx != NULL) {
6571 bft_printf(" merge_cell_idx\n");
6572 for (i = 0; i < g->merge_sub_size + 1; i++)
6573 bft_printf(" %ld: %ld\n", (long)i, (long)g->merge_cell_idx[i]);
6574 }
6575
6576 #endif
6577 bft_printf("\n"
6578 " face_cell: %p\n"
6579 " _face_cell: %p\n"
6580 " coarse_row: %p\n"
6581 " coarse_face: %p\n"
6582 " halo: %p\n",
6583 (const void *)g->face_cell, (const void *)g->_face_cell,
6584 (const void *)g->coarse_row, (const void *)g->coarse_face,
6585 (const void *)g->halo);
6586
6587 if (g->face_cell != NULL) {
6588 bft_printf("\n"
6589 " face -> cell connectivity;\n");
6590 for (i = 0; i < g->n_faces; i++)
6591 bft_printf(" %ld : %ld, %ld\n", (long)(i+1),
6592 (long)(g->face_cell[i][0]), (long)(g->face_cell[i][1]));
6593 }
6594
6595 if (g->coarse_row != NULL && g->parent != NULL) {
6596 bft_printf("\n"
6597 " coarse_row;\n");
6598 for (i = 0; i < g->parent->n_rows; i++)
6599 bft_printf(" %ld : %ld\n",
6600 (long)(i+1), (long)(g->coarse_row[i]));
6601 }
6602
6603 if (g->coarse_face != NULL && g->parent != NULL) {
6604 bft_printf("\n"
6605 " coarse_face;\n");
6606 for (i = 0; i < g->parent->n_faces; i++)
6607 bft_printf(" %ld : %ld\n",
6608 (long)(i+1), (long)(g->coarse_face[i]));
6609 }
6610
6611 cs_halo_dump(g->halo, 1);
6612 }
6613
6614 /*! (DOXYGEN_SHOULD_SKIP_THIS) \endcond */
6615
6616 /*============================================================================
6617 * Public function definitions
6618 *============================================================================*/
6619
6620 /*----------------------------------------------------------------------------*/
6621 /*!
6622 * \brief Set matrix tuning behavior for multigrid coarse meshes.
6623 *
6624 * The finest mesh (level 0) is handled by the default tuning options,
6625 * so only coarser meshes are considered here.
6626 *
6627 * \param[in] fill_type associated matrix fill type
6628 * \param[in] max_level maximum level for which tuning is active
6629 */
6630 /*----------------------------------------------------------------------------*/
6631
6632 void
cs_grid_set_matrix_tuning(cs_matrix_fill_type_t fill_type,int max_level)6633 cs_grid_set_matrix_tuning(cs_matrix_fill_type_t fill_type,
6634 int max_level)
6635 {
6636 if (_grid_tune_max_level < max_level) {
6637
6638 if (_grid_tune_max_level == 0) {
6639 BFT_MALLOC(_grid_tune_max_fill_level, CS_MATRIX_N_FILL_TYPES, int);
6640 for (int i = 0; i < CS_MATRIX_N_FILL_TYPES; i++)
6641 _grid_tune_max_fill_level[i] = 0;
6642 }
6643
6644 BFT_REALLOC(_grid_tune_variant,
6645 CS_MATRIX_N_FILL_TYPES*max_level, cs_matrix_variant_t *);
6646
6647 for (int i = _grid_tune_max_level; i < max_level; i++) {
6648 for (int j = 0; j < CS_MATRIX_N_FILL_TYPES; j++) {
6649 _grid_tune_variant[CS_MATRIX_N_FILL_TYPES*i + j] = NULL;
6650 }
6651 }
6652
6653 _grid_tune_max_level = max_level;
6654 }
6655
6656 _grid_tune_max_fill_level[fill_type] = max_level;
6657 }
6658
6659 /*----------------------------------------------------------------------------*/
6660
6661 END_C_DECLS
6662