1 /*============================================================================
2  * Partitioner
3  *============================================================================*/
4 
5 /*
6   This file is part of Code_Saturne, a general-purpose CFD tool.
7 
8   Copyright (C) 1998-2021 EDF S.A.
9 
10   This program is free software; you can redistribute it and/or modify it under
11   the terms of the GNU General Public License as published by the Free Software
12   Foundation; either version 2 of the License, or (at your option) any later
13   version.
14 
15   This program is distributed in the hope that it will be useful, but WITHOUT
16   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
18   details.
19 
20   You should have received a copy of the GNU General Public License along with
21   this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
22   Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 */
24 
25 /*----------------------------------------------------------------------------*/
26 
27 #include "cs_defs.h"
28 
29 /*----------------------------------------------------------------------------
30  * Standard C library headers
31  *----------------------------------------------------------------------------*/
32 
33 #include <assert.h>
34 #include <errno.h>
35 #include <locale.h>
36 #include <math.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 
41 #if defined(HAVE_MPI)
42 #include <mpi.h>
43 #endif
44 
45 /*----------------------------------------------------------------------------
46  * METIS library headers
47  *----------------------------------------------------------------------------*/
48 
49 #if defined(HAVE_METIS) || defined(HAVE_PARMETIS)
50 
51 #ifdef __cplusplus
52 extern "C" {
53 #endif
54 
55 #if defined(HAVE_PARMETIS)
56 #include <parmetis.h>
57 #endif
58 
59 #if defined(HAVE_METIS)
60 #include <metis.h>
61 #endif
62 
63 #ifdef __cplusplus
64 }
65 #endif
66 
67 #endif /* defined(HAVE_METIS) || defined(HAVE_PARMETIS) */
68 
69 /*----------------------------------------------------------------------------
70  * SCOTCH library headers
71  *----------------------------------------------------------------------------*/
72 
73 #if defined(HAVE_SCOTCH)
74 #include <stdint.h>
75 #include <scotch.h>
76 #elif defined(HAVE_PTSCOTCH)
77 #include <stdint.h>
78 #include <ptscotch.h>
79 #endif
80 
81 /*----------------------------------------------------------------------------
82  *  Local headers
83  *----------------------------------------------------------------------------*/
84 
85 #include "bft_error.h"
86 #include "bft_mem.h"
87 #include "bft_printf.h"
88 
89 #include "fvm_io_num.h"
90 
91 #include "cs_all_to_all.h"
92 #include "cs_base.h"
93 #include "cs_block_dist.h"
94 #include "cs_block_to_part.h"
95 #include "cs_file.h"
96 #include "cs_io.h"
97 #include "cs_log.h"
98 #include "cs_mesh.h"
99 #include "cs_mesh_builder.h"
100 #include "cs_order.h"
101 #include "cs_part_to_block.h"
102 #include "cs_timer.h"
103 
104 /*----------------------------------------------------------------------------
105  *  Header for the current file
106  *----------------------------------------------------------------------------*/
107 
108 #include "cs_partition.h"
109 
110 /*----------------------------------------------------------------------------*/
111 
112 BEGIN_C_DECLS
113 
114 /*============================================================================
115  * Public Type documentation
116  *============================================================================*/
117 
118 /*
119  * \enum cs_partition_stage_t cs_partition.h
120  *
121  * Partitioning is always done just after reading the mesh, unless a
122  * partitioning input file is available, in which case the partitioning
123  * read replaces this stage.
124  *
125  * When a mesh modification implying a change of cell connectivity graph
126  * is expected, the mesh may be re-partitioned after the pre-processing
127  * stage, prior to calculation. By default, re-partitioning is only done
128  * if the partitioning algorithm chosen for that stage is expected to
129  * produce different results due to the connectivity change. This is
130  * the case for graph-based algorithms such as those of METIS or SCOTCH,
131  * when mesh joining is defined, or additional periodic matching is defined
132  * (and the algorithm is not configured to ignore periodicity information).
133  *
134  * There are thus two possible partitioning stages:
135  *
136  * - CS_PARTITION_FOR_PREPROCESS, which is optional, and occurs
137  *   just  after reading the mesh.
138  * - CS_PARTITION_MAIN, which occurs just after reading the mesh if
139  *   it is the only stage,, or after mesh preprocessing (and before
140  *   computation), if the partitioning for preprocessing stage is
141  *   activated.
142  *
143  * The number of partitioning stages is determined automatically based on
144  * information provided through \ref cs_partition_set_preprocess_hints,
145  * but re-partitioning may also be forced or inhibited using the
146  * \ref cs_partition_set_preprocess function.
147  */
148 
149 /*
150  * \enum cs_partition_algorithm_t cs_partition.h
151  *
152  * Partitioning algorithm type.
153 
154  * If the default algorithm is selected, the choice will be based on the
155  * following priority, depending on available libraries:
156  * -  PT-SCOTCH (or SCOTCH if partitioning on one rank);
157  * -  ParMETIS (or METIS if partitioning on one rank);
158  * -  Morton space-filling curve (in bounding box)
159  *
160  * If both partitioning stages are active, the default for the preprocessing
161  * stage will be based on the Morton space-filling curve (in bounding box),
162  * as this should be cheaper, and the initial cell connectivity graph
163  * is usually expected to be modified during preprocessing.
164  *
165  * \var CS_PARTITION_DEFAULT           Default partitioning (based on stage)
166  * \var CS_PARTITION_SFC_MORTON_BOX    Morton (Z) curve in bounding box
167  * \var CS_PARTITION_SFC_MORTON_CUBE   Morton (Z) curve in bounding cube
168  * \var CS_PARTITION_SFC_HILBERT_BOX   Peano-Hilbert curve in bounding box
169  * \var CS_PARTITION_SFC_HILBERT_CUBE  Peano-Hilbert curve in bounding cube
170  * \var CS_PARTITION_SCOTCH            PT-SCOTCH or SCOTCH
171  * \var CS_PARTITION_METIS             ParMETIS or METIS
172  * \var CS_PARTITION_BLOCK             Unoptimized (naive) block partitioning
173  * \var CS_PARTITION_NONE              No repartitioning (for computation
174  *                                     stage after preprocessing)
175  */
176 
177 /*! \cond DOXYGEN_SHOULD_SKIP_THIS */
178 
179 /*=============================================================================
180  * Local Macro definitions
181  *============================================================================*/
182 
183 /*============================================================================
184  * Local Type definitions
185  *============================================================================*/
186 
187 typedef double  _vtx_coords_t[3];
188 
189 /*============================================================================
190  * Public function prototypes
191  *============================================================================*/
192 
193 /*============================================================================
194  * Static global variables
195  *============================================================================*/
196 
197 static cs_partition_algorithm_t   _part_algorithm[2] = {CS_PARTITION_DEFAULT,
198                                                         CS_PARTITION_DEFAULT};
199 static int                        _part_rank_step[2] = {1, 1};
200 static bool                       _part_ignore_perio[2] = {false, false};
201 
202 static int                        _part_compute_join_hint = false;
203 static int                        _part_compute_perio_hint = false;
204 static int                        _part_preprocess_active = 1; /* 0: inactive;
205                                                                   1: default;
206                                                                   2: active */
207 static int                        _part_write_output = 1;
208 static int                        _part_n_extra_partitions = 0;
209 static int                       *_part_extra_partitions_list = NULL;
210 
211 static bool                       _part_uniform_sfc_block_size = false;
212 
213 #if defined(WIN32) || defined(_WIN32)
214 static const char _dir_separator = '\\';
215 #else
216 static const char _dir_separator = '/';
217 #endif
218 
219 /*============================================================================
220  * Private function definitions
221  *============================================================================*/
222 
223 #if defined(HAVE_MPI)
224 
225 /*----------------------------------------------------------------------------
226  * Create a reduced communicator
227  *
228  * parameters:
229  *   part_step <-- step between active ranks
230  *
231  * returns:
232  *   handle to reduced communicator
233  *----------------------------------------------------------------------------*/
234 
235 static MPI_Comm
_init_reduced_communicator(int part_step)236 _init_reduced_communicator(int  part_step)
237 {
238   int n_ranks;
239   int ranges[1][3];
240   MPI_Group old_group, new_group;
241   MPI_Comm part_comm = MPI_COMM_NULL;
242 
243   n_ranks = cs_glob_n_ranks;
244 
245   MPI_Barrier(cs_glob_mpi_comm); /* For debugging */
246 
247   MPI_Comm_size(cs_glob_mpi_comm, &n_ranks);
248   MPI_Comm_group(cs_glob_mpi_comm, &old_group);
249 
250   ranges[0][0] = 0;
251   ranges[0][1] = n_ranks - 1;
252   ranges[0][2] = part_step;
253 
254   MPI_Group_range_incl(old_group, 1, ranges, &new_group);
255   MPI_Comm_create(cs_glob_mpi_comm, new_group, &part_comm);
256   MPI_Group_free(&new_group);
257 
258   MPI_Group_free(&old_group);
259 
260   MPI_Barrier(cs_glob_mpi_comm); /* For debugging */
261 
262   if (cs_glob_rank_id > -1 && cs_glob_rank_id % part_step) {
263     /* MPI_Comm_free(&part_comm) does not work outside of the group */
264     part_comm = MPI_COMM_NULL;
265   }
266 
267   return part_comm;
268 }
269 
270 #endif /* defined(HAVE_MPI) */
271 
272 /*----------------------------------------------------------------------------
273  * Display the distribution of cells per partition in serial mode
274  *
275  * parameters:
276  *   cell_range <-- first and past-the-last cell numbers for this rank
277  *   n_parts    <-- number of partitions
278  *   part       <-- cell partition number
279  *----------------------------------------------------------------------------*/
280 
281 static void
_cell_part_histogram(cs_gnum_t cell_range[2],int n_parts,const int part[])282 _cell_part_histogram(cs_gnum_t   cell_range[2],
283                      int         n_parts,
284                      const   int part[])
285 {
286   int i, k;
287   size_t j;
288   double step;
289 
290   cs_lnum_t *n_part_cells;
291   cs_lnum_t n_min, n_max;
292   size_t count[10];
293   int n_steps = 10;
294   size_t n_cells = 0;
295 
296   if (cell_range[1] > cell_range[0])
297     n_cells = cell_range[1] - cell_range[0];
298 
299   if (n_parts <= 1) /* Should never happen */
300     return;
301 
302   bft_printf(_("  Number of cells per domain (histogramm):\n"));
303 
304   BFT_MALLOC(n_part_cells, n_parts, cs_lnum_t);
305 
306   for (i = 0; i < n_parts; i++)
307     n_part_cells[i] = 0;
308 
309   for (j = 0; j < n_cells; j++)
310     n_part_cells[part[j]] += 1;
311 
312 #if defined(HAVE_MPI)
313 
314   if (cs_glob_n_ranks > 1) {
315     cs_lnum_t *n_part_cells_sum;
316     BFT_MALLOC(n_part_cells_sum, n_parts, cs_lnum_t);
317     MPI_Allreduce(n_part_cells, n_part_cells_sum, n_parts,
318                   CS_MPI_LNUM, MPI_SUM, cs_glob_mpi_comm);
319     BFT_FREE(n_part_cells);
320     n_part_cells = n_part_cells_sum;
321     n_part_cells_sum = NULL;
322   }
323 
324 #endif /* defined(HAVE_MPI) */
325 
326   /* Compute min and max */
327 
328   n_min = n_part_cells[0];
329   n_max = n_part_cells[0];
330 
331   for (i = 1; i < n_parts; i++) {
332     if (n_part_cells[i] > n_max)
333       n_max = n_part_cells[i];
334     else if (n_part_cells[i] < n_min)
335       n_min = n_part_cells[i];
336   }
337 
338   /* Define axis subdivisions */
339 
340   for (i = 0; i < n_steps; i++)
341     count[i] = 0;
342 
343   if (n_max - n_min > 0) {
344 
345     if (n_max-n_min < n_steps)
346       n_steps = n_max-n_min > 0 ? n_max-n_min : 1;
347 
348     step = (double)(n_max - n_min) / n_steps;
349 
350     /* Loop on partitions */
351 
352     for (i = 0; i < n_parts; i++) {
353 
354       /* Associated subdivision */
355 
356       for (j = 0, k = 1; k < n_steps; j++, k++) {
357         if (n_part_cells[i] < n_min + k*step)
358           break;
359       }
360       count[j] += 1;
361 
362     }
363 
364     for (i = 0, j = 1; i < n_steps - 1; i++, j++)
365       bft_printf("    [ %10d ; %10d [ = %10d\n",
366                  (int)(n_min + i*step),
367                  (int)(n_min + j*step),
368                  (int)(count[i]));
369 
370     bft_printf("    [ %10d ; %10d ] = %10d\n",
371                (int)(n_min + (n_steps - 1)*step),
372                (int)n_max,
373                (int)(count[n_steps - 1]));
374 
375   }
376 
377   else { /* if (n_max == n_min) */
378     bft_printf("    [ %10d ; %10d ] = %10d\n",
379                (int)(n_min), (int)n_max, (int)n_parts);
380   }
381 
382   BFT_FREE(n_part_cells);
383 }
384 
385 #if defined(HAVE_MPI)
386 
387 /*----------------------------------------------------------------------------
388  * Compute cell centers using minimal local data in parallel mode
389  *
390  * parameters:
391  *   n_cells      <-- number of cells
392  *   n_faces      <-- number of faces
393  *   face_cells   <-- face -> cells connectivity
394  *   face_vtx_idx <-- face -> vertices connectivity index
395  *   face_vtx     <-- face -> vertices connectivity
396  *   vtx_coord    <-- vertex coordinates
397  *   cell_center  --> cell centers
398  *----------------------------------------------------------------------------*/
399 
400 static void
_cell_center_g(cs_lnum_t n_cells,cs_lnum_t n_faces,const cs_lnum_t face_cells[],const cs_lnum_t face_vtx_idx[],const cs_lnum_t face_vtx[],const cs_real_t vtx_coord[],cs_coord_t cell_center[])401 _cell_center_g(cs_lnum_t         n_cells,
402                cs_lnum_t         n_faces,
403                const cs_lnum_t   face_cells[],
404                const cs_lnum_t   face_vtx_idx[],
405                const cs_lnum_t   face_vtx[],
406                const cs_real_t   vtx_coord[],
407                cs_coord_t        cell_center[])
408 {
409   cs_lnum_t i, j;
410   cs_lnum_t vtx_id, face_id, start_id, end_id;
411   cs_lnum_t n_face_vertices;
412   cs_coord_t ref_normal[3], vtx_cog[3], face_center[3];
413 
414   cs_lnum_t n_max_face_vertices = 0;
415 
416   _vtx_coords_t *face_vtx_coord = NULL;
417   cs_coord_t *weight = NULL;
418 
419   const double surf_epsilon = 1e-24;
420 
421   assert(face_vtx_idx[0] == 0);
422 
423   BFT_MALLOC(weight, n_cells, cs_coord_t);
424 
425   for (i = 0; i < n_cells; i++) {
426     weight[i] = 0.0;
427     for (j = 0; j < 3; j++)
428       cell_center[i*3 + j] = 0.0;
429   }
430 
431   /* Counting and allocation */
432 
433   n_max_face_vertices = 0;
434 
435   for (face_id = 0; face_id < n_faces; face_id++) {
436     n_face_vertices = face_vtx_idx[face_id + 1] - face_vtx_idx[face_id];
437     if (n_max_face_vertices <= n_face_vertices)
438       n_max_face_vertices = n_face_vertices;
439   }
440 
441   BFT_MALLOC(face_vtx_coord, n_max_face_vertices, _vtx_coords_t);
442 
443   /* Loop on each face */
444 
445   for (face_id = 0; face_id < n_faces; face_id++) {
446 
447     /* Initialization */
448 
449     cs_lnum_t tri_id;
450 
451     cs_lnum_t cell_id_0 = face_cells[face_id*2] -1;
452     cs_lnum_t cell_id_1 = face_cells[face_id*2 + 1] -1;
453     cs_coord_t unweighted_center[3] = {0.0, 0.0, 0.0};
454     cs_coord_t face_surface = 0.0;
455 
456     n_face_vertices = 0;
457 
458     start_id = face_vtx_idx[face_id];
459     end_id = face_vtx_idx[face_id + 1];
460 
461     /* Define the polygon (P) according to the vertices (Pi) of the face */
462 
463     for (vtx_id = start_id; vtx_id < end_id; vtx_id++) {
464 
465       cs_lnum_t shift = 3 * (face_vtx[vtx_id] - 1);
466       for (i = 0; i < 3; i++)
467         face_vtx_coord[n_face_vertices][i] = vtx_coord[shift + i];
468       n_face_vertices++;
469 
470     }
471 
472     /* Compute the barycenter of the face vertices */
473 
474     for (i = 0; i < 3; i++) {
475       vtx_cog[i] = 0.0;
476       for (vtx_id = 0; vtx_id < n_face_vertices; vtx_id++)
477         vtx_cog[i] += face_vtx_coord[vtx_id][i];
478       vtx_cog[i] /= n_face_vertices;
479     }
480 
481     /* Loop on the triangles of the face (defined by an edge of the face
482        and its barycenter) */
483 
484     for (i = 0; i < 3; i++) {
485       ref_normal[i] = 0.;
486       face_center[i] = 0.0;
487     }
488 
489     for (tri_id = 0 ; tri_id < n_face_vertices ; tri_id++) {
490 
491       cs_coord_t tri_surface;
492       cs_coord_t vect1[3], vect2[3], tri_normal[3], tri_center[3];
493 
494       cs_lnum_t id0 = tri_id;
495       cs_lnum_t id1 = (tri_id + 1)%n_face_vertices;
496 
497       /* Normal for each triangle */
498 
499       for (i = 0; i < 3; i++) {
500         vect1[i] = face_vtx_coord[id0][i] - vtx_cog[i];
501         vect2[i] = face_vtx_coord[id1][i] - vtx_cog[i];
502       }
503 
504       tri_normal[0] = vect1[1] * vect2[2] - vect2[1] * vect1[2];
505       tri_normal[1] = vect2[0] * vect1[2] - vect1[0] * vect2[2];
506       tri_normal[2] = vect1[0] * vect2[1] - vect2[0] * vect1[1];
507 
508       if (tri_id == 0) {
509         for (i = 0; i < 3; i++)
510           ref_normal[i] = tri_normal[i];
511       }
512 
513       /* Center of gravity for a triangle */
514 
515       for (i = 0; i < 3; i++) {
516         tri_center[i] = (  vtx_cog[i]
517                          + face_vtx_coord[id0][i]
518                          + face_vtx_coord[id1][i]) / 3.0;
519       }
520 
521       tri_surface = sqrt(  tri_normal[0]*tri_normal[0]
522                          + tri_normal[1]*tri_normal[1]
523                          + tri_normal[2]*tri_normal[2]) * 0.5;
524 
525       if ((  tri_normal[0]*ref_normal[0]
526            + tri_normal[1]*ref_normal[1]
527            + tri_normal[2]*ref_normal[2]) < 0.0)
528         tri_surface *= -1.0;
529 
530       /* Now compute contribution to face center and surface */
531 
532       face_surface += tri_surface;
533 
534       for (i = 0; i < 3; i++) {
535         face_center[i] += tri_surface * tri_center[i];
536         unweighted_center[i] = tri_center[i];
537       }
538 
539     } /* End of loop  on triangles of the face */
540 
541     if (face_surface > surf_epsilon) {
542       for (i = 0; i < 3; i++)
543         face_center[i] /= face_surface;
544     }
545     else {
546       face_surface = surf_epsilon;
547       for (i = 0; i < 3; i++)
548         face_center[i] = unweighted_center[i] * face_surface / n_face_vertices;
549     }
550 
551     /* Now contribute to cell centers */
552 
553     assert(cell_id_0 > -2 && cell_id_1 > -2);
554 
555     if (cell_id_0 > -1) {
556       for (i = 0; i < 3; i++)
557         cell_center[cell_id_0*3 + i] += face_center[i]*face_surface;
558       weight[cell_id_0] += face_surface;
559     }
560 
561     if (cell_id_1 > -1) {
562       for (i = 0; i < 3; i++)
563         cell_center[cell_id_1*3 + i] += face_center[i]*face_surface;
564       weight[cell_id_1] += face_surface;
565     }
566 
567   } /* End of loop on faces */
568 
569   BFT_FREE(face_vtx_coord);
570 
571   for (i = 0; i < n_cells; i++) {
572     for (j = 0; j < 3; j++)
573       cell_center[i*3 + j] /= weight[i];
574   }
575 
576   BFT_FREE(weight);
577 }
578 
579 /*----------------------------------------------------------------------------
580  * Compute cell centers using block data in parallel mode.
581  *
582  * parameters:
583  *   mb          <-- pointer to mesh builder helper structure
584  *   cell_center --> cell centers array
585  *   comm        <-- associated MPI communicator
586  *----------------------------------------------------------------------------*/
587 
588 static void
_precompute_cell_center_g(const cs_mesh_builder_t * mb,cs_coord_t cell_center[],MPI_Comm comm)589 _precompute_cell_center_g(const cs_mesh_builder_t  *mb,
590                           cs_coord_t                cell_center[],
591                           MPI_Comm                  comm)
592 {
593   cs_lnum_t i;
594   int n_ranks = 0;
595 
596   cs_datatype_t gnum_type = (sizeof(cs_gnum_t) == 8) ? CS_UINT64 : CS_UINT32;
597   cs_datatype_t real_type = (sizeof(cs_real_t) == 8) ? CS_DOUBLE : CS_FLOAT;
598 
599   cs_lnum_t _n_cells = 0;
600   cs_lnum_t _n_faces = 0;
601 
602   cs_gnum_t *_cell_num = NULL;
603   cs_gnum_t *_face_num = NULL;
604   cs_gnum_t *_face_gcells = NULL;
605   cs_gnum_t *_face_gvertices = NULL;
606 
607   cs_lnum_t *_face_cells = NULL;
608   cs_lnum_t *_face_vertices_idx = NULL;
609   cs_lnum_t *_face_vertices = NULL;
610 
611   cs_all_to_all_t *d = NULL;
612 
613   /* Initialization */
614 
615   MPI_Comm_size(comm, &n_ranks);
616 
617   assert((sizeof(cs_lnum_t) == 4) || (sizeof(cs_lnum_t) == 8));
618 
619   _n_cells = mb->cell_bi.gnum_range[1] - mb->cell_bi.gnum_range[0];
620 
621   BFT_MALLOC(_cell_num, _n_cells, cs_gnum_t);
622 
623   for (i = 0; i < _n_cells; i++)
624     _cell_num[i] = mb->cell_bi.gnum_range[0] + i;
625 
626   /* Distribute faces */
627   /*------------------*/
628 
629   d = cs_block_to_part_create_by_adj_s(comm,
630                                        mb->face_bi,
631                                        mb->cell_bi,
632                                        2,
633                                        mb->face_cells,
634                                        NULL,
635                                        NULL,
636                                        &_n_faces,
637                                        &_face_num);
638 
639   BFT_MALLOC(_face_gcells, _n_faces*2, cs_gnum_t);
640 
641   /* Face -> cell connectivity */
642 
643   cs_all_to_all_copy_array(d,
644                            gnum_type,
645                            2,
646                            true,  /* reverse */
647                            mb->face_cells,
648                            _face_gcells);
649 
650   /* Now convert face -> cell connectivity to local cell numbers */
651 
652   BFT_MALLOC(_face_cells, _n_faces*2, cs_lnum_t);
653 
654   cs_block_to_part_global_to_local(_n_faces*2,
655                                    1,
656                                    _n_cells,
657                                    true,
658                                    _cell_num,
659                                    _face_gcells,
660                                    _face_cells);
661 
662   BFT_FREE(_cell_num);
663   BFT_FREE(_face_gcells);
664 
665   /* Face connectivity */
666 
667   BFT_MALLOC(_face_vertices_idx, _n_faces + 1, cs_lnum_t);
668 
669   cs_all_to_all_copy_index(d,
670                            true,  /* reverse */
671                            mb->face_vertices_idx,
672                            _face_vertices_idx);
673 
674   BFT_MALLOC(_face_gvertices, _face_vertices_idx[_n_faces], cs_gnum_t);
675 
676   cs_all_to_all_copy_indexed(d,
677                              gnum_type,
678                              true,  /* reverse */
679                              mb->face_vertices_idx,
680                              mb->face_vertices,
681                              _face_vertices_idx,
682                              _face_gvertices);
683 
684   cs_all_to_all_destroy(&d);
685 
686   /* Vertices */
687 
688   size_t     _n_vertices = 0;
689   cs_gnum_t *_vtx_num = NULL;
690 
691   cs_order_single_gnum(_face_vertices_idx[_n_faces],
692                        1, /* base */
693                        _face_gvertices,
694                        &_n_vertices,
695                        &_vtx_num);
696 
697   cs_all_to_all_t *dv
698     = cs_all_to_all_create_from_block(_n_vertices,
699                                       CS_ALL_TO_ALL_USE_DEST_ID,
700                                       _vtx_num,
701                                       mb->vertex_bi,
702                                       comm);
703 
704   cs_real_t *_vtx_coord
705     = cs_all_to_all_copy_array(dv,
706                                real_type,
707                                3,
708                                true, /* reverse */
709                                mb->vertex_coords,
710                                NULL);
711 
712   cs_all_to_all_destroy(&dv);
713 
714   /* Now convert face -> vertex connectivity to local vertex numbers */
715 
716   BFT_MALLOC(_face_vertices, _face_vertices_idx[_n_faces], cs_lnum_t);
717 
718   cs_block_to_part_global_to_local(_face_vertices_idx[_n_faces],
719                                    1,
720                                    _n_vertices,
721                                    true,
722                                    _vtx_num,
723                                    _face_gvertices,
724                                    _face_vertices);
725 
726   BFT_FREE(_face_gvertices);
727 
728   _cell_center_g(_n_cells,
729                  _n_faces,
730                  _face_cells,
731                  _face_vertices_idx,
732                  _face_vertices,
733                  _vtx_coord,
734                  cell_center);
735 
736   BFT_FREE(_vtx_coord);
737   BFT_FREE(_vtx_num);
738 
739   BFT_FREE(_face_cells);
740 
741   BFT_FREE(_face_vertices_idx);
742   BFT_FREE(_face_vertices);
743 
744   BFT_FREE(_face_num);
745 }
746 
747 #endif /* defined(HAVE_MPI) */
748 
749 /*----------------------------------------------------------------------------
750  * Compute cell centers using block data in serial mode.
751  *
752  * parameters:
753  *   mb          <-- pointer to mesh builder helper structure
754  *   cell_center --> cell centers array
755  *----------------------------------------------------------------------------*/
756 
757 static void
_precompute_cell_center_l(const cs_mesh_builder_t * mb,cs_coord_t cell_center[])758 _precompute_cell_center_l(const cs_mesh_builder_t  *mb,
759                           cs_coord_t                cell_center[])
760 {
761   cs_lnum_t i, j;
762   cs_lnum_t vtx_id, face_id, start_id, end_id;
763   cs_lnum_t n_face_vertices;
764   cs_coord_t ref_normal[3], vtx_cog[3], face_center[3];
765 
766   cs_lnum_t n_cells = mb->cell_bi.gnum_range[1] - mb->cell_bi.gnum_range[0];
767   cs_lnum_t n_faces = mb->face_bi.gnum_range[1] - mb->face_bi.gnum_range[0];
768 
769   const cs_gnum_t  *face_cells = mb->face_cells;
770   const cs_lnum_t  *face_vtx_idx = mb->face_vertices_idx;
771   const cs_gnum_t  *face_vtx = mb->face_vertices;
772   const cs_real_t  *vtx_coord = mb->vertex_coords;
773 
774   cs_lnum_t n_max_face_vertices = 0;
775 
776   _vtx_coords_t *face_vtx_coord = NULL;
777   cs_coord_t *weight = NULL;
778 
779   const double surf_epsilon = 1e-24;
780 
781   assert(face_vtx_idx[0] == 0);
782 
783   BFT_MALLOC(weight, n_cells, cs_coord_t);
784 
785   for (i = 0; i < n_cells; i++) {
786     weight[i] = 0.0;
787     for (j = 0; j < 3; j++)
788       cell_center[i*3 + j] = 0.0;
789   }
790 
791   /* Counting and allocation */
792 
793   n_max_face_vertices = 0;
794 
795   for (face_id = 0; face_id < n_faces; face_id++) {
796     n_face_vertices = face_vtx_idx[face_id + 1] - face_vtx_idx[face_id];
797     if (n_max_face_vertices <= n_face_vertices)
798       n_max_face_vertices = n_face_vertices;
799   }
800 
801   BFT_MALLOC(face_vtx_coord, n_max_face_vertices, _vtx_coords_t);
802 
803   /* Loop on each face */
804 
805   for (face_id = 0; face_id < n_faces; face_id++) {
806 
807     /* Initialization */
808 
809     cs_lnum_t tri_id;
810 
811     cs_lnum_t cell_id_0 = face_cells[face_id*2] -1;
812     cs_lnum_t cell_id_1 = face_cells[face_id*2 + 1] -1;
813     cs_coord_t unweighted_center[3] = {0.0, 0.0, 0.0};
814     cs_coord_t face_surface = 0.0;
815 
816     n_face_vertices = 0;
817 
818     start_id = face_vtx_idx[face_id];
819     end_id = face_vtx_idx[face_id + 1];
820 
821     /* Define the polygon (P) according to the vertices (Pi) of the face */
822 
823     for (vtx_id = start_id; vtx_id < end_id; vtx_id++) {
824 
825       cs_lnum_t shift = 3 * (face_vtx[vtx_id] - 1);
826       for (i = 0; i < 3; i++)
827         face_vtx_coord[n_face_vertices][i] = vtx_coord[shift + i];
828       n_face_vertices++;
829 
830     }
831 
832     /* Compute the barycenter of the face vertices */
833 
834     for (i = 0; i < 3; i++) {
835       vtx_cog[i] = 0.0;
836       for (vtx_id = 0; vtx_id < n_face_vertices; vtx_id++)
837         vtx_cog[i] += face_vtx_coord[vtx_id][i];
838       vtx_cog[i] /= n_face_vertices;
839     }
840 
841     /* Loop on the triangles of the face (defined by an edge of the face
842        and its barycenter) */
843 
844     for (i = 0; i < 3; i++) {
845       ref_normal[i] = 0.;
846       face_center[i] = 0.0;
847     }
848 
849     for (tri_id = 0 ; tri_id < n_face_vertices ; tri_id++) {
850 
851       cs_coord_t tri_surface;
852       cs_coord_t vect1[3], vect2[3], tri_normal[3], tri_center[3];
853 
854       cs_lnum_t id0 = tri_id;
855       cs_lnum_t id1 = (tri_id + 1)%n_face_vertices;
856 
857       /* Normal for each triangle */
858 
859       for (i = 0; i < 3; i++) {
860         vect1[i] = face_vtx_coord[id0][i] - vtx_cog[i];
861         vect2[i] = face_vtx_coord[id1][i] - vtx_cog[i];
862       }
863 
864       tri_normal[0] = vect1[1] * vect2[2] - vect2[1] * vect1[2];
865       tri_normal[1] = vect2[0] * vect1[2] - vect1[0] * vect2[2];
866       tri_normal[2] = vect1[0] * vect2[1] - vect2[0] * vect1[1];
867 
868       if (tri_id == 0) {
869         for (i = 0; i < 3; i++)
870           ref_normal[i] = tri_normal[i];
871       }
872 
873       /* Center of gravity for a triangle */
874 
875       for (i = 0; i < 3; i++) {
876         tri_center[i] = (  vtx_cog[i]
877                          + face_vtx_coord[id0][i]
878                          + face_vtx_coord[id1][i]) / 3.0;
879       }
880 
881       tri_surface = sqrt(  tri_normal[0]*tri_normal[0]
882                          + tri_normal[1]*tri_normal[1]
883                          + tri_normal[2]*tri_normal[2]) * 0.5;
884 
885       if ((  tri_normal[0]*ref_normal[0]
886            + tri_normal[1]*ref_normal[1]
887            + tri_normal[2]*ref_normal[2]) < 0.0)
888         tri_surface *= -1.0;
889 
890       /* Now compute contribution to face center and surface */
891 
892       face_surface += tri_surface;
893 
894       for (i = 0; i < 3; i++) {
895         face_center[i] += tri_surface * tri_center[i];
896         unweighted_center[i] = tri_center[i];
897       }
898 
899     } /* End of loop  on triangles of the face */
900 
901     if (face_surface > surf_epsilon) {
902       for (i = 0; i < 3; i++)
903         face_center[i] /= face_surface;
904     }
905     else {
906       face_surface = surf_epsilon;
907       for (i = 0; i < 3; i++)
908         face_center[i] = unweighted_center[i] * face_surface / n_face_vertices;
909     }
910 
911     /* Now contribute to cell centers */
912 
913     assert(cell_id_0 > -2 && cell_id_1 > -2);
914 
915     if (cell_id_0 > -1) {
916       for (i = 0; i < 3; i++)
917         cell_center[cell_id_0*3 + i] += face_center[i]*face_surface;
918       weight[cell_id_0] += face_surface;
919     }
920 
921     if (cell_id_1 > -1) {
922       for (i = 0; i < 3; i++)
923         cell_center[cell_id_1*3 + i] += face_center[i]*face_surface;
924       weight[cell_id_1] += face_surface;
925     }
926 
927   } /* End of loop on faces */
928 
929   BFT_FREE(face_vtx_coord);
930 
931   for (i = 0; i < n_cells; i++) {
932     for (j = 0; j < 3; j++)
933       cell_center[i*3 + j] /= weight[i];
934   }
935 
936   BFT_FREE(weight);
937 }
938 
939 /*----------------------------------------------------------------------------
940  * Define cell ranks using a space-filling curve.
941  *
942  * parameters:
943  *   n_g_cells   <-- global number of cells
944  *   n_ranks     <-- number of ranks in partition
945  *   mb          <-- pointer to mesh builder helper structure
946  *   sfc_type    <-- type of space-filling curve
947  *   cell_rank   --> cell rank (1 to n numbering)
948  *   comm        <-- associated MPI communicator
949  *----------------------------------------------------------------------------*/
950 
951 #if defined(HAVE_MPI)
952 
953 static void
_cell_rank_by_sfc(cs_gnum_t n_g_cells,int n_ranks,const cs_mesh_builder_t * mb,fvm_io_num_sfc_t sfc_type,int cell_rank[],MPI_Comm comm)954 _cell_rank_by_sfc(cs_gnum_t                 n_g_cells,
955                   int                       n_ranks,
956                   const cs_mesh_builder_t  *mb,
957                   fvm_io_num_sfc_t          sfc_type,
958                   int                       cell_rank[],
959                   MPI_Comm                  comm)
960 
961 #else
962 
963 static void
964 _cell_rank_by_sfc(cs_gnum_t                 n_g_cells,
965                   int                       n_ranks,
966                   const cs_mesh_builder_t  *mb,
967                   fvm_io_num_sfc_t          sfc_type,
968                   int                       cell_rank[])
969 
970 #endif
971 {
972   cs_lnum_t i;
973   cs_timer_t  start_time, end_time;
974   cs_timer_counter_t dt;
975 
976   cs_lnum_t n_cells = 0, block_size = 0;
977 
978   cs_coord_t *cell_center = NULL;
979   fvm_io_num_t *cell_io_num = NULL;
980   const cs_gnum_t *cell_num = NULL;
981 
982   bft_printf(_("\n Partitioning by space-filling curve: %s.\n"),
983              _(fvm_io_num_sfc_type_name[sfc_type]));
984 
985   start_time = cs_timer_time();
986 
987   n_cells = mb->cell_bi.gnum_range[1] - mb->cell_bi.gnum_range[0];
988   block_size = mb->cell_bi.block_size;
989 
990   BFT_MALLOC(cell_center, n_cells*3, cs_coord_t);
991 
992 #if defined(HAVE_MPI)
993   if (n_ranks > 1)
994     _precompute_cell_center_g(mb, cell_center, comm);
995 #endif
996   if (n_ranks == 1)
997     _precompute_cell_center_l(mb, cell_center);
998 
999   end_time = cs_timer_time();
1000   dt = cs_timer_diff(&start_time, &end_time);
1001   start_time = end_time;
1002 
1003   cs_log_printf(CS_LOG_PERFORMANCE,
1004                 _("  precompute cell centers:    %.3g s\n"),
1005                 (double)(dt.nsec)/1.e9);
1006 
1007   cell_io_num = fvm_io_num_create_from_sfc(cell_center,
1008                                            3,
1009                                            n_cells,
1010                                            sfc_type);
1011 
1012   BFT_FREE(cell_center);
1013 
1014   cell_num = fvm_io_num_get_global_num(cell_io_num);
1015 
1016   block_size = n_g_cells / n_ranks;
1017   if (n_g_cells % n_ranks)
1018     block_size += 1;
1019 
1020   /* Determine rank based on global numbering with SFC ordering; */
1021 
1022   if (_part_uniform_sfc_block_size == false) {
1023 
1024     cs_gnum_t cells_per_rank = n_g_cells / n_ranks;
1025     cs_lnum_t rmdr = n_g_cells - cells_per_rank * (cs_gnum_t)n_ranks;
1026 
1027     if (rmdr == 0) {
1028       for (i = 0; i < n_cells; i++)
1029         cell_rank[i] = (cell_num[i] - 1) / cells_per_rank;
1030     }
1031     else {
1032       cs_gnum_t n_ranks_rmdr = n_ranks - rmdr;
1033       cs_gnum_t n_ranks_cells_per_rank = n_ranks_rmdr * cells_per_rank;
1034       for (i = 0; i < n_cells; i++) {
1035         if ((cell_num[i] - 1)  <  n_ranks_cells_per_rank)
1036           cell_rank[i] = (cell_num[i] - 1) / cells_per_rank;
1037         else
1038           cell_rank[i] = (cell_num[i] + n_ranks_rmdr - 1) / (cells_per_rank + 1);
1039       }
1040     }
1041 
1042   }
1043 
1044   else {
1045 
1046     /* Plan for case where we would need a fixed block size,
1047        for example, using an external linear solver assuming this.
1048        This may not work at high process counts, where the last
1049        ranks will have no data (a solution to this would be
1050        to build a slightly smaller MPI communicator). */
1051 
1052     for (i = 0; i < n_cells; i++) {
1053       cell_rank[i] = ((cell_num[i] - 1) / block_size);
1054       assert(cell_rank[i] > -1 && cell_rank[i] < n_ranks);
1055     }
1056 
1057   }
1058 
1059   cell_io_num = fvm_io_num_destroy(cell_io_num);
1060 
1061   end_time = cs_timer_time();
1062   dt = cs_timer_diff(&start_time, &end_time);
1063 
1064   if (sfc_type < FVM_IO_NUM_SFC_HILBERT_BOX)
1065     cs_log_printf(CS_LOG_PERFORMANCE,
1066                   _("  Morton (Z) curve:           %.3g s\n"),
1067                   (double)(dt.nsec)/1.e9);
1068   else
1069     cs_log_printf(CS_LOG_PERFORMANCE,
1070                   _("  Peano-Hilbert curve:        %.3g s\n"),
1071                   (double)(dt.nsec)/1.e9);
1072 }
1073 
1074 #if defined(HAVE_MPI)
1075 
1076 /*----------------------------------------------------------------------------
1077  * Update global face -> cells adjacency for periodicity in parallel mode.
1078  *
1079  * If face Fi is periodic with face Fj, and face Fi is adjacent to cell Ci,
1080  * while face Fj is adjacent to face Cj, we add Cj to Fi's adjacent cells,
1081  * and Ci to Fj's adjacent cells, just as if Fi and Fj were regular interior
1082  * faces (this ignores the geometric transformation, but is sufficent to
1083  * build the cells -> cells graph for domain partitioning).
1084  *
1085  * This function should be called when faces are distributed by blocks,
1086  * that is prior to redistribution based on face -> cells adjacency.
1087  *
1088  * arguments:
1089  *   bi                 <-- block size and range info for faces
1090  *   g_face_cells       <-> global face->cells adjacency to update
1091  *   n_periodic_couples <-- number of periodic couples
1092  *   periodic_couples   <-- array indicating periodic couples (interlaced,
1093  *                          using global numberings)
1094  *   comm               <-- associated MPI communicator
1095  *----------------------------------------------------------------------------*/
1096 
1097 static void
_add_perio_to_face_cells_g(cs_block_dist_info_t bi,cs_gnum_t g_face_cells[],cs_lnum_t n_periodic_couples,const cs_gnum_t periodic_couples[],MPI_Comm comm)1098 _add_perio_to_face_cells_g(cs_block_dist_info_t  bi,
1099                            cs_gnum_t             g_face_cells[],
1100                            cs_lnum_t             n_periodic_couples,
1101                            const cs_gnum_t       periodic_couples[],
1102                            MPI_Comm              comm)
1103 {
1104   /* Initialization */
1105 
1106   int flags = 0;
1107 
1108   const cs_lnum_t n_send = n_periodic_couples*2;
1109 
1110   cs_all_to_all_t *d = cs_all_to_all_create_from_block(n_send,
1111                                                        flags,
1112                                                        periodic_couples,
1113                                                        bi,
1114                                                        comm);
1115 
1116   /* Distribute to blocks */
1117 
1118   cs_gnum_t *b_data = cs_all_to_all_copy_array(d,
1119                                                CS_GNUM_TYPE,
1120                                                1,
1121                                                false, /* reverse */
1122                                                periodic_couples,
1123                                                NULL);
1124 
1125   cs_lnum_t  n_b = cs_all_to_all_n_elts_dest(d);
1126 
1127   /* Receive buffer contains global cell face whose cell adjacency
1128      is defined on the local rank, and we replace the received value
1129      by the adjacent cell number, for return exchange */
1130 
1131   for (cs_lnum_t i = 0; i < n_b; i++) {
1132     cs_gnum_t g_face_id = b_data[i] - bi. gnum_range[0];
1133     cs_gnum_t c_num_0 = g_face_cells[g_face_id*2];
1134     const cs_gnum_t c_num_1 = g_face_cells[g_face_id*2 + 1];
1135     assert(c_num_0 == 0 || c_num_1 == 0);
1136     /* assign c_num_0 or c_num_1 depending on which is nonzero
1137        (or 0 if both are 0, which should not happen) */
1138     b_data[i] = c_num_0 + c_num_1;
1139   }
1140 
1141   cs_gnum_t *send_adj;
1142   BFT_MALLOC(send_adj, n_send*2, cs_gnum_t);
1143 
1144   cs_gnum_t *r_data = cs_all_to_all_copy_array(d,
1145                                                CS_GNUM_TYPE,
1146                                                1,
1147                                                true, /* reverse */
1148                                                b_data,
1149                                                NULL);
1150 
1151   BFT_FREE(b_data);
1152 
1153   /* Now r_data contains the global cell number matching a given face;
1154      Send global face number and cell number adjacent with its
1155      periodic face to rank defining the adjacency for this face */
1156 
1157   for (cs_lnum_t i = 0; i < n_send; i++) {
1158 
1159     cs_lnum_t k = (i + 1) % 2;  /* 1 for first value, 0 for
1160                                    second (permutation) */
1161 
1162     send_adj[i*2]     = periodic_couples[i];
1163     send_adj[i*2 + 1] = r_data[i + k];
1164 
1165   }
1166 
1167   BFT_FREE(r_data);
1168 
1169   b_data = cs_all_to_all_copy_array(d,
1170                                     CS_GNUM_TYPE,
1171                                     2,
1172                                     false, /* reverse */
1173                                     send_adj,
1174                                     NULL);
1175 
1176   BFT_FREE(send_adj);
1177 
1178   /* Update face -> cell connectivity */
1179 
1180   for (cs_lnum_t i = 0; i < n_b; i++) {
1181     const cs_gnum_t g_face_id = b_data[2*i] - bi. gnum_range[0];
1182     const cs_gnum_t g_cell_num = b_data[2*i + 1];
1183     if (g_face_cells[g_face_id*2] == 0)
1184       g_face_cells[g_face_id*2] = g_cell_num;
1185     else if (g_face_cells[g_face_id*2 + 1] == 0)
1186       g_face_cells[g_face_id*2 + 1] = g_cell_num;
1187     else if (g_face_cells[g_face_id*2 + 1] != g_cell_num)
1188       bft_error(__FILE__, __LINE__, 0,
1189                 _("Inconsistency adding periocicity info for partitioning.\n"
1190                   "Face %llu: adjacent to cells %llu and %llu,\n"
1191                   "trying to add %llu instead of %llu.\n\n"
1192                   "Try ignoring periodicity for partitioning."),
1193                 (unsigned long long)b_data[2*i],
1194                 (unsigned long long)g_face_cells[g_face_id*2],
1195                 (unsigned long long)g_face_cells[g_face_id*2+1],
1196                 (unsigned long long)g_cell_num,
1197                 (unsigned long long)g_face_cells[g_face_id*2+1]);
1198   }
1199 
1200   BFT_FREE(b_data);
1201 
1202   cs_all_to_all_destroy(&d);
1203 }
1204 
1205 #endif /* defined(HAVE_MPI) */
1206 
1207 /*----------------------------------------------------------------------------
1208  * Update global face -> cells adjacency for periodicity in local mode.
1209  *
1210  * If face Fi is periodic with face Fj, and face Fi is adjacent to cell Ci,
1211  * while face Fj is adjacent to face Cj, we add Cj to Fi's adjacent cells,
1212  * and Ci to Fj's adjacent cells, just as if Fi and Fj were regular interior
1213  * faces (this ignores the geometric transformation, but is suffient to
1214  * build the cells -> cells graph for domain partitioning).
1215  *
1216  * This function should be called when faces are distributed by blocks,
1217  * that is prior to redistribution based on face -> cells adjacency.
1218  *
1219  * arguments:
1220  *   bi                 <-- block size and range info for faces
1221  *   g_face_cells       <-> global face->cells adjacency to update
1222  *   n_periodic_couples <-- number of periodic couples
1223  *   periodic_couples   <-- array indicating periodic couples (interlaced,
1224  *                          using global numberings)
1225  *
1226  * returns:
1227  *   initialized partition to block distributor
1228  *----------------------------------------------------------------------------*/
1229 
1230 static void
_add_perio_to_face_cells_l(cs_gnum_t g_face_cells[],cs_lnum_t n_periodic_couples,const cs_gnum_t periodic_couples[])1231 _add_perio_to_face_cells_l(cs_gnum_t         g_face_cells[],
1232                            cs_lnum_t         n_periodic_couples,
1233                            const cs_gnum_t   periodic_couples[])
1234 {
1235   cs_lnum_t j;
1236 
1237   /* Update face -> cell connectivity */
1238 
1239   for (j = 0; j < n_periodic_couples; j++) {
1240 
1241     cs_gnum_t f_id_0 = periodic_couples[j*2] - 1;
1242     cs_gnum_t f_id_1 = periodic_couples[j*2 + 1] - 1;
1243     cs_gnum_t c_num_00 = g_face_cells[f_id_0*2];
1244     cs_gnum_t c_num_01 = g_face_cells[f_id_0*2 + 1];
1245     cs_gnum_t c_num_10 = g_face_cells[f_id_1*2];
1246     cs_gnum_t c_num_11 = g_face_cells[f_id_1*2 + 1];
1247 
1248     assert(c_num_00 == 0 || c_num_01 == 0);
1249     assert(c_num_10 == 0 || c_num_11 == 0);
1250 
1251     /* assign c_num_i0 or c_num_i1 depending on which is nonzero
1252        (or 0 if both are 0, which should not happen) */
1253 
1254     if (g_face_cells[f_id_0*2] == 0)
1255       g_face_cells[f_id_0*2] = c_num_10 + c_num_11;
1256     else
1257       g_face_cells[f_id_0*2 + 1] = c_num_00 + c_num_01;
1258 
1259     if (g_face_cells[f_id_1*2] == 0)
1260       g_face_cells[f_id_1*2] = c_num_00 + c_num_01;
1261     else
1262       g_face_cells[f_id_1*2 + 1] = c_num_10 + c_num_11;
1263   }
1264 }
1265 
1266 #if defined(HAVE_METIS) || defined(HAVE_PARMETIS)
1267 
1268 /*----------------------------------------------------------------------------
1269  * Build cell -> cell connectivity
1270  *
1271  * parameters:
1272  *   n_cells        <-- number of cells in mesh
1273  *   n_faces        <-- number of cells in mesh
1274  *   start_cell     <-- number of first cell for the curent rank
1275  *   face_cells     <-- face->cells connectivity
1276  *   cell_idx       --> cell->cells index
1277  *   cell_neighbors --> cell->cells connectivity
1278  *----------------------------------------------------------------------------*/
1279 
1280 static void
_metis_cell_cells(size_t n_cells,size_t n_faces,cs_gnum_t start_cell,cs_gnum_t * face_cells,idx_t ** cell_idx,idx_t ** cell_neighbors)1281 _metis_cell_cells(size_t       n_cells,
1282                   size_t       n_faces,
1283                   cs_gnum_t    start_cell,
1284                   cs_gnum_t   *face_cells,
1285                   idx_t      **cell_idx,
1286                   idx_t      **cell_neighbors)
1287 {
1288   size_t i, id_0, id_1;
1289 
1290   cs_gnum_t  c_num[2];
1291 
1292   idx_t  *n_neighbors;
1293   idx_t  *_cell_idx;
1294   idx_t  *_cell_neighbors;
1295 
1296   /* Count and allocate arrays */
1297 
1298   BFT_MALLOC(n_neighbors, n_cells, idx_t);
1299 
1300   for (i = 0; i < n_cells; i++)
1301     n_neighbors[i] = 0;
1302 
1303   for (i = 0; i < n_faces; i++) {
1304 
1305     c_num[0] = face_cells[i*2];
1306     c_num[1] = face_cells[i*2 + 1];
1307 
1308     if (c_num[0] == 0 || c_num[1] == 0 || c_num[0] == c_num[1])
1309       continue;
1310 
1311     assert(   c_num[0] - start_cell < n_cells
1312            || c_num[1] - start_cell < n_cells);
1313 
1314     if (c_num[0] >= start_cell && c_num[0] - start_cell < n_cells)
1315       n_neighbors[c_num[0] - start_cell] += 1;
1316     if (c_num[1] >= start_cell && c_num[1] - start_cell < n_cells)
1317       n_neighbors[c_num[1] - start_cell] += 1;
1318   }
1319 
1320   BFT_MALLOC(_cell_idx, n_cells + 1, idx_t);
1321 
1322   _cell_idx[0] = 0;
1323 
1324   for (i = 0; i < n_cells; i++)
1325     _cell_idx[i + 1] = _cell_idx[i] + n_neighbors[i];
1326 
1327   BFT_MALLOC(_cell_neighbors, _cell_idx[n_cells], idx_t);
1328 
1329   for (i = 0; i < n_cells; i++)
1330     n_neighbors[i] = 0;
1331 
1332   for (i = 0; i < n_faces; i++) {
1333 
1334     c_num[0] = face_cells[i*2];
1335     c_num[1] = face_cells[i*2 + 1];
1336 
1337     if (c_num[0] == 0 || c_num[1] == 0 || c_num[0] == c_num[1])
1338       continue;
1339 
1340     if (c_num[0] >= start_cell && c_num[0] - start_cell < n_cells) {
1341       id_0 = c_num[0] - start_cell;
1342       _cell_neighbors[_cell_idx[id_0] + n_neighbors[id_0]] = c_num[1] - 1;
1343       n_neighbors[id_0] += 1;
1344     }
1345 
1346     if (c_num[1] >= start_cell && c_num[1] - start_cell < n_cells) {
1347       id_1 = c_num[1] - start_cell;
1348       _cell_neighbors[_cell_idx[id_1] + n_neighbors[id_1]] = c_num[0] - 1;
1349       n_neighbors[id_1] += 1;
1350     }
1351   }
1352 
1353   BFT_FREE(n_neighbors);
1354 
1355   *cell_idx = _cell_idx;
1356   *cell_neighbors = _cell_neighbors;
1357 }
1358 
1359 /*----------------------------------------------------------------------------
1360  * Compute partition using METIS
1361  *
1362  * parameters:
1363  *   n_cells       <-- number of cells in mesh
1364  *   n_parts       <-- number of partitions
1365  *   cell_cell_idx <-- cell->cells index
1366  *   cell_cell     <-- cell->cells connectivity
1367  *   cell_part     --> cell partition
1368  *----------------------------------------------------------------------------*/
1369 
1370 static void
_part_metis(size_t n_cells,int n_parts,idx_t * cell_idx,idx_t * cell_neighbors,int * cell_part)1371 _part_metis(size_t   n_cells,
1372             int      n_parts,
1373             idx_t   *cell_idx,
1374             idx_t   *cell_neighbors,
1375             int     *cell_part)
1376 {
1377   size_t i;
1378   double  start_time, end_time;
1379 
1380   idx_t   _n_constraints = 1;
1381 
1382   idx_t    edgecut    = 0; /* <-- Number of faces on partition */
1383 
1384   idx_t   _n_cells = n_cells;
1385   idx_t   _n_parts = n_parts;
1386   idx_t  *_cell_part = NULL;
1387 
1388   start_time = cs_timer_wtime();
1389 
1390   if (sizeof(idx_t) == sizeof(int))
1391     _cell_part = (idx_t *)cell_part;
1392 
1393   else
1394     BFT_MALLOC(_cell_part, n_cells, idx_t);
1395 
1396   if (n_parts < 8) {
1397 
1398     bft_printf(_("\n"
1399                  " Partitioning %llu cells to %d domains"
1400                  "   (METIS_PartGraphRecursive).\n"),
1401                (unsigned long long)n_cells, n_parts);
1402 
1403     METIS_PartGraphRecursive(&_n_cells,
1404                              &_n_constraints,
1405                              cell_idx,
1406                              cell_neighbors,
1407                              NULL,       /* vwgt:   cell weights */
1408                              NULL,       /* vsize:  size of the vertices */
1409                              NULL,       /* adjwgt: face weights */
1410                              &_n_parts,
1411                              NULL,       /* tpwgts */
1412                              NULL,       /* ubvec: load imbalance tolerance */
1413                              NULL,       /* options */
1414                              &edgecut,
1415                              _cell_part);
1416 
1417   }
1418 
1419   else {
1420 
1421     bft_printf(_("\n"
1422                  " Partitioning %llu cells to %d domains\n"
1423                  "  (METIS_PartGraphKway).\n"),
1424                (unsigned long long)n_cells, n_parts);
1425 
1426     METIS_PartGraphKway(&_n_cells,
1427                         &_n_constraints,
1428                         cell_idx,
1429                         cell_neighbors,
1430                         NULL,       /* vwgt:   cell weights */
1431                         NULL,       /* vsize:  size of the vertices */
1432                         NULL,       /* adjwgt: face weights */
1433                         &_n_parts,
1434                         NULL,       /* tpwgts */
1435                         NULL,       /* ubvec: load imbalance tolerance */
1436                         NULL,       /* options */
1437                         &edgecut,
1438                         _cell_part);
1439 
1440   }
1441 
1442   end_time = cs_timer_wtime();
1443 
1444   bft_printf(_("\n"
1445                "  Total number of faces on parallel boundaries: %llu\n"
1446                "  wall-clock time: %f s\n\n"),
1447              (unsigned long long)edgecut,
1448              (double)(end_time - start_time));
1449 
1450   cs_log_printf(CS_LOG_PERFORMANCE,
1451                 "  METIS_PartGraphKway:        %.3g s\n",
1452                 (double)(end_time - start_time));
1453 
1454   if (sizeof(idx_t) != sizeof(int)) {
1455     for (i = 0; i < n_cells; i++)
1456       cell_part[i] = _cell_part[i];
1457     BFT_FREE(_cell_part);
1458   }
1459 }
1460 
1461 #endif /* defined(HAVE_METIS) || defined(HAVE_PARMETIS) */
1462 
1463 #if defined(HAVE_PARMETIS)
1464 
1465 /*----------------------------------------------------------------------------
1466  * Compute partition using ParMETIS
1467  *
1468  * parameters:
1469  *   n_g_cells     <-- global number of cells
1470  *   cell_range    <-- first and past-the-last cell numbers for this rank
1471  *   n_parts       <-- number of partitions
1472  *   cell_cell_idx <-- cell->cells index
1473  *   cell_cell     <-- cell->cells connectivity
1474  *   cell_part     --> cell partition
1475  *   comm          <-- associated MPI communicator
1476  *----------------------------------------------------------------------------*/
1477 
1478 static void
_part_parmetis(cs_gnum_t n_g_cells,cs_gnum_t cell_range[2],int n_parts,idx_t * cell_idx,idx_t * cell_neighbors,int * cell_part,MPI_Comm comm)1479 _part_parmetis(cs_gnum_t   n_g_cells,
1480                cs_gnum_t   cell_range[2],
1481                int         n_parts,
1482                idx_t      *cell_idx,
1483                idx_t      *cell_neighbors,
1484                int        *cell_part,
1485                MPI_Comm    comm)
1486 {
1487   size_t i;
1488   double  start_time, end_time;
1489 
1490   unsigned long long  edgecut    = 0; /* <-- Number of faces on partition */
1491 
1492   int       n_ranks;
1493   size_t    n_cells = cell_range[1] - cell_range[0];
1494   idx_t     vtxstart = cell_range[0] - 1;
1495   idx_t     vtxend = cell_range[1] - 1;
1496   idx_t    *vtxdist = NULL;
1497   idx_t    *_cell_part = NULL;
1498   MPI_Datatype mpi_idx_t = MPI_DATATYPE_NULL;
1499 
1500   start_time = cs_timer_wtime();
1501 
1502   MPI_Comm_size(comm, &n_ranks);
1503 
1504   /* Adjust mpi_idx_t if necessary */
1505 
1506   if (sizeof(idx_t) == sizeof(short))
1507     mpi_idx_t = MPI_SHORT;
1508   else if (sizeof(idx_t) == sizeof(int))
1509     mpi_idx_t = MPI_INT;
1510   else if (sizeof(idx_t) == sizeof(long))
1511     mpi_idx_t = MPI_LONG; /* standard ParMETIS 3.1.1 only short or int */
1512   else {
1513     assert(0); /* porting error, possible with future or modified ParMETIS */
1514   }
1515 
1516   if (sizeof(idx_t) == sizeof(int))
1517     _cell_part = cell_part;
1518 
1519   else
1520     BFT_MALLOC(_cell_part, n_cells, idx_t);
1521 
1522   bft_printf(_("\n"
1523                " Partitioning %llu cells to %d domains on %d ranks\n"
1524                "   (ParMETIS_V3_PartKway).\n"),
1525              (unsigned long long)n_g_cells, n_parts, n_ranks);
1526 
1527   /* Build vtxdist */
1528 
1529   BFT_MALLOC(vtxdist, n_ranks + 1, idx_t);
1530 
1531   MPI_Allgather(&vtxstart, 1, mpi_idx_t,
1532                 vtxdist, 1, mpi_idx_t, comm);
1533   MPI_Allreduce(&vtxend, vtxdist + n_ranks, 1, mpi_idx_t, MPI_MAX, comm);
1534 
1535   /* Call ParMETIS partitioning */
1536 
1537   {
1538     int      j;
1539     idx_t  _edgecut = 0;
1540     idx_t  _n_parts = n_parts;
1541     idx_t  ncon     = 1; /* number of weights for each vertex */
1542     idx_t  options[3] = {0, 1, 15}; /* By default if options[0] = 0 */
1543     idx_t  numflag  = 0; /* 0 to n-1 numbering (C type) */
1544     idx_t  wgtflag  = 0; /* No weighting for faces or cells */
1545 
1546     real_t wgt = 1.0/n_parts;
1547     real_t ubvec[]  = {1.5};
1548     real_t *tpwgts = NULL;
1549 
1550     BFT_MALLOC(tpwgts, n_parts, real_t);
1551 
1552     for (j = 0; j < n_parts; j++)
1553       tpwgts[j] = wgt;
1554 
1555     int retval = ParMETIS_V3_PartKway
1556                    (vtxdist,
1557                     cell_idx,
1558                     cell_neighbors,
1559                     NULL,       /* vwgt:   cell weights */
1560                     NULL,       /* adjwgt: face weights */
1561                     &wgtflag,
1562                     &numflag,
1563                     &ncon,
1564                     &_n_parts,
1565                     tpwgts,
1566                     ubvec,
1567                     options,
1568                     &_edgecut,
1569                     _cell_part,
1570                     &comm);
1571 
1572     BFT_FREE(tpwgts);
1573 
1574     edgecut = _edgecut;
1575 
1576     if (retval != METIS_OK)
1577       bft_error(__FILE__, __LINE__, 0,
1578                 _("Error in ParMETIS partitioning.\n"));
1579 
1580   }
1581 
1582   end_time = cs_timer_wtime();
1583 
1584   BFT_FREE(vtxdist);
1585 
1586   if (edgecut > 0)
1587     bft_printf(_("\n"
1588                  "  Total number of faces on parallel boundaries: %llu\n"
1589                  "  wall-clock time: %f s\n\n"),
1590                (unsigned long long)edgecut,
1591                (double)(end_time - start_time));
1592   else
1593     bft_printf(_("\n"
1594                  "  wall-clock time: %f s\n\n"),
1595                (double)(end_time - start_time));
1596 
1597   cs_log_printf(CS_LOG_PERFORMANCE,
1598                 "  ParMETIS_V3_PartKway:       %.3g s\n",
1599                 (double)(end_time - start_time));
1600 
1601   if (sizeof(idx_t) != sizeof(int)) {
1602     for (i = 0; i < n_cells; i++)
1603       cell_part[i] = _cell_part[i];
1604     BFT_FREE(_cell_part);
1605   }
1606 }
1607 
1608 #endif /* defined(HAVE_PARMETIS) */
1609 
1610 #if defined(HAVE_SCOTCH) || defined(HAVE_PTSCOTCH)
1611 
1612 /*----------------------------------------------------------------------------
1613  * Sort an array "a" between its left bound "l" and its right bound "r"
1614  * thanks to a shell sort (Knuth algorithm).
1615  *
1616  * parameters:
1617  *   l <-- left bound
1618  *   r <-- right bound
1619  *   a <-> array to sort
1620  *---------------------------------------------------------------------------*/
1621 
1622 static void
_scotch_sort_shell(SCOTCH_Num l,SCOTCH_Num r,SCOTCH_Num a[])1623 _scotch_sort_shell(SCOTCH_Num  l,
1624                    SCOTCH_Num  r,
1625                    SCOTCH_Num  a[])
1626 {
1627   int i, j, h;
1628 
1629   /* Compute stride */
1630   for (h = 1; h <= (r-l)/9; h = 3*h+1) ;
1631 
1632   /* Sort array */
1633   for (; h > 0; h /= 3) {
1634 
1635     for (i = l+h; i < r; i++) {
1636 
1637       SCOTCH_Num v = a[i];
1638 
1639       j = i;
1640       while ((j >= l+h) && (v < a[j-h])) {
1641         a[j] = a[j-h];
1642         j -= h;
1643       }
1644       a[j] = v;
1645 
1646     } /* Loop on array elements */
1647 
1648   } /* End of loop on stride */
1649 
1650 }
1651 
1652 /*----------------------------------------------------------------------------
1653  * Print an error message and exit with an EXIT_FAILURE code.
1654  *
1655  * An implementation of this function is required by libScotch.
1656  *
1657  * parameters:
1658  *   errstr <-- format string, as printf() and family.
1659  *   ...    <-- variable arguments based on format string.
1660  *----------------------------------------------------------------------------*/
1661 
1662 void
SCOTCH_errorPrint(const char * errstr,...)1663 SCOTCH_errorPrint(const char  *errstr,
1664                   ...)
1665 {
1666   if (cs_glob_rank_id < 1) {
1667 
1668     va_list  errlist;
1669 
1670     fflush(stdout);
1671 
1672     fprintf(stderr, "\n");
1673 
1674     fprintf(stderr, _("\nFatal error encountered by libScotch.\n\n"));
1675 
1676     va_start(errlist, errstr);
1677     vfprintf(stderr, errstr, errlist);
1678     va_end(errlist);
1679     fprintf(stderr, "\n\n");
1680     fflush(stderr);
1681   }
1682 
1683   assert(0);
1684 
1685 #if defined(HAVE_MPI)
1686   {
1687     int mpi_flag;
1688     MPI_Initialized(&mpi_flag);
1689     if (mpi_flag != 0)
1690       MPI_Abort(cs_glob_mpi_comm, EXIT_FAILURE);
1691   }
1692 #endif /* HAVE_MPI */
1693 
1694   exit(EXIT_FAILURE);
1695 }
1696 
1697 /*----------------------------------------------------------------------------
1698  * Print a warning message.
1699  *
1700  * An implementation of this function is required by libScotch.
1701  *
1702  * parameters:
1703  *   errstr <-- format string, as printf() and family.
1704  *   ...    <-- variable arguments based on format string.
1705  *----------------------------------------------------------------------------*/
1706 
1707 void
SCOTCH_errorPrintW(const char * errstr,...)1708 SCOTCH_errorPrintW (const char *errstr,
1709                     ...)
1710 {
1711   if (cs_glob_rank_id < 1) {
1712 
1713     va_list  errlist;
1714 
1715     fflush(stdout);
1716 
1717     fprintf(stdout, "\n");
1718 
1719     fprintf(stdout, _("\nWarning (libScotch):\n\n"));
1720 
1721     va_start(errlist, errstr);
1722     vfprintf(stdout, errstr, errlist);
1723     va_end(errlist);
1724     fprintf(stdout, "\n\n");
1725     fflush(stdout);
1726   }
1727 }
1728 
1729 /*----------------------------------------------------------------------------
1730  * Build cell -> cell connectivity
1731  *
1732  * parameters:
1733  *   n_cells        <-- number of cells in mesh
1734  *   n_faces        <-- number of cells in mesh
1735  *   start_cell     <-- number of first cell for the curent rank
1736  *   face_cells     <-- face->cells connectivity
1737  *   cell_idx       --> cell->cells index
1738  *   cell_neighbors --> cell->cells connectivity
1739  *----------------------------------------------------------------------------*/
1740 
1741 static void
_scotch_cell_cells(size_t n_cells,size_t n_faces,cs_gnum_t start_cell,cs_gnum_t * face_cells,SCOTCH_Num ** cell_idx,SCOTCH_Num ** cell_neighbors)1742 _scotch_cell_cells(size_t        n_cells,
1743                    size_t        n_faces,
1744                    cs_gnum_t     start_cell,
1745                    cs_gnum_t    *face_cells,
1746                    SCOTCH_Num  **cell_idx,
1747                    SCOTCH_Num  **cell_neighbors)
1748 {
1749   size_t i;
1750   cs_gnum_t  id_0, id_1, c_num[2];
1751   SCOTCH_Num  start_id, end_id, c_id;
1752 
1753   SCOTCH_Num  *n_neighbors;
1754   SCOTCH_Num  *_cell_idx;
1755   SCOTCH_Num  *_cell_neighbors;
1756 
1757   /* Count and allocate arrays */
1758 
1759   BFT_MALLOC(n_neighbors, n_cells, SCOTCH_Num);
1760 
1761   for (i = 0; i < n_cells; i++)
1762     n_neighbors[i] = 0;
1763 
1764   for (i = 0; i < n_faces; i++) {
1765 
1766     c_num[0] = face_cells[i*2];
1767     c_num[1] = face_cells[i*2 + 1];
1768 
1769     if (c_num[0] == 0 || c_num[1] == 0 || c_num[0] == c_num[1])
1770       continue;
1771 
1772     assert(   c_num[0] - start_cell < n_cells
1773            || c_num[1] - start_cell < n_cells);
1774 
1775     if (c_num[0] >= start_cell && c_num[0] - start_cell < n_cells)
1776       n_neighbors[c_num[0] - start_cell] += 1;
1777     if (c_num[1] >= start_cell && c_num[1] - start_cell < n_cells)
1778       n_neighbors[c_num[1] - start_cell] += 1;
1779   }
1780 
1781   BFT_MALLOC(_cell_idx, n_cells + 1, SCOTCH_Num);
1782 
1783   _cell_idx[0] = 0;
1784 
1785   for (i = 0; i < n_cells; i++)
1786     _cell_idx[i + 1] = _cell_idx[i] + n_neighbors[i];
1787 
1788   BFT_MALLOC(_cell_neighbors, _cell_idx[n_cells], SCOTCH_Num);
1789 
1790   for (i = 0; i < n_cells; i++)
1791     n_neighbors[i] = 0;
1792 
1793   for (i = 0; i < n_faces; i++) {
1794 
1795     c_num[0] = face_cells[i*2];
1796     c_num[1] = face_cells[i*2 + 1];
1797 
1798     if (c_num[0] == 0 || c_num[1] == 0 || c_num[0] == c_num[1])
1799       continue;
1800 
1801     if (c_num[0] >= start_cell && c_num[0] - start_cell < n_cells) {
1802       id_0 = c_num[0] - start_cell;
1803       _cell_neighbors[_cell_idx[id_0] + n_neighbors[id_0]] = c_num[1] - 1;
1804       n_neighbors[id_0] += 1;
1805     }
1806 
1807     if (c_num[1] >= start_cell && c_num[1] - start_cell < n_cells) {
1808       id_1 = c_num[1] - start_cell;
1809       _cell_neighbors[_cell_idx[id_1] + n_neighbors[id_1]] = c_num[0] - 1;
1810       n_neighbors[id_1] += 1;
1811     }
1812   }
1813 
1814   BFT_FREE(n_neighbors);
1815 
1816   /* Clean graph */
1817 
1818   c_id = 0;
1819   start_id = _cell_idx[0]; /* also = 0 */
1820   end_id = 0;
1821 
1822   if (_cell_neighbors != NULL) {
1823 
1824     for (i = 0; i < n_cells; i++) {
1825 
1826       SCOTCH_Num j, n_prev;
1827 
1828       end_id = _cell_idx[i+1];
1829 
1830       _scotch_sort_shell(start_id, end_id, _cell_neighbors);
1831 
1832       n_prev = _cell_neighbors[start_id];
1833       _cell_neighbors[c_id] = n_prev;
1834       c_id += 1;
1835 
1836       for (j = start_id + 1; j < end_id; j++) {
1837         if (_cell_neighbors[j] != n_prev) {
1838           n_prev = _cell_neighbors[j];
1839           _cell_neighbors[c_id] = n_prev;
1840           c_id += 1;
1841         }
1842       }
1843 
1844       start_id = end_id;
1845       _cell_idx[i+1] = c_id;
1846 
1847     }
1848 
1849     if (c_id < end_id)
1850       BFT_REALLOC(_cell_neighbors, c_id, SCOTCH_Num);
1851 
1852   }
1853 
1854   /* Set return values */
1855 
1856   *cell_idx = _cell_idx;
1857   *cell_neighbors = _cell_neighbors;
1858 }
1859 
1860 /*----------------------------------------------------------------------------
1861  * Compute partition using SCOTCH
1862  *
1863  * parameters:
1864  *   n_cells       <-- number of cells in mesh
1865  *   n_parts       <-- number of partitions
1866  *   cell_cell_idx <-- cell->cells index
1867  *   cell_cell     <-- cell->cells connectivity
1868  *   cell_part     --> cell partition
1869  *----------------------------------------------------------------------------*/
1870 
1871 static void
_part_scotch(SCOTCH_Num n_cells,int n_parts,SCOTCH_Num * cell_idx,SCOTCH_Num * cell_neighbors,int * cell_part)1872 _part_scotch(SCOTCH_Num   n_cells,
1873              int          n_parts,
1874              SCOTCH_Num  *cell_idx,
1875              SCOTCH_Num  *cell_neighbors,
1876              int         *cell_part)
1877 {
1878   SCOTCH_Num  i;
1879   SCOTCH_Graph  grafdat;  /* Scotch graph object to interface with libScotch */
1880   SCOTCH_Strat  stradat;
1881 
1882   double  start_time, end_time;
1883 
1884   int     retval = 0;
1885 
1886   SCOTCH_Num    edgecut = 0; /* <-- Number of faces on partition */
1887   SCOTCH_Num  *_cell_part = NULL;
1888 
1889   /* Initialization */
1890 
1891   start_time = cs_timer_wtime();
1892 
1893   if (sizeof(SCOTCH_Num) == sizeof(int))
1894     _cell_part = (SCOTCH_Num *)cell_part;
1895   else
1896     BFT_MALLOC(_cell_part, n_cells, SCOTCH_Num);
1897 
1898   bft_printf(_("\n"
1899                " Partitioning %llu cells to %d domains\n"
1900                "   (SCOTCH_graphPart).\n"),
1901              (unsigned long long)n_cells, n_parts);
1902 
1903   /* Partition using libScotch */
1904 
1905   SCOTCH_graphInit(&grafdat);
1906 
1907   retval
1908     = SCOTCH_graphBuild(&grafdat,
1909                         0,                  /* baseval; 0 to n -1 numbering */
1910                         n_cells,            /* vertnbr */
1911                         cell_idx,           /* verttab */
1912                         NULL,               /* vendtab: verttab + 1 or NULL */
1913                         NULL,               /* velotab: vertex weights */
1914                         NULL,               /* vlbltab; vertex labels */
1915                         cell_idx[n_cells],  /* edgenbr */
1916                         cell_neighbors,     /* edgetab */
1917                         NULL);              /* edlotab */
1918 
1919   if (retval == 0) {
1920 
1921     SCOTCH_stratInit(&stradat);
1922 
1923     if (SCOTCH_graphCheck(&grafdat) == 0)
1924       retval = SCOTCH_graphPart(&grafdat, n_parts, &stradat, _cell_part);
1925 
1926     SCOTCH_stratExit(&stradat);
1927   }
1928 
1929   SCOTCH_graphExit(&grafdat);
1930 
1931   /* Shift cell_part values to 1 to n numbering and free possible temporary */
1932 
1933   if (sizeof(SCOTCH_Num) != sizeof(int)) {
1934     for (i = 0; i < n_cells; i++)
1935       cell_part[i] = _cell_part[i];
1936     BFT_FREE(_cell_part);
1937   }
1938 
1939   /* Compute edge cut */
1940 
1941   if (retval == 0) {
1942 
1943     SCOTCH_Num cell_id, edgenum, commcut;
1944 
1945     commcut = 0;
1946 
1947     for (cell_id = 0; cell_id < n_cells; cell_id++) {
1948       SCOTCH_Num  edgennd,  partval;
1949       partval = cell_part[cell_id];
1950       for (edgenum = cell_idx[cell_id], edgennd = cell_idx[cell_id + 1];
1951            edgenum < edgennd;
1952            edgenum++) {
1953         if (cell_part[cell_neighbors[edgenum]] != partval)
1954           commcut++;
1955       }
1956     }
1957 
1958     edgecut = commcut / 2;
1959   }
1960 
1961   /* Finalization */
1962 
1963   end_time = cs_timer_wtime();
1964 
1965   bft_printf(_("\n"
1966                "  Total number of faces on parallel boundaries: %llu\n"
1967                "  wall-clock time: %f s\n\n"),
1968              (unsigned long long)edgecut,
1969              (double)(end_time - start_time));
1970 
1971   cs_log_printf(CS_LOG_PERFORMANCE,
1972                 "  SCOTCH_graphPart:           %.3g s\n",
1973                 (double)(end_time - start_time));
1974 }
1975 
1976 #endif /* defined(HAVE_SCOTCH) || defined(HAVE_PTSCOTCH) */
1977 
1978 #if defined(HAVE_PTSCOTCH)
1979 
1980 /*----------------------------------------------------------------------------
1981  * Compute partition using PT-SCOTCH
1982  *
1983  * parameters:
1984  *   n_g_cells     <-- global number of cells
1985  *   cell_range    <-- first and past-the-last cell numbers for this rank
1986  *   n_parts       <-- number of partitions
1987  *   cell_cell_idx <-- cell->cells index
1988  *   cell_cell     <-- cell->cells connectivity
1989  *   cell_part     --> cell partition
1990  *   comm          <-- associated MPI communicator
1991  *----------------------------------------------------------------------------*/
1992 
1993 static void
_part_ptscotch(cs_gnum_t n_g_cells,cs_gnum_t cell_range[2],int n_parts,SCOTCH_Num * cell_idx,SCOTCH_Num * cell_neighbors,int * cell_part,MPI_Comm comm)1994 _part_ptscotch(cs_gnum_t    n_g_cells,
1995                cs_gnum_t    cell_range[2],
1996                int          n_parts,
1997                SCOTCH_Num  *cell_idx,
1998                SCOTCH_Num  *cell_neighbors,
1999                int         *cell_part,
2000                MPI_Comm     comm)
2001 {
2002   int  n_ranks;
2003   SCOTCH_Num  i;
2004   SCOTCH_Dgraph  grafdat;  /* Scotch graph object to interface with libScotch */
2005   SCOTCH_Strat  stradat;
2006 
2007   double  start_time, end_time;
2008 
2009   int     retval = 0;
2010 
2011   SCOTCH_Num    n_cells = cell_range[1] - cell_range[0];
2012   SCOTCH_Num  *_cell_part = NULL;
2013 
2014   /* Initialization */
2015 
2016   start_time = cs_timer_wtime();
2017 
2018   MPI_Comm_size(comm, &n_ranks);
2019 
2020   if (sizeof(SCOTCH_Num) == sizeof(int))
2021     _cell_part = (SCOTCH_Num *)cell_part;
2022   else
2023     BFT_MALLOC(_cell_part, n_cells, SCOTCH_Num);
2024 
2025   bft_printf(_("\n"
2026                " Partitioning %llu cells to %d domains on %d ranks\n"
2027                "   (SCOTCH_dgraphPart).\n"),
2028              (unsigned long long)n_g_cells, n_parts, n_ranks);
2029 
2030   /* Partition using libScotch */
2031 
2032   retval = SCOTCH_dgraphInit(&grafdat, comm);
2033 
2034   if (retval == 0) {
2035     retval = SCOTCH_dgraphBuild
2036                (&grafdat,
2037                 0,                  /* baseval; 0 to n -1 numbering */
2038                 n_cells,            /* vertlocnbr */
2039                 n_cells,            /* vertlocmax (= vertlocnbr) */
2040                 cell_idx,           /* vertloctab */
2041                 NULL,               /* vendloctab: vertloctab + 1 or NULL */
2042                 NULL,               /* veloloctab: vertex weights */
2043                 NULL,               /* vlblloctab; vertex labels */
2044                 cell_idx[n_cells],  /* edgelocnbr */
2045                 cell_idx[n_cells],  /* edgelocsiz */
2046                 cell_neighbors,     /* edgeloctab */
2047                 NULL,               /* edgegstab */
2048                 NULL);              /* edloloctab */
2049   }
2050 
2051   if (retval == 0) {
2052 
2053     SCOTCH_stratInit(&stradat);
2054 
2055     if (SCOTCH_dgraphCheck(&grafdat) == 0)
2056       retval = SCOTCH_dgraphPart(&grafdat, n_parts, &stradat, _cell_part);
2057 
2058     SCOTCH_stratExit(&stradat);
2059   }
2060 
2061   SCOTCH_dgraphExit(&grafdat);
2062 
2063   /* Shift cell_part values to 1 to n numbering and free possible temporary */
2064 
2065   if (sizeof(SCOTCH_Num) != sizeof(int)) {
2066     for (i = 0; i < n_cells; i++)
2067       cell_part[i] = _cell_part[i];
2068     BFT_FREE(_cell_part);
2069   }
2070 
2071   /* Finalization */
2072 
2073   end_time = cs_timer_wtime();
2074 
2075   bft_printf(_("\n"
2076                "  wall-clock time: %f s\n\n"),
2077              (double)(end_time - start_time));
2078 
2079   cs_log_printf(CS_LOG_PERFORMANCE,
2080                 "  SCOTCH_dgraphPart:          %.3g s\n",
2081                 (double)(end_time - start_time));
2082 }
2083 
2084 #endif /* defined(HAVE_PTSCOTCH) */
2085 
2086 /*----------------------------------------------------------------------------
2087  * Prepare input from mesh builder for use by partitioner.
2088  *
2089  * parameters:
2090  *   mesh           <-- pointer to mesh structure
2091  *   mb             <-- pointer to mesh builder structure
2092  *   rank_step      <-- Step between active partitioning ranks
2093  *                      (1 in basic case, > 1 if we seek to partition on a
2094  *                      reduced number of ranks)
2095  *   ignore_perio   <-- ignore periodicity information if true
2096  *   cell_range     <-- first and past-the-last cell numbers for this rank
2097  *   n_faces        <-- number of local faces for current rank
2098  *   g_face_cells   <-> global face -> cells connectivity
2099  *----------------------------------------------------------------------------*/
2100 
2101 static void
_prepare_input(const cs_mesh_t * mesh,const cs_mesh_builder_t * mb,int rank_step,bool ignore_perio,cs_gnum_t cell_range[2],cs_lnum_t * n_faces,cs_gnum_t ** g_face_cells)2102 _prepare_input(const cs_mesh_t           *mesh,
2103                const cs_mesh_builder_t   *mb,
2104                int                        rank_step,
2105                bool                       ignore_perio,
2106                cs_gnum_t                  cell_range[2],
2107                cs_lnum_t                 *n_faces,
2108                cs_gnum_t                **g_face_cells)
2109 {
2110   int rank_id = cs_glob_rank_id;
2111   int n_ranks = cs_glob_n_ranks;
2112 
2113   cs_gnum_t *_g_face_cells = NULL;
2114 
2115   cs_block_dist_info_t cell_bi =  cs_block_dist_compute_sizes(rank_id,
2116                                                               n_ranks,
2117                                                               rank_step,
2118                                                               0,
2119                                                               mesh->n_g_cells);
2120 
2121   /* By default, the face -> cells connectivity is that of the mesh builder */
2122 
2123   *g_face_cells = mb->face_cells;
2124 
2125   /* In case of periodicity, update global face -> cells connectivity:
2126      if face Fi is periodic with face Fj, and face Fi is connected
2127      to cell Ci, while face Fj is connected to face Cj, we add Cj
2128      to the cells connected to Fi, and Ci to the cells connected to Fj,
2129      just as if Fi and Fj were regular interior faces (this ignores
2130      the geometric transformation, but is sufficient to build the
2131      cells -> cells graph for domain partitioning). */
2132 
2133   if (ignore_perio == false && mb->n_perio > 0) {
2134 
2135     int perio_id;
2136     cs_lnum_t n_per_face_couples = 0;
2137     cs_gnum_t n_g_per_face_couples = 0;
2138     cs_gnum_t _n_b_faces = 0;
2139     cs_gnum_t *_per_face_couples = NULL;
2140     const cs_gnum_t *per_face_couples = NULL;
2141 
2142     if (mb->face_bi.gnum_range[1] > mb->face_bi.gnum_range[0])
2143       _n_b_faces = mb->face_bi.gnum_range[1] - mb->face_bi.gnum_range[0];
2144 
2145     BFT_MALLOC(_g_face_cells, _n_b_faces*2, cs_gnum_t);
2146     *g_face_cells = _g_face_cells;
2147 
2148     memcpy(_g_face_cells, mb->face_cells, sizeof(cs_gnum_t)*_n_b_faces*2);
2149 
2150     if (mb->n_perio > 1) {
2151 
2152       /* Assemble faces from multiple periodicities into one */
2153 
2154       for (perio_id = 0; perio_id < mb->n_perio; perio_id++) {
2155         n_per_face_couples += mb->n_per_face_couples[perio_id];
2156         n_g_per_face_couples += mb->n_g_per_face_couples[perio_id];
2157       }
2158 
2159       BFT_MALLOC(_per_face_couples, n_per_face_couples*2, cs_gnum_t);
2160       per_face_couples = _per_face_couples;
2161 
2162       n_per_face_couples = 0;
2163 
2164       for (perio_id = 0; perio_id < mb->n_perio; perio_id++) {
2165         cs_gnum_t *per_face_couples_p = _per_face_couples + 2*n_per_face_couples;
2166         memcpy(per_face_couples_p,
2167                mb->per_face_couples[perio_id],
2168                sizeof(cs_gnum_t)*mb->n_per_face_couples[perio_id]*2);
2169         n_per_face_couples += mb->n_per_face_couples[perio_id];
2170       }
2171 
2172     }
2173     else { /* if mb->n_perio == 1 */
2174       n_per_face_couples = mb->n_per_face_couples[0];
2175       n_g_per_face_couples = mb->n_g_per_face_couples[0];
2176       per_face_couples = mb->per_face_couples[0];
2177     }
2178 
2179     if (n_g_per_face_couples > 0) {
2180 
2181 #if defined(HAVE_MPI)
2182       if (cs_glob_n_ranks > 1)
2183         _add_perio_to_face_cells_g(mb->face_bi,
2184                                    _g_face_cells,
2185                                    n_per_face_couples,
2186                                    per_face_couples,
2187                                    cs_glob_mpi_comm);
2188 #endif
2189 
2190       if (n_ranks == 1)
2191         _add_perio_to_face_cells_l(_g_face_cells,
2192                                    n_per_face_couples,
2193                                    per_face_couples);
2194 
2195     }
2196 
2197     if (_per_face_couples != NULL)
2198       BFT_FREE(_per_face_couples);
2199   }
2200 
2201   else if (mb->n_perio > 0)
2202     bft_printf(_("\n"
2203                  " Ignoring periodicity for graph-based partitioning.\n"));
2204 
2205   /* Distribute faces if necessary */
2206 
2207 #if defined(HAVE_MPI)
2208 
2209   if (cs_glob_n_ranks > 1) {
2210 
2211     cs_datatype_t gnum_type
2212       = (sizeof(cs_gnum_t) == 8) ? CS_UINT64 : CS_UINT32;
2213     cs_gnum_t *g_face_cells_tmp = NULL;
2214 
2215     cs_all_to_all_t
2216       *d = cs_block_to_part_create_by_adj_s(cs_glob_mpi_comm,
2217                                             mb->face_bi,
2218                                             cell_bi,
2219                                             2,
2220                                             *g_face_cells,
2221                                             NULL,
2222                                             NULL,
2223                                             n_faces,
2224                                             NULL);
2225 
2226     BFT_MALLOC(g_face_cells_tmp, (*n_faces)*2, cs_gnum_t);
2227 
2228     /* Face -> cell connectivity */
2229 
2230     cs_all_to_all_copy_array(d,
2231                              gnum_type,
2232                              2,
2233                              true,  /* reverse */
2234                              *g_face_cells,
2235                              g_face_cells_tmp);
2236 
2237     if (_g_face_cells != NULL) /* in case of periodicity */
2238       BFT_FREE(_g_face_cells);
2239 
2240     *g_face_cells = g_face_cells_tmp;
2241 
2242     cs_all_to_all_destroy(&d);
2243   }
2244 
2245 #endif /* defined(HAVE_MPI) */
2246 
2247   if (cs_glob_n_ranks == 1)
2248     *n_faces = mb->n_g_faces;
2249 
2250   /* Prepare auxiliary return values */
2251 
2252   cell_range[0] = cell_bi.gnum_range[0];
2253   cell_range[1] = cell_bi.gnum_range[1];
2254 }
2255 
2256 #if   defined(HAVE_METIS) || defined(HAVE_PARMETIS) \
2257    || defined(HAVE_SCOTCH) || defined(HAVE_PTSCOTCH)
2258 
2259 /*----------------------------------------------------------------------------
2260  * Distribute partitioning info so as to match mesh builder block info.
2261  *
2262  * parameters:
2263  *   mb           <-- pointer to mesh builder structure
2264  *   rank_step    <-- Step between active partitioning ranks
2265  *                    (1 in basic case, > 1 if we seek to partition on a
2266  *                    reduced number of ranks)
2267  *   cell_range   <-- first and past-the-last cell numbers for this rank
2268  *   cell_rank    <-> pointer to pointer to cell rank
2269  *----------------------------------------------------------------------------*/
2270 
2271 static void
_distribute_output(const cs_mesh_builder_t * mb,int rank_step,const cs_gnum_t cell_range[2],int ** cell_rank)2272 _distribute_output(const cs_mesh_builder_t   *mb,
2273                    int                        rank_step,
2274                    const cs_gnum_t            cell_range[2],
2275                    int                      **cell_rank)
2276 {
2277   /* Distribute partitioning info if necessary */
2278 
2279 #if defined(HAVE_MPI)
2280 
2281   if (cs_glob_n_ranks > 1 && (mb->cell_bi.rank_step != rank_step)) {
2282 
2283     cs_gnum_t i;
2284     cs_gnum_t n_b_cells = 0, n_p_cells = 0;
2285     cs_datatype_t int_type = (sizeof(int) == 8) ? CS_INT64 : CS_INT32;
2286 
2287     cs_part_to_block_t *d = NULL;
2288     cs_gnum_t *global_cell_num = NULL;
2289 
2290     int *_cell_rank = NULL;
2291 
2292     if (mb->cell_bi.gnum_range[1] > mb->cell_bi.gnum_range[0])
2293       n_b_cells = mb->cell_bi.gnum_range[1] - mb->cell_bi.gnum_range[0];
2294 
2295     if (cell_range[1] > cell_range[0])
2296       n_p_cells = cell_range[1] - cell_range[0];
2297 
2298     BFT_MALLOC(_cell_rank, n_b_cells, int);
2299     BFT_MALLOC(global_cell_num, n_p_cells, cs_gnum_t);
2300 
2301     for (i = 0; i < n_p_cells; i++)
2302       global_cell_num[i] = cell_range[0] + i;
2303 
2304     d = cs_part_to_block_create_by_gnum(cs_glob_mpi_comm,
2305                                         mb->cell_bi,
2306                                         n_p_cells,
2307                                         global_cell_num);
2308     cs_part_to_block_transfer_gnum(d, global_cell_num);
2309     global_cell_num = NULL;
2310 
2311     /* Cell rank id */
2312 
2313     cs_part_to_block_copy_array(d,
2314                                 int_type,
2315                                 1,
2316                                 *cell_rank,
2317                                 _cell_rank);
2318 
2319     cs_part_to_block_destroy(&d);
2320 
2321     BFT_FREE(*cell_rank);
2322     *cell_rank = _cell_rank;
2323   }
2324 
2325 #endif /* defined(HAVE_MPI) */
2326 }
2327 
2328 #endif /*    defined(HAVE_METIS) || defined(HAVE_PARMETIS) \
2329           || defined(HAVE_SCOTCH) || defined(HAVE_PTSCOTCH) */
2330 
2331 /*----------------------------------------------------------------------------
2332  * Write output file.
2333  *
2334  * parameters:
2335  *   n_g_cells   <-- global number of cells
2336  *   cell_range  <-- first and past-the-last cell numbers for this rank
2337  *   n_ranks     <-- number of ranks corresonding to output file
2338  *   domain_rank <-- domain rank array (size: n_cells)
2339  *----------------------------------------------------------------------------*/
2340 
2341 static void
_write_output(cs_gnum_t n_g_cells,cs_gnum_t cell_range[2],int n_ranks,const int domain_rank[])2342 _write_output(cs_gnum_t  n_g_cells,
2343               cs_gnum_t  cell_range[2],
2344               int        n_ranks,
2345               const int  domain_rank[])
2346 {
2347   size_t i;
2348   int n_ranks_size;
2349   cs_file_access_t method;
2350   char *filename = NULL;
2351   cs_io_t *fh = NULL;
2352   int *domain_num = NULL;
2353   cs_datatype_t datatype_gnum = CS_DATATYPE_NULL;
2354   cs_datatype_t datatype_int = CS_DATATYPE_NULL;
2355 
2356   const char dir[] = "partition_output";
2357   const char magic_string[] = "Domain partitioning, R0";
2358 
2359   if (sizeof(int) == 4)
2360     datatype_int = CS_INT32;
2361   else if (sizeof(int) == 8)
2362     datatype_int = CS_INT64;
2363   else {
2364     assert(0);
2365   }
2366 
2367   if (sizeof(cs_gnum_t) == 4)
2368     datatype_gnum = CS_UINT32;
2369   else if (sizeof(cs_gnum_t) == 8)
2370     datatype_gnum = CS_UINT64;
2371   else {
2372     assert(0);
2373   }
2374 
2375   if (cell_range[1] > cell_range[0]) {
2376     cs_gnum_t _n_cells = cell_range[1] - cell_range[0];
2377     BFT_MALLOC(domain_num, _n_cells, int);
2378     for (i = 0; i < _n_cells; i++)
2379       domain_num[i] = domain_rank[i] + 1;
2380   }
2381 
2382   /* Create directory if required */
2383 
2384   if (cs_glob_rank_id < 1) {
2385     if (cs_file_isdir(dir) != 1) {
2386       if (cs_file_mkdir_default(dir) != 0)
2387         bft_error(__FILE__, __LINE__, errno,
2388                   _("The partitioning directory cannot be created"));
2389     }
2390   }
2391 #if defined(HAVE_MPI)
2392   if (cs_glob_n_ranks > 1)
2393     MPI_Barrier(cs_glob_mpi_comm);
2394 #endif
2395 
2396   /* Open file */
2397 
2398   for (i = n_ranks, n_ranks_size = 1;
2399        i >= 10;
2400        i /= 10, n_ranks_size += 1);
2401 
2402   BFT_MALLOC(filename,
2403              strlen(dir) + strlen("domain_number_") + n_ranks_size + 2,
2404              char);
2405 
2406   sprintf(filename,
2407           "%s%cdomain_number_%d",
2408           dir, _dir_separator, n_ranks);
2409 
2410 #if defined(HAVE_MPI)
2411   {
2412     MPI_Info  hints;
2413     MPI_Comm  block_comm, comm;
2414     cs_file_get_default_access(CS_FILE_MODE_WRITE, &method, &hints);
2415     cs_file_get_default_comm(NULL, &block_comm, &comm);
2416     assert(comm == cs_glob_mpi_comm || comm == MPI_COMM_NULL);
2417     fh = cs_io_initialize(filename,
2418                           magic_string,
2419                           CS_IO_MODE_WRITE,
2420                           method,
2421                           CS_IO_ECHO_OPEN_CLOSE,
2422                           hints,
2423                           block_comm,
2424                           comm);
2425   }
2426 #else
2427   {
2428     cs_file_get_default_access(CS_FILE_MODE_WRITE, &method);
2429     fh = cs_io_initialize(filename,
2430                           magic_string,
2431                           CS_IO_MODE_WRITE,
2432                           method,
2433                           CS_IO_ECHO_OPEN_CLOSE);
2434   }
2435 #endif
2436 
2437   BFT_FREE(filename);
2438 
2439   /* Write headers */
2440 
2441   cs_io_write_global("n_cells",
2442                      1,
2443                      1,
2444                      0,
2445                      1,
2446                      datatype_gnum,
2447                      &n_g_cells,
2448                      fh);
2449 
2450   cs_io_write_global("n_ranks",
2451                      1,
2452                      1,
2453                      0,
2454                      1,
2455                      datatype_int,
2456                      &n_ranks,
2457                      fh);
2458 
2459   cs_io_write_block_buffer("cell:domain number",
2460                            n_g_cells,
2461                            cell_range[0],
2462                            cell_range[1],
2463                            1,
2464                            0,
2465                            1,
2466                            datatype_int,
2467                            domain_num,
2468                            fh);
2469 
2470   cs_io_finalize(&fh);
2471 
2472   BFT_FREE(domain_num);
2473 }
2474 
2475 /*----------------------------------------------------------------------------
2476  * Read cell rank if available
2477  *
2478  * parameters:
2479  *   mesh <-- pointer to mesh structure
2480  *   mb   <-> pointer to mesh builder helper structure
2481  *   echo <-- echo (verbosity) level
2482  *----------------------------------------------------------------------------*/
2483 
2484 static void
_read_cell_rank(cs_mesh_t * mesh,cs_mesh_builder_t * mb,long echo)2485 _read_cell_rank(cs_mesh_t          *mesh,
2486                 cs_mesh_builder_t  *mb,
2487                 long                echo)
2488 {
2489   char file_name[64]; /* more than enough for
2490                          "partition_input/domain_number_<n_ranks>" */
2491   size_t  i;
2492   cs_file_access_t  method;
2493   cs_io_sec_header_t  header;
2494 
2495   cs_io_t  *rank_pp_in = NULL;
2496   cs_lnum_t   n_ranks = 0;
2497   cs_gnum_t   n_elts = 0;
2498   cs_gnum_t   n_g_cells = 0;
2499 
2500   const char magic_string[] = "Domain partitioning, R0";
2501   const char  *unexpected_msg = N_("Section of type <%s> on <%s>\n"
2502                                    "unexpected or of incorrect size");
2503 
2504   if (n_ranks == 1)
2505     return;
2506 
2507 #if (__STDC_VERSION__ < 199901L)
2508   sprintf(file_name,
2509           "partition_input%cdomain_number_%d",
2510           _dir_separator, cs_glob_n_ranks);
2511 #else
2512   snprintf(file_name, 64,
2513            "partition_input%cdomain_number_%d",
2514            _dir_separator, cs_glob_n_ranks);
2515 #endif
2516   file_name[63] = '\0'; /* Just in case; processor counts would need to be
2517                            in the exa-range for this to be necessary. */
2518 
2519   /* Test if file exists */
2520 
2521   if (! cs_file_isreg(file_name)) {
2522     bft_printf(_(" No \"%s\" file available;\n"), file_name);
2523     return;
2524   }
2525 
2526   /* Open file */
2527 
2528 #if defined(HAVE_MPI)
2529   {
2530     MPI_Info           hints;
2531     MPI_Comm           block_comm, comm;
2532     cs_file_get_default_access(CS_FILE_MODE_WRITE, &method, &hints);
2533     cs_file_get_default_comm(NULL, &block_comm, &comm);
2534     assert(comm == cs_glob_mpi_comm || comm == MPI_COMM_NULL);
2535     rank_pp_in = cs_io_initialize(file_name,
2536                                   magic_string,
2537                                   CS_IO_MODE_READ,
2538                                   method,
2539                                   echo,
2540                                   hints,
2541                                   block_comm,
2542                                   comm);
2543   }
2544 #else
2545   {
2546     cs_file_get_default_access(CS_FILE_MODE_WRITE, &method);
2547     rank_pp_in = cs_io_initialize(file_name,
2548                                   magic_string,
2549                                   CS_IO_MODE_READ,
2550                                   method,
2551                                   echo);
2552   }
2553 #endif
2554 
2555   if (echo > 0)
2556     bft_printf("\n");
2557 
2558   /* Loop on read sections */
2559 
2560   while (rank_pp_in != NULL) {
2561 
2562     /* Receive headers */
2563 
2564     cs_io_read_header(rank_pp_in, &header);
2565 
2566     /* Treatment according to the header name */
2567 
2568     if (strncmp(header.sec_name, "n_cells",
2569                 CS_IO_NAME_LEN) == 0) {
2570 
2571       if (header.n_vals != 1)
2572         bft_error(__FILE__, __LINE__, 0,
2573                   _(unexpected_msg), header.sec_name,
2574                   cs_io_get_name(rank_pp_in));
2575       else {
2576         cs_io_set_cs_gnum(&header, rank_pp_in);
2577         cs_io_read_global(&header, &n_g_cells, rank_pp_in);
2578         if (n_g_cells != mesh->n_g_cells)
2579           bft_error(__FILE__, __LINE__, 0,
2580                     _("The number of cells reported by file\n"
2581                       "\"%s\" (%llu)\n"
2582                       "does not correspond to those of the mesh (%llu)."),
2583                     cs_io_get_name(rank_pp_in),
2584                     (unsigned long long)(n_g_cells),
2585                     (unsigned long long)(mesh->n_g_cells));
2586       }
2587 
2588     }
2589     else if (strncmp(header.sec_name, "n_ranks",
2590                      CS_IO_NAME_LEN) == 0) {
2591 
2592       if (header.n_vals != 1)
2593         bft_error(__FILE__, __LINE__, 0,
2594                   _(unexpected_msg), header.sec_name,
2595                   cs_io_get_name(rank_pp_in));
2596       else {
2597         cs_io_set_cs_lnum(&header, rank_pp_in);
2598         cs_io_read_global(&header, &n_ranks, rank_pp_in);
2599         if (n_ranks != cs_glob_n_ranks)
2600           bft_error(__FILE__, __LINE__, 0,
2601                     _("The number of ranks reported by file\n"
2602                       "\"%s\" (%d) does not\n"
2603                       "correspond to the current number of ranks (%d)."),
2604                     cs_io_get_name(rank_pp_in), (int)n_ranks,
2605                     (int)cs_glob_n_ranks);
2606       }
2607 
2608     }
2609     else if (strncmp(header.sec_name, "cell:domain number",
2610                      CS_IO_NAME_LEN) == 0) {
2611 
2612       n_elts = mesh->n_g_cells;
2613       if (header.n_vals != (cs_file_off_t)n_elts)
2614         bft_error(__FILE__, __LINE__, 0,
2615                   _(unexpected_msg), header.sec_name,
2616                   cs_io_get_name(rank_pp_in));
2617       else {
2618         mb->have_cell_rank = true;
2619         cs_io_set_cs_lnum(&header, rank_pp_in);
2620         if (mb->cell_bi.gnum_range[0] > 0)
2621           n_elts = mb->cell_bi.gnum_range[1] - mb->cell_bi.gnum_range[0];
2622         BFT_MALLOC(mb->cell_rank, n_elts, int);
2623         cs_io_read_block(&header,
2624                          mb->cell_bi.gnum_range[0],
2625                          mb->cell_bi.gnum_range[1],
2626                          mb->cell_rank, rank_pp_in);
2627         for (i = 0; i < n_elts; i++) /* Convert 1 to n to 0 to n-1 */
2628           mb->cell_rank[i] -= 1;
2629       }
2630       cs_io_finalize(&rank_pp_in);
2631       rank_pp_in = NULL;
2632 
2633     }
2634 
2635     else
2636       bft_error(__FILE__, __LINE__, 0,
2637                 _("Section of type <%s> on <%s> is unexpected."),
2638                 header.sec_name, cs_io_get_name(rank_pp_in));
2639   }
2640 
2641   if (rank_pp_in != NULL)
2642     cs_io_finalize(&rank_pp_in);
2643 }
2644 
2645 /*----------------------------------------------------------------------------*
2646  * Define a naive partitioning by blocks.
2647  *
2648  * parameters:
2649  *   mesh      <-- pointer to mesh structure
2650  *   mb        <-- pointer to mesh builder structure
2651  *   cell_part --> assigned cell partition
2652  *----------------------------------------------------------------------------*/
2653 
2654 static void
_block_partititioning(const cs_mesh_t * mesh,const cs_mesh_builder_t * mb,int * cell_part)2655 _block_partititioning(const cs_mesh_t          *mesh,
2656                       const cs_mesh_builder_t  *mb,
2657                       int                      *cell_part)
2658 {
2659   cs_lnum_t i;
2660 
2661   int  n_ranks = cs_glob_n_ranks;
2662   cs_lnum_t block_size = mesh->n_g_cells / n_ranks;
2663   cs_lnum_t n_cells = mb->cell_bi.gnum_range[1] - mb->cell_bi.gnum_range[0];
2664 
2665   if (mesh->n_g_cells % n_ranks)
2666     block_size += 1;
2667 
2668   /* Use variable block size if necessary so that all ranks have some
2669      cells assigned if possible */
2670 
2671   if ((cs_lnum_t)((mesh->n_g_cells - 1) % block_size) < n_ranks - 1) {
2672 
2673     cs_gnum_t cells_per_rank = mesh->n_g_cells / n_ranks;
2674     cs_lnum_t rmdr = mesh->n_g_cells - cells_per_rank * (cs_gnum_t)n_ranks;
2675 
2676     if (rmdr == 0) {
2677       for (i = 0; i < n_cells; i++) {
2678         cs_gnum_t cell_num = mb->cell_bi.gnum_range[0] + i;
2679         cell_part[i] = (cell_num - 1) / cells_per_rank;
2680       }
2681     }
2682     else {
2683       cs_gnum_t n_ranks_rmdr = n_ranks - rmdr;
2684       cs_gnum_t n_ranks_cells_per_rank = n_ranks_rmdr * cells_per_rank;
2685       for (i = 0; i < n_cells; i++) {
2686         cs_gnum_t cell_num = mb->cell_bi.gnum_range[0] + i;
2687         if ((cell_num - 1)  <  n_ranks_cells_per_rank)
2688           cell_part[i] = (cell_num - 1) / cells_per_rank;
2689         else
2690           cell_part[i] = (cell_num + n_ranks_rmdr - 1) / (cells_per_rank + 1);
2691       }
2692     }
2693 
2694   }
2695 
2696   else {
2697     for (i = 0; i < n_cells; i++) {
2698       cs_gnum_t cell_num = mb->cell_bi.gnum_range[0] + i;
2699       cell_part[i] = ((cell_num - 1) / block_size);
2700     }
2701   }
2702 
2703 }
2704 
2705 /*----------------------------------------------------------------------------*
2706  * Indicate if a usable graph partitioning algorithm is available
2707  * for a given partitioning stage.
2708  *
2709  * If a non-default partitioning algorithm for this stage has ben defined,
2710  * it is considered usable here (checking may be done later).
2711  *
2712  * parameters:
2713  *   stage <-- associated partitioning stage
2714  *
2715  * returns:
2716  *   true if graph partitioning is available, false otherwise.
2717  *----------------------------------------------------------------------------*/
2718 
2719 static bool
_have_usable_graph_partitioning(cs_partition_stage_t stage)2720 _have_usable_graph_partitioning(cs_partition_stage_t  stage)
2721 {
2722   bool retval = false;
2723 
2724   cs_partition_algorithm_t a = _part_algorithm[stage];
2725 
2726   int n_part_ranks = cs_glob_n_ranks / _part_rank_step[stage];
2727   if (n_part_ranks < 1)
2728     n_part_ranks = 1;
2729 
2730   if (a >= CS_PARTITION_SCOTCH && a <= CS_PARTITION_METIS)
2731     retval = true;
2732 
2733 #if defined(HAVE_PTSCOTCH)
2734   if (a == CS_PARTITION_DEFAULT)
2735     retval = true;
2736 #elif defined(HAVE_SCOTCH)
2737   if (n_part_ranks == 1 && a == CS_PARTITION_DEFAULT)
2738     retval = true;
2739 #elif defined(HAVE_PARMETIS)
2740   if (a == CS_PARTITION_DEFAULT)
2741     retval = true;
2742 #elif defined(HAVE_METIS)
2743   if (n_part_ranks == 1 && a == CS_PARTITION_DEFAULT)
2744     retval = true;
2745 #endif
2746 
2747   return retval;
2748 }
2749 
2750 /*----------------------------------------------------------------------------*
2751  * Return default algorithm for domain partitioning.
2752  *
2753  * parameters:
2754  *   stage <-- associated partitioning stage
2755  *
2756  * returns:
2757  *   default partitioning algorithm for this stage
2758  *----------------------------------------------------------------------------*/
2759 
2760 static cs_partition_algorithm_t
_select_algorithm(cs_partition_stage_t stage)2761 _select_algorithm(cs_partition_stage_t  stage)
2762 {
2763   cs_partition_algorithm_t retval = _part_algorithm[stage];
2764 
2765   if (retval == CS_PARTITION_DEFAULT) {
2766 
2767     int n_part_ranks;
2768 
2769     n_part_ranks = cs_glob_n_ranks / _part_rank_step[stage];
2770     if (n_part_ranks < 1)
2771       n_part_ranks = 1;
2772 
2773     /* last stage of 1 or 2 */
2774 
2775 #if defined(HAVE_PTSCOTCH)
2776     if (retval == CS_PARTITION_DEFAULT)
2777       retval = CS_PARTITION_SCOTCH;
2778 #elif defined(HAVE_SCOTCH)
2779     if (n_part_ranks == 1 && retval == CS_PARTITION_DEFAULT)
2780       retval = CS_PARTITION_SCOTCH;
2781 #elif defined(HAVE_PARMETIS)
2782     retval = CS_PARTITION_METIS;
2783 #elif defined(HAVE_METIS)
2784     if (n_part_ranks == 1 && retval == CS_PARTITION_DEFAULT)
2785       retval = CS_PARTITION_METIS;
2786 #endif
2787     if (retval == CS_PARTITION_DEFAULT)
2788       retval = CS_PARTITION_SFC_MORTON_BOX;
2789 
2790     /* 1st stage of 2:
2791        If 2nd stage uses a space-filling curve, use same curve by default;
2792        otherwise, use default space-filling curve. */
2793 
2794     if (stage == CS_PARTITION_FOR_PREPROCESS) {
2795 
2796       if (   _part_algorithm[CS_PARTITION_MAIN] >= CS_PARTITION_SFC_MORTON_BOX
2797           && _part_algorithm[CS_PARTITION_MAIN] <= CS_PARTITION_SFC_HILBERT_CUBE)
2798         retval = _part_algorithm[CS_PARTITION_MAIN];
2799       else
2800         retval = CS_PARTITION_SFC_MORTON_BOX;
2801 
2802     }
2803 
2804   }
2805 
2806   return retval;
2807 }
2808 
2809 /*! (DOXYGEN_SHOULD_SKIP_THIS) \endcond */
2810 
2811 /*============================================================================
2812  * Public function definitions
2813  *============================================================================*/
2814 
2815 /*----------------------------------------------------------------------------*/
2816 /*!
2817  * \brief Print information on external libraries
2818  *
2819  * \param[in]  log_type  log type
2820  */
2821 /*----------------------------------------------------------------------------*/
2822 
2823 void
cs_partition_external_library_info(cs_log_t log_type)2824 cs_partition_external_library_info(cs_log_t  log_type)
2825 {
2826 #if defined(HAVE_METIS)
2827 #if defined(HAVE_METIS_H) && defined(METIS_VER_MAJOR)
2828   cs_log_printf(log_type,
2829                 "    METIS %d.%d.%d\n",
2830                 METIS_VER_MAJOR, METIS_VER_MINOR, METIS_VER_SUBMINOR);
2831 #else
2832   cs_log_printf(log_type,
2833                 "    METIS\n");
2834 #endif
2835 #elif defined(HAVE_PARMETIS)
2836 #if defined(PARMETIS_MAJOR_VERSION) && defined(PARMETIS_SUBMINOR_VERSION)
2837   cs_log_printf(log_type,
2838                 "    ParMETIS %d.%d.%d\n",
2839                 PARMETIS_MAJOR_VERSION, PARMETIS_MINOR_VERSION,
2840                 PARMETIS_SUBMINOR_VERSION);
2841 #elif defined(PARMETIS_MAJOR_VERSION)
2842   cs_log_printf(log_type,
2843                 "    ParMETIS %d.%d\n",
2844                 PARMETIS_MAJOR_VERSION, PARMETIS_MINOR_VERSION);
2845 #else
2846   cs_log_printf(log_type,
2847                 "    ParMETIS\n");
2848 #endif
2849 #endif
2850 
2851 #if defined(HAVE_SCOTCH)
2852 #if defined(SCOTCH_VERSION) && defined(SCOTCH_RELEASE)
2853   cs_log_printf(log_type,
2854                 "    SCOTCH %d.%d.%d\n",
2855                 SCOTCH_VERSION, SCOTCH_RELEASE, SCOTCH_PATCHLEVEL);
2856 #else
2857   cs_log_printf(log_type,
2858                 "    SCOTCH\n");
2859 #endif
2860 #elif defined(HAVE_PTSCOTCH)
2861 #if defined(SCOTCH_VERSION) && defined(SCOTCH_RELEASE)
2862   cs_log_printf(log_type,
2863                 "    PT-SCOTCH %d.%d.%d\n",
2864                 SCOTCH_VERSION, SCOTCH_RELEASE, SCOTCH_PATCHLEVEL);
2865 #else
2866   cs_log_printf(log_type,
2867                 "    PT-SCOTCH\n");
2868 #endif
2869 #endif
2870 }
2871 
2872 /*----------------------------------------------------------------------------*/
2873 /*!
2874  * \brief  Set algorithm for domain partitioning.
2875  *
2876  * \param[in]  stage         associated partitioning stage
2877  * \param[in]  algorithm     partitioning algorithm choice
2878  * \param[in]  rank_step     if > 1, partitioning done on at most
2879  *                           n_ranks / rank_step processes
2880  *                           (for graph-based partitioning only)
2881  * \param[in]  ignore_perio  if true, ignore periodicity information when
2882  *                           present (for graph-based partitioning only)
2883  */
2884 /*----------------------------------------------------------------------------*/
2885 
2886 void
cs_partition_set_algorithm(cs_partition_stage_t stage,cs_partition_algorithm_t algorithm,int rank_step,bool ignore_perio)2887 cs_partition_set_algorithm(cs_partition_stage_t      stage,
2888                            cs_partition_algorithm_t  algorithm,
2889                            int                       rank_step,
2890                            bool                      ignore_perio)
2891 {
2892   int n_part_ranks = cs_glob_n_ranks / rank_step;
2893 
2894   if (n_part_ranks < 1) {
2895     rank_step = cs_glob_n_ranks;
2896     n_part_ranks = 1;
2897   }
2898 
2899   /* Check consistency of choice */
2900 
2901 #if !defined(HAVE_PTSCOTCH)
2902   if (algorithm == CS_PARTITION_SCOTCH) {
2903 #if defined(HAVE_SCOTCH)
2904     if (n_part_ranks > 1) {
2905       rank_step = cs_glob_n_ranks;
2906       cs_base_warn(__FILE__, __LINE__);
2907       bft_printf(
2908         _("Partitioning with %s requested, but %s is not available,\n"
2909            "so serial %s will be used."), "LibSCOTCH", "PT-SCOTCH", "SCOTCH");
2910     }
2911 #else
2912     bft_error(__FILE__, __LINE__, 0,
2913               _("Partitioning with %s required but neither\n"
2914                 "%s nor %s is available."), "LibSCOTCH", "PT-SCOTCH", "SCOTCH");
2915 #endif
2916   }
2917 #endif /* defined(HAVE_PTSCOTCH) */
2918 
2919 #if !defined(HAVE_PARMETIS)
2920   if (algorithm == CS_PARTITION_METIS) {
2921 #if defined(HAVE_METIS)
2922     if (n_part_ranks > 1) {
2923       rank_step = cs_glob_n_ranks;
2924       cs_base_warn(__FILE__, __LINE__);
2925       bft_printf(
2926          _("Partitioning with %s requested, but %s is not available,\n"
2927            "so serial %s will be used."), "METIS", "ParMETIS", "METIS");
2928     }
2929 #else
2930     bft_error(__FILE__, __LINE__, 0,
2931               _("Partitioning with %s required but neither\n"
2932                 "%s nor %s is available."), "METIS", "ParMETIS", "METIS");
2933 #endif
2934   }
2935 #endif /* defined(HAVE_PARMETIS) */
2936 
2937   /* Set options */
2938 
2939   _part_algorithm[stage] = algorithm;
2940   _part_rank_step[stage] = rank_step;
2941   _part_ignore_perio[stage] = ignore_perio;
2942 }
2943 
2944 /*----------------------------------------------------------------------------*/
2945 /*!
2946  * \brief Set partitioning write to file option.
2947  *
2948  * Partitioning information for subsequent calculations is written to file
2949  * after the last partitioning stage depending on the output level.
2950  *
2951  * Note that partitioning information for additional partitionings is
2952  * always written to file, regardless of this option.
2953  *
2954  * \param[in]  write_flag  option to save partitioning information:
2955  *                         0: never
2956  *                         1: for graph-based partitioning only
2957  *                         2: always
2958  */
2959 /*----------------------------------------------------------------------------*/
2960 
2961 void
cs_partition_set_write_level(int write_flag)2962 cs_partition_set_write_level(int  write_flag)
2963 {
2964   _part_write_output = write_flag;
2965 }
2966 
2967 /*----------------------------------------------------------------------------*/
2968 /*!
2969  * \brief Define hints indicating if initial partitioning fo a preprocessing
2970  *        stage is required.
2971  *
2972  * \param[in]  join           true if a mesh joining operation is planned.
2973  * \param[in]  join_periodic  true if a mesh periodic matching operation
2974  *                            is planned
2975  */
2976 /*----------------------------------------------------------------------------*/
2977 
2978 void
cs_partition_set_preprocess_hints(bool join,bool join_periodic)2979 cs_partition_set_preprocess_hints(bool  join,
2980                                   bool  join_periodic)
2981 {
2982   _part_compute_join_hint = join;
2983   _part_compute_perio_hint = join_periodic;
2984 }
2985 
2986 /*----------------------------------------------------------------------------*/
2987 /*!
2988  * \brief Activate or deactivate initial partitioning for preprocessing.
2989  *
2990  * \param[in]  active  true to activate pre-partitiong for the preprocessing
2991  *                     stage, false to de-activate it
2992  */
2993 /*----------------------------------------------------------------------------*/
2994 
2995 void
cs_partition_set_preprocess(bool active)2996 cs_partition_set_preprocess(bool  active)
2997 {
2998   if (active)
2999     _part_preprocess_active = 2;
3000   else
3001     _part_preprocess_active = 0;
3002 }
3003 
3004 /*----------------------------------------------------------------------------*/
3005 /*!
3006  * \brief Indicate if re-partitiong for the computation stage is required.
3007  *
3008  * \return  true if initial partitioning for preprocessing is active,
3009  *          false otherwise
3010  */
3011 /*----------------------------------------------------------------------------*/
3012 
3013 bool
cs_partition_get_preprocess(void)3014 cs_partition_get_preprocess(void)
3015 {
3016   bool retval = false;
3017 
3018   if (_part_preprocess_active < 1)
3019     retval = false;
3020 
3021   else if (_part_preprocess_active == 1) {
3022 
3023     if (_have_usable_graph_partitioning(CS_PARTITION_MAIN)) {
3024       if (_part_compute_join_hint)
3025         retval = true;
3026       if (   _part_compute_perio_hint
3027           && _part_ignore_perio[CS_PARTITION_MAIN] == false)
3028         retval = true;
3029     }
3030   }
3031 
3032   else
3033     retval = true;
3034 
3035   if (cs_glob_n_ranks < 2)
3036     retval = false;
3037 
3038   return retval;
3039 }
3040 
3041 /*----------------------------------------------------------------------------*/
3042 /*!
3043  * \brief Define list of extra partitionings to build.
3044  *
3045  * Partitionings in this list will be output to file, and may be used for
3046  * subsequent calculations.
3047  *
3048  * When partitioning for both preprocessing and calculation stages, output to
3049  * file of partioning data or generation of additional partitionings
3050  * (see \ref cs_partition_add_partitions) will only be done for the
3051  * second stage.
3052  *
3053  * \param[in]  n_extra_partitions     number of extra partitionings to compute
3054  * \param[in]  extra_partitions_list  list of extra partitions to compute
3055  */
3056 /*----------------------------------------------------------------------------*/
3057 
3058 void
cs_partition_add_partitions(int n_extra_partitions,int extra_partitions_list[])3059 cs_partition_add_partitions(int  n_extra_partitions,
3060                             int  extra_partitions_list[])
3061 {
3062   _part_n_extra_partitions = n_extra_partitions;
3063 
3064   BFT_REALLOC(_part_extra_partitions_list, n_extra_partitions, int);
3065 
3066   if (n_extra_partitions > 0)
3067     memcpy(_part_extra_partitions_list,
3068            extra_partitions_list,
3069            sizeof(int)*n_extra_partitions);
3070 }
3071 
3072 /*----------------------------------------------------------------------------*/
3073 /*!
3074  * \brief Partition mesh based on current options.
3075  *
3076  * \param[in]       mesh   pointer to mesh structure
3077  * \param[in, out]  mb     pointer to mesh builder structure
3078  * \param[in]       stage  associated partitioning stage
3079  */
3080 /*----------------------------------------------------------------------------*/
3081 
3082 void
cs_partition(cs_mesh_t * mesh,cs_mesh_builder_t * mb,cs_partition_stage_t stage)3083 cs_partition(cs_mesh_t             *mesh,
3084              cs_mesh_builder_t     *mb,
3085              cs_partition_stage_t   stage)
3086 {
3087   cs_timer_t t0, t1;
3088   cs_timer_counter_t dt;
3089 
3090   int n_part_ranks = cs_glob_n_ranks / _part_rank_step[stage];
3091   cs_partition_algorithm_t _algorithm = _select_algorithm(stage);
3092 
3093   bool write_output = false;
3094   int  n_extra_partitions = 0;
3095 
3096   int  *cell_part = NULL;
3097 
3098   cs_gnum_t  cell_range[2] = {0, 0};
3099   cs_lnum_t  n_cells = 0;
3100   cs_lnum_t  n_faces = 0;
3101   cs_gnum_t  *face_cells = NULL;
3102 
3103   /* Initialize local options */
3104 
3105   if (stage == CS_PARTITION_MAIN) {
3106     if (   (   _algorithm == CS_PARTITION_METIS
3107             || _algorithm == CS_PARTITION_SCOTCH)
3108         && _part_write_output > 0)
3109       write_output = true;
3110     else if (_part_write_output > 1)
3111       write_output = true;
3112     n_extra_partitions = _part_n_extra_partitions;
3113   }
3114 
3115   if (n_part_ranks < 1)
3116     n_part_ranks = 1;
3117 
3118   /* Free previous cell rank info if present */
3119 
3120   if (mb->cell_rank != NULL)
3121     BFT_FREE(mb->cell_rank);
3122 
3123   /* Read cell rank data if available */
3124 
3125   if (cs_glob_n_ranks > 1) {
3126     if (   stage != CS_PARTITION_MAIN
3127         || cs_partition_get_preprocess() == false) {
3128       _read_cell_rank(mesh, mb, CS_IO_ECHO_OPEN_CLOSE);
3129       if (mb->have_cell_rank)
3130         return;
3131     }
3132   }
3133   else { /* if (cs_glob_n_ranks == 1) */
3134     if (stage != CS_PARTITION_MAIN || n_extra_partitions < 1)
3135       return;
3136   }
3137 
3138   (void)cs_timer_wtime();
3139 
3140   /* Print header */
3141 
3142   bft_printf("\n ----------------------------------------------------------\n");
3143 
3144   cs_log_printf(CS_LOG_PERFORMANCE,
3145                 _("\n"
3146                   "Partitioning:\n\n"));
3147 
3148   /* Start timer */
3149 
3150   t0 = cs_timer_time();
3151 
3152   /* Adapt builder data for partitioning */
3153 
3154   if (_algorithm == CS_PARTITION_METIS || _algorithm == CS_PARTITION_SCOTCH) {
3155 
3156     _prepare_input(mesh,
3157                    mb,
3158                    _part_rank_step[stage],
3159                    _part_ignore_perio[stage],
3160                    cell_range,
3161                    &n_faces,
3162                    &face_cells);
3163 
3164     n_cells = cell_range[1] - cell_range[0];
3165 
3166   }
3167   else {
3168 
3169     n_part_ranks = cs_glob_n_ranks;
3170     n_cells = mb->cell_bi.gnum_range[1] - mb->cell_bi.gnum_range[0];
3171 
3172   }
3173 
3174   /* Build and partition graph */
3175 
3176 #if defined(HAVE_METIS) || defined(HAVE_PARMETIS)
3177 
3178   if (_algorithm == CS_PARTITION_METIS) {
3179 
3180     int  i;
3181     cs_timer_t  t2;
3182     idx_t  *cell_idx = NULL, *cell_neighbors = NULL;
3183 
3184     _metis_cell_cells(n_cells,
3185                       n_faces,
3186                       cell_range[0],
3187                       face_cells,
3188                       &cell_idx,
3189                       &cell_neighbors);
3190 
3191     if (face_cells != mb->face_cells)
3192       BFT_FREE(face_cells);
3193 
3194     t2 = cs_timer_time();
3195     dt = cs_timer_diff(&t0, &t2);
3196 
3197     cs_log_printf(CS_LOG_PERFORMANCE,
3198                   _("  preparing graph:            %.3g s\n"),
3199                   (double)(dt.nsec)/1.e9);
3200 
3201 #if defined(HAVE_PARMETIS)
3202 
3203     if (n_part_ranks > 1) {
3204 
3205       MPI_Comm part_comm = cs_glob_mpi_comm;
3206 
3207       if (_part_rank_step[stage] > 1)
3208         part_comm = _init_reduced_communicator(_part_rank_step[stage]);
3209 
3210       for (i = 0; i < n_extra_partitions + 1; i++) {
3211 
3212         int  n_ranks = cs_glob_n_ranks;
3213 
3214         if (i < n_extra_partitions) {
3215           n_ranks = _part_extra_partitions_list[i];
3216           if (n_ranks == cs_glob_n_ranks) {
3217             write_output = true;
3218             continue;
3219           }
3220         }
3221 
3222         if (n_ranks < 2)
3223           continue;
3224 
3225         BFT_REALLOC(cell_part, n_cells, int);
3226 
3227         if (cs_glob_rank_id % _part_rank_step[stage] == 0)
3228           _part_parmetis(mesh->n_g_cells,
3229                          cell_range,
3230                          n_ranks,
3231                          cell_idx,
3232                          cell_neighbors,
3233                          cell_part,
3234                          part_comm);
3235 
3236         _distribute_output(mb,
3237                            _part_rank_step[stage],
3238                            cell_range,
3239                            &cell_part);
3240 
3241         _cell_part_histogram(mb->cell_bi.gnum_range, n_ranks, cell_part);
3242 
3243         if (write_output || i < n_extra_partitions)
3244           _write_output(mesh->n_g_cells,
3245                         mb->cell_bi.gnum_range,
3246                         n_ranks,
3247                         cell_part);
3248       }
3249 
3250       if (part_comm != cs_glob_mpi_comm && part_comm != MPI_COMM_NULL)
3251         MPI_Comm_free(&part_comm);
3252     }
3253 
3254 #endif
3255 
3256     if (n_part_ranks == 1) {
3257 
3258       for (i = 0; i < n_extra_partitions + 1; i++) {
3259 
3260         int  n_ranks = cs_glob_n_ranks;
3261 
3262         if (i < n_extra_partitions) {
3263           n_ranks = _part_extra_partitions_list[i];
3264           if (n_ranks == cs_glob_n_ranks) {
3265             write_output = true;
3266             continue;
3267           }
3268         }
3269 
3270         if (n_ranks < 2)
3271           continue;
3272 
3273         BFT_REALLOC(cell_part, n_cells, int);
3274 
3275         if (cs_glob_rank_id < 0 || (cs_glob_rank_id % _part_rank_step[stage] == 0))
3276           _part_metis(n_cells,
3277                       n_ranks,
3278                       cell_idx,
3279                       cell_neighbors,
3280                       cell_part);
3281 
3282         _distribute_output(mb,
3283                            _part_rank_step[stage],
3284                            cell_range,
3285                            &cell_part);
3286 
3287         _cell_part_histogram(mb->cell_bi.gnum_range, n_ranks, cell_part);
3288 
3289         if (write_output || i < n_extra_partitions)
3290           _write_output(mesh->n_g_cells,
3291                         mb->cell_bi.gnum_range,
3292                         n_ranks,
3293                         cell_part);
3294       }
3295     }
3296 
3297     BFT_FREE(cell_idx);
3298     BFT_FREE(cell_neighbors);
3299   }
3300 
3301 #endif /* defined(HAVE_METIS) || defined(HAVE_PARMETIS) */
3302 
3303 #if defined(HAVE_SCOTCH) || defined(HAVE_PTSCOTCH)
3304 
3305   if (_algorithm == CS_PARTITION_SCOTCH) {
3306 
3307     int  i;
3308     cs_timer_t  t2;
3309     SCOTCH_Num  *cell_idx = NULL, *cell_neighbors = NULL;
3310 
3311     _scotch_cell_cells(n_cells,
3312                        n_faces,
3313                        cell_range[0],
3314                        face_cells,
3315                        &cell_idx,
3316                        &cell_neighbors);
3317 
3318     if (face_cells != mb->face_cells)
3319       BFT_FREE(face_cells);
3320 
3321     t2 = cs_timer_time();
3322     dt = cs_timer_diff(&t0, &t2);
3323 
3324     cs_log_printf(CS_LOG_PERFORMANCE,
3325                   _("  preparing graph:            %.3g s\n"),
3326                   (double)(dt.nsec)/1.e9);
3327 
3328 #if defined(HAVE_PTSCOTCH)
3329 
3330     if (n_part_ranks > 1) {
3331 
3332       MPI_Comm part_comm = cs_glob_mpi_comm;
3333 
3334       if (_part_rank_step[stage] > 1)
3335         part_comm = _init_reduced_communicator(_part_rank_step[stage]);
3336 
3337       for (i = 0; i < n_extra_partitions + 1; i++) {
3338 
3339         int  n_ranks = cs_glob_n_ranks;
3340 
3341         if (i < n_extra_partitions) {
3342           n_ranks = _part_extra_partitions_list[i];
3343           if (n_ranks == cs_glob_n_ranks) {
3344             write_output = true;
3345             continue;
3346           }
3347         }
3348 
3349         if (n_ranks < 2)
3350           continue;
3351 
3352         BFT_REALLOC(cell_part, n_cells, int);
3353 
3354         if (cs_glob_rank_id % _part_rank_step[stage] == 0)
3355           _part_ptscotch(mesh->n_g_cells,
3356                          cell_range,
3357                          n_ranks,
3358                          cell_idx,
3359                          cell_neighbors,
3360                          cell_part,
3361                          part_comm);
3362 
3363         _distribute_output(mb,
3364                            _part_rank_step[stage],
3365                            cell_range,
3366                            &cell_part);
3367 
3368         _cell_part_histogram(mb->cell_bi.gnum_range, n_ranks, cell_part);
3369 
3370         if (write_output || i < n_extra_partitions)
3371           _write_output(mesh->n_g_cells,
3372                         mb->cell_bi.gnum_range,
3373                         n_ranks,
3374                         cell_part);
3375       }
3376 
3377       if (part_comm != cs_glob_mpi_comm && part_comm != MPI_COMM_NULL)
3378         MPI_Comm_free(&part_comm);
3379     }
3380 
3381 #endif
3382 
3383     if (n_part_ranks == 1) {
3384 
3385       for (i = 0; i < n_extra_partitions + 1; i++) {
3386 
3387         int  n_ranks = cs_glob_n_ranks;
3388 
3389         if (i < n_extra_partitions) {
3390           n_ranks = _part_extra_partitions_list[i];
3391           if (n_ranks == cs_glob_n_ranks) {
3392             write_output = true;
3393             continue;
3394           }
3395         }
3396 
3397         if (n_ranks < 2)
3398           continue;
3399 
3400         BFT_REALLOC(cell_part, n_cells, int);
3401 
3402         if (cs_glob_rank_id < 0 || (cs_glob_rank_id % _part_rank_step[stage] == 0))
3403           _part_scotch(n_cells,
3404                        n_ranks,
3405                        cell_idx,
3406                        cell_neighbors,
3407                        cell_part);
3408 
3409         _distribute_output(mb,
3410                            _part_rank_step[stage],
3411                            cell_range,
3412                            &cell_part);
3413 
3414         _cell_part_histogram(mb->cell_bi.gnum_range, n_ranks, cell_part);
3415 
3416         if (write_output || i < n_extra_partitions)
3417           _write_output(mesh->n_g_cells,
3418                         mb->cell_bi.gnum_range,
3419                         n_ranks,
3420                         cell_part);
3421       }
3422     }
3423 
3424     BFT_FREE(cell_idx);
3425     BFT_FREE(cell_neighbors);
3426   }
3427 
3428 #endif /* defined(HAVE_SCOTCH) || defined(HAVE_PTSCOTCH) */
3429 
3430   if (   _algorithm >= CS_PARTITION_SFC_MORTON_BOX
3431       && _algorithm <= CS_PARTITION_SFC_HILBERT_CUBE) {
3432 
3433     int i;
3434     fvm_io_num_sfc_t sfc_type = _algorithm - CS_PARTITION_SFC_MORTON_BOX;
3435 
3436     BFT_MALLOC(cell_part, n_cells, int);
3437 
3438     for (i = 0; i < n_extra_partitions + 1; i++) {
3439 
3440       int  n_ranks = cs_glob_n_ranks;
3441 
3442       if (i < n_extra_partitions) {
3443         n_ranks = _part_extra_partitions_list[i];
3444         if (n_ranks == cs_glob_n_ranks) {
3445           write_output = true;
3446           continue;
3447         }
3448       }
3449 
3450       if (n_ranks < 2)
3451         continue;
3452 
3453 #if defined(HAVE_MPI)
3454       _cell_rank_by_sfc(mesh->n_g_cells,
3455                         n_ranks,
3456                         mb,
3457                         sfc_type,
3458                         cell_part,
3459                         cs_glob_mpi_comm);
3460 #else
3461       _cell_rank_by_sfc(mesh->n_g_cells, n_ranks, mb, sfc_type, cell_part);
3462 #endif
3463 
3464       _cell_part_histogram(mb->cell_bi.gnum_range, n_ranks, cell_part);
3465 
3466       if (write_output || i < n_extra_partitions)
3467         _write_output(mesh->n_g_cells,
3468                       mb->cell_bi.gnum_range,
3469                       n_ranks,
3470                       cell_part);
3471     }
3472 
3473   }
3474 
3475   /* Naive partitioner */
3476 
3477   else if (_algorithm == CS_PARTITION_BLOCK) {
3478 
3479     BFT_MALLOC(cell_part, n_cells, int);
3480 
3481     _block_partititioning(mesh, mb, cell_part);
3482 
3483   }
3484 
3485   /* Reset extra partitions list if used */
3486 
3487   if (n_extra_partitions > 0) {
3488     BFT_FREE(_part_extra_partitions_list);
3489     _part_n_extra_partitions = 0;
3490   }
3491 
3492   /* Copy to mesh builder */
3493 
3494   mb->have_cell_rank = true;
3495   mb->cell_rank = cell_part;
3496 
3497   n_cells = mb->cell_bi.gnum_range[1] - mb->cell_bi.gnum_range[0];
3498 
3499   /* End of this section for log file */
3500 
3501   t1 = cs_timer_time();
3502 
3503   dt = cs_timer_diff(&t0, &t1);
3504 
3505   bft_printf(_("\n"
3506                " Partitioning finished (%.3g s)\n"),
3507              (double)(dt.nsec)/1.e9);
3508   bft_printf_flush();
3509 
3510   cs_log_printf(CS_LOG_PERFORMANCE,
3511                 _("  wall clock time:            %.3g s\n\n"),
3512                 (double)(dt.nsec)/1.e9);
3513 
3514   cs_log_separator(CS_LOG_PERFORMANCE);
3515 }
3516 
3517 /*----------------------------------------------------------------------------*/
3518 
3519 END_C_DECLS
3520