1 /*============================================================================
2 * Partitioner
3 *============================================================================*/
4
5 /*
6 This file is part of Code_Saturne, a general-purpose CFD tool.
7
8 Copyright (C) 1998-2021 EDF S.A.
9
10 This program is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free Software
12 Foundation; either version 2 of the License, or (at your option) any later
13 version.
14
15 This program is distributed in the hope that it will be useful, but WITHOUT
16 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
18 details.
19
20 You should have received a copy of the GNU General Public License along with
21 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
22 Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 */
24
25 /*----------------------------------------------------------------------------*/
26
27 #include "cs_defs.h"
28
29 /*----------------------------------------------------------------------------
30 * Standard C library headers
31 *----------------------------------------------------------------------------*/
32
33 #include <assert.h>
34 #include <errno.h>
35 #include <locale.h>
36 #include <math.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40
41 #if defined(HAVE_MPI)
42 #include <mpi.h>
43 #endif
44
45 /*----------------------------------------------------------------------------
46 * METIS library headers
47 *----------------------------------------------------------------------------*/
48
49 #if defined(HAVE_METIS) || defined(HAVE_PARMETIS)
50
51 #ifdef __cplusplus
52 extern "C" {
53 #endif
54
55 #if defined(HAVE_PARMETIS)
56 #include <parmetis.h>
57 #endif
58
59 #if defined(HAVE_METIS)
60 #include <metis.h>
61 #endif
62
63 #ifdef __cplusplus
64 }
65 #endif
66
67 #endif /* defined(HAVE_METIS) || defined(HAVE_PARMETIS) */
68
69 /*----------------------------------------------------------------------------
70 * SCOTCH library headers
71 *----------------------------------------------------------------------------*/
72
73 #if defined(HAVE_SCOTCH)
74 #include <stdint.h>
75 #include <scotch.h>
76 #elif defined(HAVE_PTSCOTCH)
77 #include <stdint.h>
78 #include <ptscotch.h>
79 #endif
80
81 /*----------------------------------------------------------------------------
82 * Local headers
83 *----------------------------------------------------------------------------*/
84
85 #include "bft_error.h"
86 #include "bft_mem.h"
87 #include "bft_printf.h"
88
89 #include "fvm_io_num.h"
90
91 #include "cs_all_to_all.h"
92 #include "cs_base.h"
93 #include "cs_block_dist.h"
94 #include "cs_block_to_part.h"
95 #include "cs_file.h"
96 #include "cs_io.h"
97 #include "cs_log.h"
98 #include "cs_mesh.h"
99 #include "cs_mesh_builder.h"
100 #include "cs_order.h"
101 #include "cs_part_to_block.h"
102 #include "cs_timer.h"
103
104 /*----------------------------------------------------------------------------
105 * Header for the current file
106 *----------------------------------------------------------------------------*/
107
108 #include "cs_partition.h"
109
110 /*----------------------------------------------------------------------------*/
111
112 BEGIN_C_DECLS
113
114 /*============================================================================
115 * Public Type documentation
116 *============================================================================*/
117
118 /*
119 * \enum cs_partition_stage_t cs_partition.h
120 *
121 * Partitioning is always done just after reading the mesh, unless a
122 * partitioning input file is available, in which case the partitioning
123 * read replaces this stage.
124 *
125 * When a mesh modification implying a change of cell connectivity graph
126 * is expected, the mesh may be re-partitioned after the pre-processing
127 * stage, prior to calculation. By default, re-partitioning is only done
128 * if the partitioning algorithm chosen for that stage is expected to
129 * produce different results due to the connectivity change. This is
130 * the case for graph-based algorithms such as those of METIS or SCOTCH,
131 * when mesh joining is defined, or additional periodic matching is defined
132 * (and the algorithm is not configured to ignore periodicity information).
133 *
134 * There are thus two possible partitioning stages:
135 *
136 * - CS_PARTITION_FOR_PREPROCESS, which is optional, and occurs
137 * just after reading the mesh.
138 * - CS_PARTITION_MAIN, which occurs just after reading the mesh if
139 * it is the only stage,, or after mesh preprocessing (and before
140 * computation), if the partitioning for preprocessing stage is
141 * activated.
142 *
143 * The number of partitioning stages is determined automatically based on
144 * information provided through \ref cs_partition_set_preprocess_hints,
145 * but re-partitioning may also be forced or inhibited using the
146 * \ref cs_partition_set_preprocess function.
147 */
148
149 /*
150 * \enum cs_partition_algorithm_t cs_partition.h
151 *
152 * Partitioning algorithm type.
153
154 * If the default algorithm is selected, the choice will be based on the
155 * following priority, depending on available libraries:
156 * - PT-SCOTCH (or SCOTCH if partitioning on one rank);
157 * - ParMETIS (or METIS if partitioning on one rank);
158 * - Morton space-filling curve (in bounding box)
159 *
160 * If both partitioning stages are active, the default for the preprocessing
161 * stage will be based on the Morton space-filling curve (in bounding box),
162 * as this should be cheaper, and the initial cell connectivity graph
163 * is usually expected to be modified during preprocessing.
164 *
165 * \var CS_PARTITION_DEFAULT Default partitioning (based on stage)
166 * \var CS_PARTITION_SFC_MORTON_BOX Morton (Z) curve in bounding box
167 * \var CS_PARTITION_SFC_MORTON_CUBE Morton (Z) curve in bounding cube
168 * \var CS_PARTITION_SFC_HILBERT_BOX Peano-Hilbert curve in bounding box
169 * \var CS_PARTITION_SFC_HILBERT_CUBE Peano-Hilbert curve in bounding cube
170 * \var CS_PARTITION_SCOTCH PT-SCOTCH or SCOTCH
171 * \var CS_PARTITION_METIS ParMETIS or METIS
172 * \var CS_PARTITION_BLOCK Unoptimized (naive) block partitioning
173 * \var CS_PARTITION_NONE No repartitioning (for computation
174 * stage after preprocessing)
175 */
176
177 /*! \cond DOXYGEN_SHOULD_SKIP_THIS */
178
179 /*=============================================================================
180 * Local Macro definitions
181 *============================================================================*/
182
183 /*============================================================================
184 * Local Type definitions
185 *============================================================================*/
186
187 typedef double _vtx_coords_t[3];
188
189 /*============================================================================
190 * Public function prototypes
191 *============================================================================*/
192
193 /*============================================================================
194 * Static global variables
195 *============================================================================*/
196
197 static cs_partition_algorithm_t _part_algorithm[2] = {CS_PARTITION_DEFAULT,
198 CS_PARTITION_DEFAULT};
199 static int _part_rank_step[2] = {1, 1};
200 static bool _part_ignore_perio[2] = {false, false};
201
202 static int _part_compute_join_hint = false;
203 static int _part_compute_perio_hint = false;
204 static int _part_preprocess_active = 1; /* 0: inactive;
205 1: default;
206 2: active */
207 static int _part_write_output = 1;
208 static int _part_n_extra_partitions = 0;
209 static int *_part_extra_partitions_list = NULL;
210
211 static bool _part_uniform_sfc_block_size = false;
212
213 #if defined(WIN32) || defined(_WIN32)
214 static const char _dir_separator = '\\';
215 #else
216 static const char _dir_separator = '/';
217 #endif
218
219 /*============================================================================
220 * Private function definitions
221 *============================================================================*/
222
223 #if defined(HAVE_MPI)
224
225 /*----------------------------------------------------------------------------
226 * Create a reduced communicator
227 *
228 * parameters:
229 * part_step <-- step between active ranks
230 *
231 * returns:
232 * handle to reduced communicator
233 *----------------------------------------------------------------------------*/
234
235 static MPI_Comm
_init_reduced_communicator(int part_step)236 _init_reduced_communicator(int part_step)
237 {
238 int n_ranks;
239 int ranges[1][3];
240 MPI_Group old_group, new_group;
241 MPI_Comm part_comm = MPI_COMM_NULL;
242
243 n_ranks = cs_glob_n_ranks;
244
245 MPI_Barrier(cs_glob_mpi_comm); /* For debugging */
246
247 MPI_Comm_size(cs_glob_mpi_comm, &n_ranks);
248 MPI_Comm_group(cs_glob_mpi_comm, &old_group);
249
250 ranges[0][0] = 0;
251 ranges[0][1] = n_ranks - 1;
252 ranges[0][2] = part_step;
253
254 MPI_Group_range_incl(old_group, 1, ranges, &new_group);
255 MPI_Comm_create(cs_glob_mpi_comm, new_group, &part_comm);
256 MPI_Group_free(&new_group);
257
258 MPI_Group_free(&old_group);
259
260 MPI_Barrier(cs_glob_mpi_comm); /* For debugging */
261
262 if (cs_glob_rank_id > -1 && cs_glob_rank_id % part_step) {
263 /* MPI_Comm_free(&part_comm) does not work outside of the group */
264 part_comm = MPI_COMM_NULL;
265 }
266
267 return part_comm;
268 }
269
270 #endif /* defined(HAVE_MPI) */
271
272 /*----------------------------------------------------------------------------
273 * Display the distribution of cells per partition in serial mode
274 *
275 * parameters:
276 * cell_range <-- first and past-the-last cell numbers for this rank
277 * n_parts <-- number of partitions
278 * part <-- cell partition number
279 *----------------------------------------------------------------------------*/
280
281 static void
_cell_part_histogram(cs_gnum_t cell_range[2],int n_parts,const int part[])282 _cell_part_histogram(cs_gnum_t cell_range[2],
283 int n_parts,
284 const int part[])
285 {
286 int i, k;
287 size_t j;
288 double step;
289
290 cs_lnum_t *n_part_cells;
291 cs_lnum_t n_min, n_max;
292 size_t count[10];
293 int n_steps = 10;
294 size_t n_cells = 0;
295
296 if (cell_range[1] > cell_range[0])
297 n_cells = cell_range[1] - cell_range[0];
298
299 if (n_parts <= 1) /* Should never happen */
300 return;
301
302 bft_printf(_(" Number of cells per domain (histogramm):\n"));
303
304 BFT_MALLOC(n_part_cells, n_parts, cs_lnum_t);
305
306 for (i = 0; i < n_parts; i++)
307 n_part_cells[i] = 0;
308
309 for (j = 0; j < n_cells; j++)
310 n_part_cells[part[j]] += 1;
311
312 #if defined(HAVE_MPI)
313
314 if (cs_glob_n_ranks > 1) {
315 cs_lnum_t *n_part_cells_sum;
316 BFT_MALLOC(n_part_cells_sum, n_parts, cs_lnum_t);
317 MPI_Allreduce(n_part_cells, n_part_cells_sum, n_parts,
318 CS_MPI_LNUM, MPI_SUM, cs_glob_mpi_comm);
319 BFT_FREE(n_part_cells);
320 n_part_cells = n_part_cells_sum;
321 n_part_cells_sum = NULL;
322 }
323
324 #endif /* defined(HAVE_MPI) */
325
326 /* Compute min and max */
327
328 n_min = n_part_cells[0];
329 n_max = n_part_cells[0];
330
331 for (i = 1; i < n_parts; i++) {
332 if (n_part_cells[i] > n_max)
333 n_max = n_part_cells[i];
334 else if (n_part_cells[i] < n_min)
335 n_min = n_part_cells[i];
336 }
337
338 /* Define axis subdivisions */
339
340 for (i = 0; i < n_steps; i++)
341 count[i] = 0;
342
343 if (n_max - n_min > 0) {
344
345 if (n_max-n_min < n_steps)
346 n_steps = n_max-n_min > 0 ? n_max-n_min : 1;
347
348 step = (double)(n_max - n_min) / n_steps;
349
350 /* Loop on partitions */
351
352 for (i = 0; i < n_parts; i++) {
353
354 /* Associated subdivision */
355
356 for (j = 0, k = 1; k < n_steps; j++, k++) {
357 if (n_part_cells[i] < n_min + k*step)
358 break;
359 }
360 count[j] += 1;
361
362 }
363
364 for (i = 0, j = 1; i < n_steps - 1; i++, j++)
365 bft_printf(" [ %10d ; %10d [ = %10d\n",
366 (int)(n_min + i*step),
367 (int)(n_min + j*step),
368 (int)(count[i]));
369
370 bft_printf(" [ %10d ; %10d ] = %10d\n",
371 (int)(n_min + (n_steps - 1)*step),
372 (int)n_max,
373 (int)(count[n_steps - 1]));
374
375 }
376
377 else { /* if (n_max == n_min) */
378 bft_printf(" [ %10d ; %10d ] = %10d\n",
379 (int)(n_min), (int)n_max, (int)n_parts);
380 }
381
382 BFT_FREE(n_part_cells);
383 }
384
385 #if defined(HAVE_MPI)
386
387 /*----------------------------------------------------------------------------
388 * Compute cell centers using minimal local data in parallel mode
389 *
390 * parameters:
391 * n_cells <-- number of cells
392 * n_faces <-- number of faces
393 * face_cells <-- face -> cells connectivity
394 * face_vtx_idx <-- face -> vertices connectivity index
395 * face_vtx <-- face -> vertices connectivity
396 * vtx_coord <-- vertex coordinates
397 * cell_center --> cell centers
398 *----------------------------------------------------------------------------*/
399
400 static void
_cell_center_g(cs_lnum_t n_cells,cs_lnum_t n_faces,const cs_lnum_t face_cells[],const cs_lnum_t face_vtx_idx[],const cs_lnum_t face_vtx[],const cs_real_t vtx_coord[],cs_coord_t cell_center[])401 _cell_center_g(cs_lnum_t n_cells,
402 cs_lnum_t n_faces,
403 const cs_lnum_t face_cells[],
404 const cs_lnum_t face_vtx_idx[],
405 const cs_lnum_t face_vtx[],
406 const cs_real_t vtx_coord[],
407 cs_coord_t cell_center[])
408 {
409 cs_lnum_t i, j;
410 cs_lnum_t vtx_id, face_id, start_id, end_id;
411 cs_lnum_t n_face_vertices;
412 cs_coord_t ref_normal[3], vtx_cog[3], face_center[3];
413
414 cs_lnum_t n_max_face_vertices = 0;
415
416 _vtx_coords_t *face_vtx_coord = NULL;
417 cs_coord_t *weight = NULL;
418
419 const double surf_epsilon = 1e-24;
420
421 assert(face_vtx_idx[0] == 0);
422
423 BFT_MALLOC(weight, n_cells, cs_coord_t);
424
425 for (i = 0; i < n_cells; i++) {
426 weight[i] = 0.0;
427 for (j = 0; j < 3; j++)
428 cell_center[i*3 + j] = 0.0;
429 }
430
431 /* Counting and allocation */
432
433 n_max_face_vertices = 0;
434
435 for (face_id = 0; face_id < n_faces; face_id++) {
436 n_face_vertices = face_vtx_idx[face_id + 1] - face_vtx_idx[face_id];
437 if (n_max_face_vertices <= n_face_vertices)
438 n_max_face_vertices = n_face_vertices;
439 }
440
441 BFT_MALLOC(face_vtx_coord, n_max_face_vertices, _vtx_coords_t);
442
443 /* Loop on each face */
444
445 for (face_id = 0; face_id < n_faces; face_id++) {
446
447 /* Initialization */
448
449 cs_lnum_t tri_id;
450
451 cs_lnum_t cell_id_0 = face_cells[face_id*2] -1;
452 cs_lnum_t cell_id_1 = face_cells[face_id*2 + 1] -1;
453 cs_coord_t unweighted_center[3] = {0.0, 0.0, 0.0};
454 cs_coord_t face_surface = 0.0;
455
456 n_face_vertices = 0;
457
458 start_id = face_vtx_idx[face_id];
459 end_id = face_vtx_idx[face_id + 1];
460
461 /* Define the polygon (P) according to the vertices (Pi) of the face */
462
463 for (vtx_id = start_id; vtx_id < end_id; vtx_id++) {
464
465 cs_lnum_t shift = 3 * (face_vtx[vtx_id] - 1);
466 for (i = 0; i < 3; i++)
467 face_vtx_coord[n_face_vertices][i] = vtx_coord[shift + i];
468 n_face_vertices++;
469
470 }
471
472 /* Compute the barycenter of the face vertices */
473
474 for (i = 0; i < 3; i++) {
475 vtx_cog[i] = 0.0;
476 for (vtx_id = 0; vtx_id < n_face_vertices; vtx_id++)
477 vtx_cog[i] += face_vtx_coord[vtx_id][i];
478 vtx_cog[i] /= n_face_vertices;
479 }
480
481 /* Loop on the triangles of the face (defined by an edge of the face
482 and its barycenter) */
483
484 for (i = 0; i < 3; i++) {
485 ref_normal[i] = 0.;
486 face_center[i] = 0.0;
487 }
488
489 for (tri_id = 0 ; tri_id < n_face_vertices ; tri_id++) {
490
491 cs_coord_t tri_surface;
492 cs_coord_t vect1[3], vect2[3], tri_normal[3], tri_center[3];
493
494 cs_lnum_t id0 = tri_id;
495 cs_lnum_t id1 = (tri_id + 1)%n_face_vertices;
496
497 /* Normal for each triangle */
498
499 for (i = 0; i < 3; i++) {
500 vect1[i] = face_vtx_coord[id0][i] - vtx_cog[i];
501 vect2[i] = face_vtx_coord[id1][i] - vtx_cog[i];
502 }
503
504 tri_normal[0] = vect1[1] * vect2[2] - vect2[1] * vect1[2];
505 tri_normal[1] = vect2[0] * vect1[2] - vect1[0] * vect2[2];
506 tri_normal[2] = vect1[0] * vect2[1] - vect2[0] * vect1[1];
507
508 if (tri_id == 0) {
509 for (i = 0; i < 3; i++)
510 ref_normal[i] = tri_normal[i];
511 }
512
513 /* Center of gravity for a triangle */
514
515 for (i = 0; i < 3; i++) {
516 tri_center[i] = ( vtx_cog[i]
517 + face_vtx_coord[id0][i]
518 + face_vtx_coord[id1][i]) / 3.0;
519 }
520
521 tri_surface = sqrt( tri_normal[0]*tri_normal[0]
522 + tri_normal[1]*tri_normal[1]
523 + tri_normal[2]*tri_normal[2]) * 0.5;
524
525 if (( tri_normal[0]*ref_normal[0]
526 + tri_normal[1]*ref_normal[1]
527 + tri_normal[2]*ref_normal[2]) < 0.0)
528 tri_surface *= -1.0;
529
530 /* Now compute contribution to face center and surface */
531
532 face_surface += tri_surface;
533
534 for (i = 0; i < 3; i++) {
535 face_center[i] += tri_surface * tri_center[i];
536 unweighted_center[i] = tri_center[i];
537 }
538
539 } /* End of loop on triangles of the face */
540
541 if (face_surface > surf_epsilon) {
542 for (i = 0; i < 3; i++)
543 face_center[i] /= face_surface;
544 }
545 else {
546 face_surface = surf_epsilon;
547 for (i = 0; i < 3; i++)
548 face_center[i] = unweighted_center[i] * face_surface / n_face_vertices;
549 }
550
551 /* Now contribute to cell centers */
552
553 assert(cell_id_0 > -2 && cell_id_1 > -2);
554
555 if (cell_id_0 > -1) {
556 for (i = 0; i < 3; i++)
557 cell_center[cell_id_0*3 + i] += face_center[i]*face_surface;
558 weight[cell_id_0] += face_surface;
559 }
560
561 if (cell_id_1 > -1) {
562 for (i = 0; i < 3; i++)
563 cell_center[cell_id_1*3 + i] += face_center[i]*face_surface;
564 weight[cell_id_1] += face_surface;
565 }
566
567 } /* End of loop on faces */
568
569 BFT_FREE(face_vtx_coord);
570
571 for (i = 0; i < n_cells; i++) {
572 for (j = 0; j < 3; j++)
573 cell_center[i*3 + j] /= weight[i];
574 }
575
576 BFT_FREE(weight);
577 }
578
579 /*----------------------------------------------------------------------------
580 * Compute cell centers using block data in parallel mode.
581 *
582 * parameters:
583 * mb <-- pointer to mesh builder helper structure
584 * cell_center --> cell centers array
585 * comm <-- associated MPI communicator
586 *----------------------------------------------------------------------------*/
587
588 static void
_precompute_cell_center_g(const cs_mesh_builder_t * mb,cs_coord_t cell_center[],MPI_Comm comm)589 _precompute_cell_center_g(const cs_mesh_builder_t *mb,
590 cs_coord_t cell_center[],
591 MPI_Comm comm)
592 {
593 cs_lnum_t i;
594 int n_ranks = 0;
595
596 cs_datatype_t gnum_type = (sizeof(cs_gnum_t) == 8) ? CS_UINT64 : CS_UINT32;
597 cs_datatype_t real_type = (sizeof(cs_real_t) == 8) ? CS_DOUBLE : CS_FLOAT;
598
599 cs_lnum_t _n_cells = 0;
600 cs_lnum_t _n_faces = 0;
601
602 cs_gnum_t *_cell_num = NULL;
603 cs_gnum_t *_face_num = NULL;
604 cs_gnum_t *_face_gcells = NULL;
605 cs_gnum_t *_face_gvertices = NULL;
606
607 cs_lnum_t *_face_cells = NULL;
608 cs_lnum_t *_face_vertices_idx = NULL;
609 cs_lnum_t *_face_vertices = NULL;
610
611 cs_all_to_all_t *d = NULL;
612
613 /* Initialization */
614
615 MPI_Comm_size(comm, &n_ranks);
616
617 assert((sizeof(cs_lnum_t) == 4) || (sizeof(cs_lnum_t) == 8));
618
619 _n_cells = mb->cell_bi.gnum_range[1] - mb->cell_bi.gnum_range[0];
620
621 BFT_MALLOC(_cell_num, _n_cells, cs_gnum_t);
622
623 for (i = 0; i < _n_cells; i++)
624 _cell_num[i] = mb->cell_bi.gnum_range[0] + i;
625
626 /* Distribute faces */
627 /*------------------*/
628
629 d = cs_block_to_part_create_by_adj_s(comm,
630 mb->face_bi,
631 mb->cell_bi,
632 2,
633 mb->face_cells,
634 NULL,
635 NULL,
636 &_n_faces,
637 &_face_num);
638
639 BFT_MALLOC(_face_gcells, _n_faces*2, cs_gnum_t);
640
641 /* Face -> cell connectivity */
642
643 cs_all_to_all_copy_array(d,
644 gnum_type,
645 2,
646 true, /* reverse */
647 mb->face_cells,
648 _face_gcells);
649
650 /* Now convert face -> cell connectivity to local cell numbers */
651
652 BFT_MALLOC(_face_cells, _n_faces*2, cs_lnum_t);
653
654 cs_block_to_part_global_to_local(_n_faces*2,
655 1,
656 _n_cells,
657 true,
658 _cell_num,
659 _face_gcells,
660 _face_cells);
661
662 BFT_FREE(_cell_num);
663 BFT_FREE(_face_gcells);
664
665 /* Face connectivity */
666
667 BFT_MALLOC(_face_vertices_idx, _n_faces + 1, cs_lnum_t);
668
669 cs_all_to_all_copy_index(d,
670 true, /* reverse */
671 mb->face_vertices_idx,
672 _face_vertices_idx);
673
674 BFT_MALLOC(_face_gvertices, _face_vertices_idx[_n_faces], cs_gnum_t);
675
676 cs_all_to_all_copy_indexed(d,
677 gnum_type,
678 true, /* reverse */
679 mb->face_vertices_idx,
680 mb->face_vertices,
681 _face_vertices_idx,
682 _face_gvertices);
683
684 cs_all_to_all_destroy(&d);
685
686 /* Vertices */
687
688 size_t _n_vertices = 0;
689 cs_gnum_t *_vtx_num = NULL;
690
691 cs_order_single_gnum(_face_vertices_idx[_n_faces],
692 1, /* base */
693 _face_gvertices,
694 &_n_vertices,
695 &_vtx_num);
696
697 cs_all_to_all_t *dv
698 = cs_all_to_all_create_from_block(_n_vertices,
699 CS_ALL_TO_ALL_USE_DEST_ID,
700 _vtx_num,
701 mb->vertex_bi,
702 comm);
703
704 cs_real_t *_vtx_coord
705 = cs_all_to_all_copy_array(dv,
706 real_type,
707 3,
708 true, /* reverse */
709 mb->vertex_coords,
710 NULL);
711
712 cs_all_to_all_destroy(&dv);
713
714 /* Now convert face -> vertex connectivity to local vertex numbers */
715
716 BFT_MALLOC(_face_vertices, _face_vertices_idx[_n_faces], cs_lnum_t);
717
718 cs_block_to_part_global_to_local(_face_vertices_idx[_n_faces],
719 1,
720 _n_vertices,
721 true,
722 _vtx_num,
723 _face_gvertices,
724 _face_vertices);
725
726 BFT_FREE(_face_gvertices);
727
728 _cell_center_g(_n_cells,
729 _n_faces,
730 _face_cells,
731 _face_vertices_idx,
732 _face_vertices,
733 _vtx_coord,
734 cell_center);
735
736 BFT_FREE(_vtx_coord);
737 BFT_FREE(_vtx_num);
738
739 BFT_FREE(_face_cells);
740
741 BFT_FREE(_face_vertices_idx);
742 BFT_FREE(_face_vertices);
743
744 BFT_FREE(_face_num);
745 }
746
747 #endif /* defined(HAVE_MPI) */
748
749 /*----------------------------------------------------------------------------
750 * Compute cell centers using block data in serial mode.
751 *
752 * parameters:
753 * mb <-- pointer to mesh builder helper structure
754 * cell_center --> cell centers array
755 *----------------------------------------------------------------------------*/
756
757 static void
_precompute_cell_center_l(const cs_mesh_builder_t * mb,cs_coord_t cell_center[])758 _precompute_cell_center_l(const cs_mesh_builder_t *mb,
759 cs_coord_t cell_center[])
760 {
761 cs_lnum_t i, j;
762 cs_lnum_t vtx_id, face_id, start_id, end_id;
763 cs_lnum_t n_face_vertices;
764 cs_coord_t ref_normal[3], vtx_cog[3], face_center[3];
765
766 cs_lnum_t n_cells = mb->cell_bi.gnum_range[1] - mb->cell_bi.gnum_range[0];
767 cs_lnum_t n_faces = mb->face_bi.gnum_range[1] - mb->face_bi.gnum_range[0];
768
769 const cs_gnum_t *face_cells = mb->face_cells;
770 const cs_lnum_t *face_vtx_idx = mb->face_vertices_idx;
771 const cs_gnum_t *face_vtx = mb->face_vertices;
772 const cs_real_t *vtx_coord = mb->vertex_coords;
773
774 cs_lnum_t n_max_face_vertices = 0;
775
776 _vtx_coords_t *face_vtx_coord = NULL;
777 cs_coord_t *weight = NULL;
778
779 const double surf_epsilon = 1e-24;
780
781 assert(face_vtx_idx[0] == 0);
782
783 BFT_MALLOC(weight, n_cells, cs_coord_t);
784
785 for (i = 0; i < n_cells; i++) {
786 weight[i] = 0.0;
787 for (j = 0; j < 3; j++)
788 cell_center[i*3 + j] = 0.0;
789 }
790
791 /* Counting and allocation */
792
793 n_max_face_vertices = 0;
794
795 for (face_id = 0; face_id < n_faces; face_id++) {
796 n_face_vertices = face_vtx_idx[face_id + 1] - face_vtx_idx[face_id];
797 if (n_max_face_vertices <= n_face_vertices)
798 n_max_face_vertices = n_face_vertices;
799 }
800
801 BFT_MALLOC(face_vtx_coord, n_max_face_vertices, _vtx_coords_t);
802
803 /* Loop on each face */
804
805 for (face_id = 0; face_id < n_faces; face_id++) {
806
807 /* Initialization */
808
809 cs_lnum_t tri_id;
810
811 cs_lnum_t cell_id_0 = face_cells[face_id*2] -1;
812 cs_lnum_t cell_id_1 = face_cells[face_id*2 + 1] -1;
813 cs_coord_t unweighted_center[3] = {0.0, 0.0, 0.0};
814 cs_coord_t face_surface = 0.0;
815
816 n_face_vertices = 0;
817
818 start_id = face_vtx_idx[face_id];
819 end_id = face_vtx_idx[face_id + 1];
820
821 /* Define the polygon (P) according to the vertices (Pi) of the face */
822
823 for (vtx_id = start_id; vtx_id < end_id; vtx_id++) {
824
825 cs_lnum_t shift = 3 * (face_vtx[vtx_id] - 1);
826 for (i = 0; i < 3; i++)
827 face_vtx_coord[n_face_vertices][i] = vtx_coord[shift + i];
828 n_face_vertices++;
829
830 }
831
832 /* Compute the barycenter of the face vertices */
833
834 for (i = 0; i < 3; i++) {
835 vtx_cog[i] = 0.0;
836 for (vtx_id = 0; vtx_id < n_face_vertices; vtx_id++)
837 vtx_cog[i] += face_vtx_coord[vtx_id][i];
838 vtx_cog[i] /= n_face_vertices;
839 }
840
841 /* Loop on the triangles of the face (defined by an edge of the face
842 and its barycenter) */
843
844 for (i = 0; i < 3; i++) {
845 ref_normal[i] = 0.;
846 face_center[i] = 0.0;
847 }
848
849 for (tri_id = 0 ; tri_id < n_face_vertices ; tri_id++) {
850
851 cs_coord_t tri_surface;
852 cs_coord_t vect1[3], vect2[3], tri_normal[3], tri_center[3];
853
854 cs_lnum_t id0 = tri_id;
855 cs_lnum_t id1 = (tri_id + 1)%n_face_vertices;
856
857 /* Normal for each triangle */
858
859 for (i = 0; i < 3; i++) {
860 vect1[i] = face_vtx_coord[id0][i] - vtx_cog[i];
861 vect2[i] = face_vtx_coord[id1][i] - vtx_cog[i];
862 }
863
864 tri_normal[0] = vect1[1] * vect2[2] - vect2[1] * vect1[2];
865 tri_normal[1] = vect2[0] * vect1[2] - vect1[0] * vect2[2];
866 tri_normal[2] = vect1[0] * vect2[1] - vect2[0] * vect1[1];
867
868 if (tri_id == 0) {
869 for (i = 0; i < 3; i++)
870 ref_normal[i] = tri_normal[i];
871 }
872
873 /* Center of gravity for a triangle */
874
875 for (i = 0; i < 3; i++) {
876 tri_center[i] = ( vtx_cog[i]
877 + face_vtx_coord[id0][i]
878 + face_vtx_coord[id1][i]) / 3.0;
879 }
880
881 tri_surface = sqrt( tri_normal[0]*tri_normal[0]
882 + tri_normal[1]*tri_normal[1]
883 + tri_normal[2]*tri_normal[2]) * 0.5;
884
885 if (( tri_normal[0]*ref_normal[0]
886 + tri_normal[1]*ref_normal[1]
887 + tri_normal[2]*ref_normal[2]) < 0.0)
888 tri_surface *= -1.0;
889
890 /* Now compute contribution to face center and surface */
891
892 face_surface += tri_surface;
893
894 for (i = 0; i < 3; i++) {
895 face_center[i] += tri_surface * tri_center[i];
896 unweighted_center[i] = tri_center[i];
897 }
898
899 } /* End of loop on triangles of the face */
900
901 if (face_surface > surf_epsilon) {
902 for (i = 0; i < 3; i++)
903 face_center[i] /= face_surface;
904 }
905 else {
906 face_surface = surf_epsilon;
907 for (i = 0; i < 3; i++)
908 face_center[i] = unweighted_center[i] * face_surface / n_face_vertices;
909 }
910
911 /* Now contribute to cell centers */
912
913 assert(cell_id_0 > -2 && cell_id_1 > -2);
914
915 if (cell_id_0 > -1) {
916 for (i = 0; i < 3; i++)
917 cell_center[cell_id_0*3 + i] += face_center[i]*face_surface;
918 weight[cell_id_0] += face_surface;
919 }
920
921 if (cell_id_1 > -1) {
922 for (i = 0; i < 3; i++)
923 cell_center[cell_id_1*3 + i] += face_center[i]*face_surface;
924 weight[cell_id_1] += face_surface;
925 }
926
927 } /* End of loop on faces */
928
929 BFT_FREE(face_vtx_coord);
930
931 for (i = 0; i < n_cells; i++) {
932 for (j = 0; j < 3; j++)
933 cell_center[i*3 + j] /= weight[i];
934 }
935
936 BFT_FREE(weight);
937 }
938
939 /*----------------------------------------------------------------------------
940 * Define cell ranks using a space-filling curve.
941 *
942 * parameters:
943 * n_g_cells <-- global number of cells
944 * n_ranks <-- number of ranks in partition
945 * mb <-- pointer to mesh builder helper structure
946 * sfc_type <-- type of space-filling curve
947 * cell_rank --> cell rank (1 to n numbering)
948 * comm <-- associated MPI communicator
949 *----------------------------------------------------------------------------*/
950
951 #if defined(HAVE_MPI)
952
953 static void
_cell_rank_by_sfc(cs_gnum_t n_g_cells,int n_ranks,const cs_mesh_builder_t * mb,fvm_io_num_sfc_t sfc_type,int cell_rank[],MPI_Comm comm)954 _cell_rank_by_sfc(cs_gnum_t n_g_cells,
955 int n_ranks,
956 const cs_mesh_builder_t *mb,
957 fvm_io_num_sfc_t sfc_type,
958 int cell_rank[],
959 MPI_Comm comm)
960
961 #else
962
963 static void
964 _cell_rank_by_sfc(cs_gnum_t n_g_cells,
965 int n_ranks,
966 const cs_mesh_builder_t *mb,
967 fvm_io_num_sfc_t sfc_type,
968 int cell_rank[])
969
970 #endif
971 {
972 cs_lnum_t i;
973 cs_timer_t start_time, end_time;
974 cs_timer_counter_t dt;
975
976 cs_lnum_t n_cells = 0, block_size = 0;
977
978 cs_coord_t *cell_center = NULL;
979 fvm_io_num_t *cell_io_num = NULL;
980 const cs_gnum_t *cell_num = NULL;
981
982 bft_printf(_("\n Partitioning by space-filling curve: %s.\n"),
983 _(fvm_io_num_sfc_type_name[sfc_type]));
984
985 start_time = cs_timer_time();
986
987 n_cells = mb->cell_bi.gnum_range[1] - mb->cell_bi.gnum_range[0];
988 block_size = mb->cell_bi.block_size;
989
990 BFT_MALLOC(cell_center, n_cells*3, cs_coord_t);
991
992 #if defined(HAVE_MPI)
993 if (n_ranks > 1)
994 _precompute_cell_center_g(mb, cell_center, comm);
995 #endif
996 if (n_ranks == 1)
997 _precompute_cell_center_l(mb, cell_center);
998
999 end_time = cs_timer_time();
1000 dt = cs_timer_diff(&start_time, &end_time);
1001 start_time = end_time;
1002
1003 cs_log_printf(CS_LOG_PERFORMANCE,
1004 _(" precompute cell centers: %.3g s\n"),
1005 (double)(dt.nsec)/1.e9);
1006
1007 cell_io_num = fvm_io_num_create_from_sfc(cell_center,
1008 3,
1009 n_cells,
1010 sfc_type);
1011
1012 BFT_FREE(cell_center);
1013
1014 cell_num = fvm_io_num_get_global_num(cell_io_num);
1015
1016 block_size = n_g_cells / n_ranks;
1017 if (n_g_cells % n_ranks)
1018 block_size += 1;
1019
1020 /* Determine rank based on global numbering with SFC ordering; */
1021
1022 if (_part_uniform_sfc_block_size == false) {
1023
1024 cs_gnum_t cells_per_rank = n_g_cells / n_ranks;
1025 cs_lnum_t rmdr = n_g_cells - cells_per_rank * (cs_gnum_t)n_ranks;
1026
1027 if (rmdr == 0) {
1028 for (i = 0; i < n_cells; i++)
1029 cell_rank[i] = (cell_num[i] - 1) / cells_per_rank;
1030 }
1031 else {
1032 cs_gnum_t n_ranks_rmdr = n_ranks - rmdr;
1033 cs_gnum_t n_ranks_cells_per_rank = n_ranks_rmdr * cells_per_rank;
1034 for (i = 0; i < n_cells; i++) {
1035 if ((cell_num[i] - 1) < n_ranks_cells_per_rank)
1036 cell_rank[i] = (cell_num[i] - 1) / cells_per_rank;
1037 else
1038 cell_rank[i] = (cell_num[i] + n_ranks_rmdr - 1) / (cells_per_rank + 1);
1039 }
1040 }
1041
1042 }
1043
1044 else {
1045
1046 /* Plan for case where we would need a fixed block size,
1047 for example, using an external linear solver assuming this.
1048 This may not work at high process counts, where the last
1049 ranks will have no data (a solution to this would be
1050 to build a slightly smaller MPI communicator). */
1051
1052 for (i = 0; i < n_cells; i++) {
1053 cell_rank[i] = ((cell_num[i] - 1) / block_size);
1054 assert(cell_rank[i] > -1 && cell_rank[i] < n_ranks);
1055 }
1056
1057 }
1058
1059 cell_io_num = fvm_io_num_destroy(cell_io_num);
1060
1061 end_time = cs_timer_time();
1062 dt = cs_timer_diff(&start_time, &end_time);
1063
1064 if (sfc_type < FVM_IO_NUM_SFC_HILBERT_BOX)
1065 cs_log_printf(CS_LOG_PERFORMANCE,
1066 _(" Morton (Z) curve: %.3g s\n"),
1067 (double)(dt.nsec)/1.e9);
1068 else
1069 cs_log_printf(CS_LOG_PERFORMANCE,
1070 _(" Peano-Hilbert curve: %.3g s\n"),
1071 (double)(dt.nsec)/1.e9);
1072 }
1073
1074 #if defined(HAVE_MPI)
1075
1076 /*----------------------------------------------------------------------------
1077 * Update global face -> cells adjacency for periodicity in parallel mode.
1078 *
1079 * If face Fi is periodic with face Fj, and face Fi is adjacent to cell Ci,
1080 * while face Fj is adjacent to face Cj, we add Cj to Fi's adjacent cells,
1081 * and Ci to Fj's adjacent cells, just as if Fi and Fj were regular interior
1082 * faces (this ignores the geometric transformation, but is sufficent to
1083 * build the cells -> cells graph for domain partitioning).
1084 *
1085 * This function should be called when faces are distributed by blocks,
1086 * that is prior to redistribution based on face -> cells adjacency.
1087 *
1088 * arguments:
1089 * bi <-- block size and range info for faces
1090 * g_face_cells <-> global face->cells adjacency to update
1091 * n_periodic_couples <-- number of periodic couples
1092 * periodic_couples <-- array indicating periodic couples (interlaced,
1093 * using global numberings)
1094 * comm <-- associated MPI communicator
1095 *----------------------------------------------------------------------------*/
1096
1097 static void
_add_perio_to_face_cells_g(cs_block_dist_info_t bi,cs_gnum_t g_face_cells[],cs_lnum_t n_periodic_couples,const cs_gnum_t periodic_couples[],MPI_Comm comm)1098 _add_perio_to_face_cells_g(cs_block_dist_info_t bi,
1099 cs_gnum_t g_face_cells[],
1100 cs_lnum_t n_periodic_couples,
1101 const cs_gnum_t periodic_couples[],
1102 MPI_Comm comm)
1103 {
1104 /* Initialization */
1105
1106 int flags = 0;
1107
1108 const cs_lnum_t n_send = n_periodic_couples*2;
1109
1110 cs_all_to_all_t *d = cs_all_to_all_create_from_block(n_send,
1111 flags,
1112 periodic_couples,
1113 bi,
1114 comm);
1115
1116 /* Distribute to blocks */
1117
1118 cs_gnum_t *b_data = cs_all_to_all_copy_array(d,
1119 CS_GNUM_TYPE,
1120 1,
1121 false, /* reverse */
1122 periodic_couples,
1123 NULL);
1124
1125 cs_lnum_t n_b = cs_all_to_all_n_elts_dest(d);
1126
1127 /* Receive buffer contains global cell face whose cell adjacency
1128 is defined on the local rank, and we replace the received value
1129 by the adjacent cell number, for return exchange */
1130
1131 for (cs_lnum_t i = 0; i < n_b; i++) {
1132 cs_gnum_t g_face_id = b_data[i] - bi. gnum_range[0];
1133 cs_gnum_t c_num_0 = g_face_cells[g_face_id*2];
1134 const cs_gnum_t c_num_1 = g_face_cells[g_face_id*2 + 1];
1135 assert(c_num_0 == 0 || c_num_1 == 0);
1136 /* assign c_num_0 or c_num_1 depending on which is nonzero
1137 (or 0 if both are 0, which should not happen) */
1138 b_data[i] = c_num_0 + c_num_1;
1139 }
1140
1141 cs_gnum_t *send_adj;
1142 BFT_MALLOC(send_adj, n_send*2, cs_gnum_t);
1143
1144 cs_gnum_t *r_data = cs_all_to_all_copy_array(d,
1145 CS_GNUM_TYPE,
1146 1,
1147 true, /* reverse */
1148 b_data,
1149 NULL);
1150
1151 BFT_FREE(b_data);
1152
1153 /* Now r_data contains the global cell number matching a given face;
1154 Send global face number and cell number adjacent with its
1155 periodic face to rank defining the adjacency for this face */
1156
1157 for (cs_lnum_t i = 0; i < n_send; i++) {
1158
1159 cs_lnum_t k = (i + 1) % 2; /* 1 for first value, 0 for
1160 second (permutation) */
1161
1162 send_adj[i*2] = periodic_couples[i];
1163 send_adj[i*2 + 1] = r_data[i + k];
1164
1165 }
1166
1167 BFT_FREE(r_data);
1168
1169 b_data = cs_all_to_all_copy_array(d,
1170 CS_GNUM_TYPE,
1171 2,
1172 false, /* reverse */
1173 send_adj,
1174 NULL);
1175
1176 BFT_FREE(send_adj);
1177
1178 /* Update face -> cell connectivity */
1179
1180 for (cs_lnum_t i = 0; i < n_b; i++) {
1181 const cs_gnum_t g_face_id = b_data[2*i] - bi. gnum_range[0];
1182 const cs_gnum_t g_cell_num = b_data[2*i + 1];
1183 if (g_face_cells[g_face_id*2] == 0)
1184 g_face_cells[g_face_id*2] = g_cell_num;
1185 else if (g_face_cells[g_face_id*2 + 1] == 0)
1186 g_face_cells[g_face_id*2 + 1] = g_cell_num;
1187 else if (g_face_cells[g_face_id*2 + 1] != g_cell_num)
1188 bft_error(__FILE__, __LINE__, 0,
1189 _("Inconsistency adding periocicity info for partitioning.\n"
1190 "Face %llu: adjacent to cells %llu and %llu,\n"
1191 "trying to add %llu instead of %llu.\n\n"
1192 "Try ignoring periodicity for partitioning."),
1193 (unsigned long long)b_data[2*i],
1194 (unsigned long long)g_face_cells[g_face_id*2],
1195 (unsigned long long)g_face_cells[g_face_id*2+1],
1196 (unsigned long long)g_cell_num,
1197 (unsigned long long)g_face_cells[g_face_id*2+1]);
1198 }
1199
1200 BFT_FREE(b_data);
1201
1202 cs_all_to_all_destroy(&d);
1203 }
1204
1205 #endif /* defined(HAVE_MPI) */
1206
1207 /*----------------------------------------------------------------------------
1208 * Update global face -> cells adjacency for periodicity in local mode.
1209 *
1210 * If face Fi is periodic with face Fj, and face Fi is adjacent to cell Ci,
1211 * while face Fj is adjacent to face Cj, we add Cj to Fi's adjacent cells,
1212 * and Ci to Fj's adjacent cells, just as if Fi and Fj were regular interior
1213 * faces (this ignores the geometric transformation, but is suffient to
1214 * build the cells -> cells graph for domain partitioning).
1215 *
1216 * This function should be called when faces are distributed by blocks,
1217 * that is prior to redistribution based on face -> cells adjacency.
1218 *
1219 * arguments:
1220 * bi <-- block size and range info for faces
1221 * g_face_cells <-> global face->cells adjacency to update
1222 * n_periodic_couples <-- number of periodic couples
1223 * periodic_couples <-- array indicating periodic couples (interlaced,
1224 * using global numberings)
1225 *
1226 * returns:
1227 * initialized partition to block distributor
1228 *----------------------------------------------------------------------------*/
1229
1230 static void
_add_perio_to_face_cells_l(cs_gnum_t g_face_cells[],cs_lnum_t n_periodic_couples,const cs_gnum_t periodic_couples[])1231 _add_perio_to_face_cells_l(cs_gnum_t g_face_cells[],
1232 cs_lnum_t n_periodic_couples,
1233 const cs_gnum_t periodic_couples[])
1234 {
1235 cs_lnum_t j;
1236
1237 /* Update face -> cell connectivity */
1238
1239 for (j = 0; j < n_periodic_couples; j++) {
1240
1241 cs_gnum_t f_id_0 = periodic_couples[j*2] - 1;
1242 cs_gnum_t f_id_1 = periodic_couples[j*2 + 1] - 1;
1243 cs_gnum_t c_num_00 = g_face_cells[f_id_0*2];
1244 cs_gnum_t c_num_01 = g_face_cells[f_id_0*2 + 1];
1245 cs_gnum_t c_num_10 = g_face_cells[f_id_1*2];
1246 cs_gnum_t c_num_11 = g_face_cells[f_id_1*2 + 1];
1247
1248 assert(c_num_00 == 0 || c_num_01 == 0);
1249 assert(c_num_10 == 0 || c_num_11 == 0);
1250
1251 /* assign c_num_i0 or c_num_i1 depending on which is nonzero
1252 (or 0 if both are 0, which should not happen) */
1253
1254 if (g_face_cells[f_id_0*2] == 0)
1255 g_face_cells[f_id_0*2] = c_num_10 + c_num_11;
1256 else
1257 g_face_cells[f_id_0*2 + 1] = c_num_00 + c_num_01;
1258
1259 if (g_face_cells[f_id_1*2] == 0)
1260 g_face_cells[f_id_1*2] = c_num_00 + c_num_01;
1261 else
1262 g_face_cells[f_id_1*2 + 1] = c_num_10 + c_num_11;
1263 }
1264 }
1265
1266 #if defined(HAVE_METIS) || defined(HAVE_PARMETIS)
1267
1268 /*----------------------------------------------------------------------------
1269 * Build cell -> cell connectivity
1270 *
1271 * parameters:
1272 * n_cells <-- number of cells in mesh
1273 * n_faces <-- number of cells in mesh
1274 * start_cell <-- number of first cell for the curent rank
1275 * face_cells <-- face->cells connectivity
1276 * cell_idx --> cell->cells index
1277 * cell_neighbors --> cell->cells connectivity
1278 *----------------------------------------------------------------------------*/
1279
1280 static void
_metis_cell_cells(size_t n_cells,size_t n_faces,cs_gnum_t start_cell,cs_gnum_t * face_cells,idx_t ** cell_idx,idx_t ** cell_neighbors)1281 _metis_cell_cells(size_t n_cells,
1282 size_t n_faces,
1283 cs_gnum_t start_cell,
1284 cs_gnum_t *face_cells,
1285 idx_t **cell_idx,
1286 idx_t **cell_neighbors)
1287 {
1288 size_t i, id_0, id_1;
1289
1290 cs_gnum_t c_num[2];
1291
1292 idx_t *n_neighbors;
1293 idx_t *_cell_idx;
1294 idx_t *_cell_neighbors;
1295
1296 /* Count and allocate arrays */
1297
1298 BFT_MALLOC(n_neighbors, n_cells, idx_t);
1299
1300 for (i = 0; i < n_cells; i++)
1301 n_neighbors[i] = 0;
1302
1303 for (i = 0; i < n_faces; i++) {
1304
1305 c_num[0] = face_cells[i*2];
1306 c_num[1] = face_cells[i*2 + 1];
1307
1308 if (c_num[0] == 0 || c_num[1] == 0 || c_num[0] == c_num[1])
1309 continue;
1310
1311 assert( c_num[0] - start_cell < n_cells
1312 || c_num[1] - start_cell < n_cells);
1313
1314 if (c_num[0] >= start_cell && c_num[0] - start_cell < n_cells)
1315 n_neighbors[c_num[0] - start_cell] += 1;
1316 if (c_num[1] >= start_cell && c_num[1] - start_cell < n_cells)
1317 n_neighbors[c_num[1] - start_cell] += 1;
1318 }
1319
1320 BFT_MALLOC(_cell_idx, n_cells + 1, idx_t);
1321
1322 _cell_idx[0] = 0;
1323
1324 for (i = 0; i < n_cells; i++)
1325 _cell_idx[i + 1] = _cell_idx[i] + n_neighbors[i];
1326
1327 BFT_MALLOC(_cell_neighbors, _cell_idx[n_cells], idx_t);
1328
1329 for (i = 0; i < n_cells; i++)
1330 n_neighbors[i] = 0;
1331
1332 for (i = 0; i < n_faces; i++) {
1333
1334 c_num[0] = face_cells[i*2];
1335 c_num[1] = face_cells[i*2 + 1];
1336
1337 if (c_num[0] == 0 || c_num[1] == 0 || c_num[0] == c_num[1])
1338 continue;
1339
1340 if (c_num[0] >= start_cell && c_num[0] - start_cell < n_cells) {
1341 id_0 = c_num[0] - start_cell;
1342 _cell_neighbors[_cell_idx[id_0] + n_neighbors[id_0]] = c_num[1] - 1;
1343 n_neighbors[id_0] += 1;
1344 }
1345
1346 if (c_num[1] >= start_cell && c_num[1] - start_cell < n_cells) {
1347 id_1 = c_num[1] - start_cell;
1348 _cell_neighbors[_cell_idx[id_1] + n_neighbors[id_1]] = c_num[0] - 1;
1349 n_neighbors[id_1] += 1;
1350 }
1351 }
1352
1353 BFT_FREE(n_neighbors);
1354
1355 *cell_idx = _cell_idx;
1356 *cell_neighbors = _cell_neighbors;
1357 }
1358
1359 /*----------------------------------------------------------------------------
1360 * Compute partition using METIS
1361 *
1362 * parameters:
1363 * n_cells <-- number of cells in mesh
1364 * n_parts <-- number of partitions
1365 * cell_cell_idx <-- cell->cells index
1366 * cell_cell <-- cell->cells connectivity
1367 * cell_part --> cell partition
1368 *----------------------------------------------------------------------------*/
1369
1370 static void
_part_metis(size_t n_cells,int n_parts,idx_t * cell_idx,idx_t * cell_neighbors,int * cell_part)1371 _part_metis(size_t n_cells,
1372 int n_parts,
1373 idx_t *cell_idx,
1374 idx_t *cell_neighbors,
1375 int *cell_part)
1376 {
1377 size_t i;
1378 double start_time, end_time;
1379
1380 idx_t _n_constraints = 1;
1381
1382 idx_t edgecut = 0; /* <-- Number of faces on partition */
1383
1384 idx_t _n_cells = n_cells;
1385 idx_t _n_parts = n_parts;
1386 idx_t *_cell_part = NULL;
1387
1388 start_time = cs_timer_wtime();
1389
1390 if (sizeof(idx_t) == sizeof(int))
1391 _cell_part = (idx_t *)cell_part;
1392
1393 else
1394 BFT_MALLOC(_cell_part, n_cells, idx_t);
1395
1396 if (n_parts < 8) {
1397
1398 bft_printf(_("\n"
1399 " Partitioning %llu cells to %d domains"
1400 " (METIS_PartGraphRecursive).\n"),
1401 (unsigned long long)n_cells, n_parts);
1402
1403 METIS_PartGraphRecursive(&_n_cells,
1404 &_n_constraints,
1405 cell_idx,
1406 cell_neighbors,
1407 NULL, /* vwgt: cell weights */
1408 NULL, /* vsize: size of the vertices */
1409 NULL, /* adjwgt: face weights */
1410 &_n_parts,
1411 NULL, /* tpwgts */
1412 NULL, /* ubvec: load imbalance tolerance */
1413 NULL, /* options */
1414 &edgecut,
1415 _cell_part);
1416
1417 }
1418
1419 else {
1420
1421 bft_printf(_("\n"
1422 " Partitioning %llu cells to %d domains\n"
1423 " (METIS_PartGraphKway).\n"),
1424 (unsigned long long)n_cells, n_parts);
1425
1426 METIS_PartGraphKway(&_n_cells,
1427 &_n_constraints,
1428 cell_idx,
1429 cell_neighbors,
1430 NULL, /* vwgt: cell weights */
1431 NULL, /* vsize: size of the vertices */
1432 NULL, /* adjwgt: face weights */
1433 &_n_parts,
1434 NULL, /* tpwgts */
1435 NULL, /* ubvec: load imbalance tolerance */
1436 NULL, /* options */
1437 &edgecut,
1438 _cell_part);
1439
1440 }
1441
1442 end_time = cs_timer_wtime();
1443
1444 bft_printf(_("\n"
1445 " Total number of faces on parallel boundaries: %llu\n"
1446 " wall-clock time: %f s\n\n"),
1447 (unsigned long long)edgecut,
1448 (double)(end_time - start_time));
1449
1450 cs_log_printf(CS_LOG_PERFORMANCE,
1451 " METIS_PartGraphKway: %.3g s\n",
1452 (double)(end_time - start_time));
1453
1454 if (sizeof(idx_t) != sizeof(int)) {
1455 for (i = 0; i < n_cells; i++)
1456 cell_part[i] = _cell_part[i];
1457 BFT_FREE(_cell_part);
1458 }
1459 }
1460
1461 #endif /* defined(HAVE_METIS) || defined(HAVE_PARMETIS) */
1462
1463 #if defined(HAVE_PARMETIS)
1464
1465 /*----------------------------------------------------------------------------
1466 * Compute partition using ParMETIS
1467 *
1468 * parameters:
1469 * n_g_cells <-- global number of cells
1470 * cell_range <-- first and past-the-last cell numbers for this rank
1471 * n_parts <-- number of partitions
1472 * cell_cell_idx <-- cell->cells index
1473 * cell_cell <-- cell->cells connectivity
1474 * cell_part --> cell partition
1475 * comm <-- associated MPI communicator
1476 *----------------------------------------------------------------------------*/
1477
1478 static void
_part_parmetis(cs_gnum_t n_g_cells,cs_gnum_t cell_range[2],int n_parts,idx_t * cell_idx,idx_t * cell_neighbors,int * cell_part,MPI_Comm comm)1479 _part_parmetis(cs_gnum_t n_g_cells,
1480 cs_gnum_t cell_range[2],
1481 int n_parts,
1482 idx_t *cell_idx,
1483 idx_t *cell_neighbors,
1484 int *cell_part,
1485 MPI_Comm comm)
1486 {
1487 size_t i;
1488 double start_time, end_time;
1489
1490 unsigned long long edgecut = 0; /* <-- Number of faces on partition */
1491
1492 int n_ranks;
1493 size_t n_cells = cell_range[1] - cell_range[0];
1494 idx_t vtxstart = cell_range[0] - 1;
1495 idx_t vtxend = cell_range[1] - 1;
1496 idx_t *vtxdist = NULL;
1497 idx_t *_cell_part = NULL;
1498 MPI_Datatype mpi_idx_t = MPI_DATATYPE_NULL;
1499
1500 start_time = cs_timer_wtime();
1501
1502 MPI_Comm_size(comm, &n_ranks);
1503
1504 /* Adjust mpi_idx_t if necessary */
1505
1506 if (sizeof(idx_t) == sizeof(short))
1507 mpi_idx_t = MPI_SHORT;
1508 else if (sizeof(idx_t) == sizeof(int))
1509 mpi_idx_t = MPI_INT;
1510 else if (sizeof(idx_t) == sizeof(long))
1511 mpi_idx_t = MPI_LONG; /* standard ParMETIS 3.1.1 only short or int */
1512 else {
1513 assert(0); /* porting error, possible with future or modified ParMETIS */
1514 }
1515
1516 if (sizeof(idx_t) == sizeof(int))
1517 _cell_part = cell_part;
1518
1519 else
1520 BFT_MALLOC(_cell_part, n_cells, idx_t);
1521
1522 bft_printf(_("\n"
1523 " Partitioning %llu cells to %d domains on %d ranks\n"
1524 " (ParMETIS_V3_PartKway).\n"),
1525 (unsigned long long)n_g_cells, n_parts, n_ranks);
1526
1527 /* Build vtxdist */
1528
1529 BFT_MALLOC(vtxdist, n_ranks + 1, idx_t);
1530
1531 MPI_Allgather(&vtxstart, 1, mpi_idx_t,
1532 vtxdist, 1, mpi_idx_t, comm);
1533 MPI_Allreduce(&vtxend, vtxdist + n_ranks, 1, mpi_idx_t, MPI_MAX, comm);
1534
1535 /* Call ParMETIS partitioning */
1536
1537 {
1538 int j;
1539 idx_t _edgecut = 0;
1540 idx_t _n_parts = n_parts;
1541 idx_t ncon = 1; /* number of weights for each vertex */
1542 idx_t options[3] = {0, 1, 15}; /* By default if options[0] = 0 */
1543 idx_t numflag = 0; /* 0 to n-1 numbering (C type) */
1544 idx_t wgtflag = 0; /* No weighting for faces or cells */
1545
1546 real_t wgt = 1.0/n_parts;
1547 real_t ubvec[] = {1.5};
1548 real_t *tpwgts = NULL;
1549
1550 BFT_MALLOC(tpwgts, n_parts, real_t);
1551
1552 for (j = 0; j < n_parts; j++)
1553 tpwgts[j] = wgt;
1554
1555 int retval = ParMETIS_V3_PartKway
1556 (vtxdist,
1557 cell_idx,
1558 cell_neighbors,
1559 NULL, /* vwgt: cell weights */
1560 NULL, /* adjwgt: face weights */
1561 &wgtflag,
1562 &numflag,
1563 &ncon,
1564 &_n_parts,
1565 tpwgts,
1566 ubvec,
1567 options,
1568 &_edgecut,
1569 _cell_part,
1570 &comm);
1571
1572 BFT_FREE(tpwgts);
1573
1574 edgecut = _edgecut;
1575
1576 if (retval != METIS_OK)
1577 bft_error(__FILE__, __LINE__, 0,
1578 _("Error in ParMETIS partitioning.\n"));
1579
1580 }
1581
1582 end_time = cs_timer_wtime();
1583
1584 BFT_FREE(vtxdist);
1585
1586 if (edgecut > 0)
1587 bft_printf(_("\n"
1588 " Total number of faces on parallel boundaries: %llu\n"
1589 " wall-clock time: %f s\n\n"),
1590 (unsigned long long)edgecut,
1591 (double)(end_time - start_time));
1592 else
1593 bft_printf(_("\n"
1594 " wall-clock time: %f s\n\n"),
1595 (double)(end_time - start_time));
1596
1597 cs_log_printf(CS_LOG_PERFORMANCE,
1598 " ParMETIS_V3_PartKway: %.3g s\n",
1599 (double)(end_time - start_time));
1600
1601 if (sizeof(idx_t) != sizeof(int)) {
1602 for (i = 0; i < n_cells; i++)
1603 cell_part[i] = _cell_part[i];
1604 BFT_FREE(_cell_part);
1605 }
1606 }
1607
1608 #endif /* defined(HAVE_PARMETIS) */
1609
1610 #if defined(HAVE_SCOTCH) || defined(HAVE_PTSCOTCH)
1611
1612 /*----------------------------------------------------------------------------
1613 * Sort an array "a" between its left bound "l" and its right bound "r"
1614 * thanks to a shell sort (Knuth algorithm).
1615 *
1616 * parameters:
1617 * l <-- left bound
1618 * r <-- right bound
1619 * a <-> array to sort
1620 *---------------------------------------------------------------------------*/
1621
1622 static void
_scotch_sort_shell(SCOTCH_Num l,SCOTCH_Num r,SCOTCH_Num a[])1623 _scotch_sort_shell(SCOTCH_Num l,
1624 SCOTCH_Num r,
1625 SCOTCH_Num a[])
1626 {
1627 int i, j, h;
1628
1629 /* Compute stride */
1630 for (h = 1; h <= (r-l)/9; h = 3*h+1) ;
1631
1632 /* Sort array */
1633 for (; h > 0; h /= 3) {
1634
1635 for (i = l+h; i < r; i++) {
1636
1637 SCOTCH_Num v = a[i];
1638
1639 j = i;
1640 while ((j >= l+h) && (v < a[j-h])) {
1641 a[j] = a[j-h];
1642 j -= h;
1643 }
1644 a[j] = v;
1645
1646 } /* Loop on array elements */
1647
1648 } /* End of loop on stride */
1649
1650 }
1651
1652 /*----------------------------------------------------------------------------
1653 * Print an error message and exit with an EXIT_FAILURE code.
1654 *
1655 * An implementation of this function is required by libScotch.
1656 *
1657 * parameters:
1658 * errstr <-- format string, as printf() and family.
1659 * ... <-- variable arguments based on format string.
1660 *----------------------------------------------------------------------------*/
1661
1662 void
SCOTCH_errorPrint(const char * errstr,...)1663 SCOTCH_errorPrint(const char *errstr,
1664 ...)
1665 {
1666 if (cs_glob_rank_id < 1) {
1667
1668 va_list errlist;
1669
1670 fflush(stdout);
1671
1672 fprintf(stderr, "\n");
1673
1674 fprintf(stderr, _("\nFatal error encountered by libScotch.\n\n"));
1675
1676 va_start(errlist, errstr);
1677 vfprintf(stderr, errstr, errlist);
1678 va_end(errlist);
1679 fprintf(stderr, "\n\n");
1680 fflush(stderr);
1681 }
1682
1683 assert(0);
1684
1685 #if defined(HAVE_MPI)
1686 {
1687 int mpi_flag;
1688 MPI_Initialized(&mpi_flag);
1689 if (mpi_flag != 0)
1690 MPI_Abort(cs_glob_mpi_comm, EXIT_FAILURE);
1691 }
1692 #endif /* HAVE_MPI */
1693
1694 exit(EXIT_FAILURE);
1695 }
1696
1697 /*----------------------------------------------------------------------------
1698 * Print a warning message.
1699 *
1700 * An implementation of this function is required by libScotch.
1701 *
1702 * parameters:
1703 * errstr <-- format string, as printf() and family.
1704 * ... <-- variable arguments based on format string.
1705 *----------------------------------------------------------------------------*/
1706
1707 void
SCOTCH_errorPrintW(const char * errstr,...)1708 SCOTCH_errorPrintW (const char *errstr,
1709 ...)
1710 {
1711 if (cs_glob_rank_id < 1) {
1712
1713 va_list errlist;
1714
1715 fflush(stdout);
1716
1717 fprintf(stdout, "\n");
1718
1719 fprintf(stdout, _("\nWarning (libScotch):\n\n"));
1720
1721 va_start(errlist, errstr);
1722 vfprintf(stdout, errstr, errlist);
1723 va_end(errlist);
1724 fprintf(stdout, "\n\n");
1725 fflush(stdout);
1726 }
1727 }
1728
1729 /*----------------------------------------------------------------------------
1730 * Build cell -> cell connectivity
1731 *
1732 * parameters:
1733 * n_cells <-- number of cells in mesh
1734 * n_faces <-- number of cells in mesh
1735 * start_cell <-- number of first cell for the curent rank
1736 * face_cells <-- face->cells connectivity
1737 * cell_idx --> cell->cells index
1738 * cell_neighbors --> cell->cells connectivity
1739 *----------------------------------------------------------------------------*/
1740
1741 static void
_scotch_cell_cells(size_t n_cells,size_t n_faces,cs_gnum_t start_cell,cs_gnum_t * face_cells,SCOTCH_Num ** cell_idx,SCOTCH_Num ** cell_neighbors)1742 _scotch_cell_cells(size_t n_cells,
1743 size_t n_faces,
1744 cs_gnum_t start_cell,
1745 cs_gnum_t *face_cells,
1746 SCOTCH_Num **cell_idx,
1747 SCOTCH_Num **cell_neighbors)
1748 {
1749 size_t i;
1750 cs_gnum_t id_0, id_1, c_num[2];
1751 SCOTCH_Num start_id, end_id, c_id;
1752
1753 SCOTCH_Num *n_neighbors;
1754 SCOTCH_Num *_cell_idx;
1755 SCOTCH_Num *_cell_neighbors;
1756
1757 /* Count and allocate arrays */
1758
1759 BFT_MALLOC(n_neighbors, n_cells, SCOTCH_Num);
1760
1761 for (i = 0; i < n_cells; i++)
1762 n_neighbors[i] = 0;
1763
1764 for (i = 0; i < n_faces; i++) {
1765
1766 c_num[0] = face_cells[i*2];
1767 c_num[1] = face_cells[i*2 + 1];
1768
1769 if (c_num[0] == 0 || c_num[1] == 0 || c_num[0] == c_num[1])
1770 continue;
1771
1772 assert( c_num[0] - start_cell < n_cells
1773 || c_num[1] - start_cell < n_cells);
1774
1775 if (c_num[0] >= start_cell && c_num[0] - start_cell < n_cells)
1776 n_neighbors[c_num[0] - start_cell] += 1;
1777 if (c_num[1] >= start_cell && c_num[1] - start_cell < n_cells)
1778 n_neighbors[c_num[1] - start_cell] += 1;
1779 }
1780
1781 BFT_MALLOC(_cell_idx, n_cells + 1, SCOTCH_Num);
1782
1783 _cell_idx[0] = 0;
1784
1785 for (i = 0; i < n_cells; i++)
1786 _cell_idx[i + 1] = _cell_idx[i] + n_neighbors[i];
1787
1788 BFT_MALLOC(_cell_neighbors, _cell_idx[n_cells], SCOTCH_Num);
1789
1790 for (i = 0; i < n_cells; i++)
1791 n_neighbors[i] = 0;
1792
1793 for (i = 0; i < n_faces; i++) {
1794
1795 c_num[0] = face_cells[i*2];
1796 c_num[1] = face_cells[i*2 + 1];
1797
1798 if (c_num[0] == 0 || c_num[1] == 0 || c_num[0] == c_num[1])
1799 continue;
1800
1801 if (c_num[0] >= start_cell && c_num[0] - start_cell < n_cells) {
1802 id_0 = c_num[0] - start_cell;
1803 _cell_neighbors[_cell_idx[id_0] + n_neighbors[id_0]] = c_num[1] - 1;
1804 n_neighbors[id_0] += 1;
1805 }
1806
1807 if (c_num[1] >= start_cell && c_num[1] - start_cell < n_cells) {
1808 id_1 = c_num[1] - start_cell;
1809 _cell_neighbors[_cell_idx[id_1] + n_neighbors[id_1]] = c_num[0] - 1;
1810 n_neighbors[id_1] += 1;
1811 }
1812 }
1813
1814 BFT_FREE(n_neighbors);
1815
1816 /* Clean graph */
1817
1818 c_id = 0;
1819 start_id = _cell_idx[0]; /* also = 0 */
1820 end_id = 0;
1821
1822 if (_cell_neighbors != NULL) {
1823
1824 for (i = 0; i < n_cells; i++) {
1825
1826 SCOTCH_Num j, n_prev;
1827
1828 end_id = _cell_idx[i+1];
1829
1830 _scotch_sort_shell(start_id, end_id, _cell_neighbors);
1831
1832 n_prev = _cell_neighbors[start_id];
1833 _cell_neighbors[c_id] = n_prev;
1834 c_id += 1;
1835
1836 for (j = start_id + 1; j < end_id; j++) {
1837 if (_cell_neighbors[j] != n_prev) {
1838 n_prev = _cell_neighbors[j];
1839 _cell_neighbors[c_id] = n_prev;
1840 c_id += 1;
1841 }
1842 }
1843
1844 start_id = end_id;
1845 _cell_idx[i+1] = c_id;
1846
1847 }
1848
1849 if (c_id < end_id)
1850 BFT_REALLOC(_cell_neighbors, c_id, SCOTCH_Num);
1851
1852 }
1853
1854 /* Set return values */
1855
1856 *cell_idx = _cell_idx;
1857 *cell_neighbors = _cell_neighbors;
1858 }
1859
1860 /*----------------------------------------------------------------------------
1861 * Compute partition using SCOTCH
1862 *
1863 * parameters:
1864 * n_cells <-- number of cells in mesh
1865 * n_parts <-- number of partitions
1866 * cell_cell_idx <-- cell->cells index
1867 * cell_cell <-- cell->cells connectivity
1868 * cell_part --> cell partition
1869 *----------------------------------------------------------------------------*/
1870
1871 static void
_part_scotch(SCOTCH_Num n_cells,int n_parts,SCOTCH_Num * cell_idx,SCOTCH_Num * cell_neighbors,int * cell_part)1872 _part_scotch(SCOTCH_Num n_cells,
1873 int n_parts,
1874 SCOTCH_Num *cell_idx,
1875 SCOTCH_Num *cell_neighbors,
1876 int *cell_part)
1877 {
1878 SCOTCH_Num i;
1879 SCOTCH_Graph grafdat; /* Scotch graph object to interface with libScotch */
1880 SCOTCH_Strat stradat;
1881
1882 double start_time, end_time;
1883
1884 int retval = 0;
1885
1886 SCOTCH_Num edgecut = 0; /* <-- Number of faces on partition */
1887 SCOTCH_Num *_cell_part = NULL;
1888
1889 /* Initialization */
1890
1891 start_time = cs_timer_wtime();
1892
1893 if (sizeof(SCOTCH_Num) == sizeof(int))
1894 _cell_part = (SCOTCH_Num *)cell_part;
1895 else
1896 BFT_MALLOC(_cell_part, n_cells, SCOTCH_Num);
1897
1898 bft_printf(_("\n"
1899 " Partitioning %llu cells to %d domains\n"
1900 " (SCOTCH_graphPart).\n"),
1901 (unsigned long long)n_cells, n_parts);
1902
1903 /* Partition using libScotch */
1904
1905 SCOTCH_graphInit(&grafdat);
1906
1907 retval
1908 = SCOTCH_graphBuild(&grafdat,
1909 0, /* baseval; 0 to n -1 numbering */
1910 n_cells, /* vertnbr */
1911 cell_idx, /* verttab */
1912 NULL, /* vendtab: verttab + 1 or NULL */
1913 NULL, /* velotab: vertex weights */
1914 NULL, /* vlbltab; vertex labels */
1915 cell_idx[n_cells], /* edgenbr */
1916 cell_neighbors, /* edgetab */
1917 NULL); /* edlotab */
1918
1919 if (retval == 0) {
1920
1921 SCOTCH_stratInit(&stradat);
1922
1923 if (SCOTCH_graphCheck(&grafdat) == 0)
1924 retval = SCOTCH_graphPart(&grafdat, n_parts, &stradat, _cell_part);
1925
1926 SCOTCH_stratExit(&stradat);
1927 }
1928
1929 SCOTCH_graphExit(&grafdat);
1930
1931 /* Shift cell_part values to 1 to n numbering and free possible temporary */
1932
1933 if (sizeof(SCOTCH_Num) != sizeof(int)) {
1934 for (i = 0; i < n_cells; i++)
1935 cell_part[i] = _cell_part[i];
1936 BFT_FREE(_cell_part);
1937 }
1938
1939 /* Compute edge cut */
1940
1941 if (retval == 0) {
1942
1943 SCOTCH_Num cell_id, edgenum, commcut;
1944
1945 commcut = 0;
1946
1947 for (cell_id = 0; cell_id < n_cells; cell_id++) {
1948 SCOTCH_Num edgennd, partval;
1949 partval = cell_part[cell_id];
1950 for (edgenum = cell_idx[cell_id], edgennd = cell_idx[cell_id + 1];
1951 edgenum < edgennd;
1952 edgenum++) {
1953 if (cell_part[cell_neighbors[edgenum]] != partval)
1954 commcut++;
1955 }
1956 }
1957
1958 edgecut = commcut / 2;
1959 }
1960
1961 /* Finalization */
1962
1963 end_time = cs_timer_wtime();
1964
1965 bft_printf(_("\n"
1966 " Total number of faces on parallel boundaries: %llu\n"
1967 " wall-clock time: %f s\n\n"),
1968 (unsigned long long)edgecut,
1969 (double)(end_time - start_time));
1970
1971 cs_log_printf(CS_LOG_PERFORMANCE,
1972 " SCOTCH_graphPart: %.3g s\n",
1973 (double)(end_time - start_time));
1974 }
1975
1976 #endif /* defined(HAVE_SCOTCH) || defined(HAVE_PTSCOTCH) */
1977
1978 #if defined(HAVE_PTSCOTCH)
1979
1980 /*----------------------------------------------------------------------------
1981 * Compute partition using PT-SCOTCH
1982 *
1983 * parameters:
1984 * n_g_cells <-- global number of cells
1985 * cell_range <-- first and past-the-last cell numbers for this rank
1986 * n_parts <-- number of partitions
1987 * cell_cell_idx <-- cell->cells index
1988 * cell_cell <-- cell->cells connectivity
1989 * cell_part --> cell partition
1990 * comm <-- associated MPI communicator
1991 *----------------------------------------------------------------------------*/
1992
1993 static void
_part_ptscotch(cs_gnum_t n_g_cells,cs_gnum_t cell_range[2],int n_parts,SCOTCH_Num * cell_idx,SCOTCH_Num * cell_neighbors,int * cell_part,MPI_Comm comm)1994 _part_ptscotch(cs_gnum_t n_g_cells,
1995 cs_gnum_t cell_range[2],
1996 int n_parts,
1997 SCOTCH_Num *cell_idx,
1998 SCOTCH_Num *cell_neighbors,
1999 int *cell_part,
2000 MPI_Comm comm)
2001 {
2002 int n_ranks;
2003 SCOTCH_Num i;
2004 SCOTCH_Dgraph grafdat; /* Scotch graph object to interface with libScotch */
2005 SCOTCH_Strat stradat;
2006
2007 double start_time, end_time;
2008
2009 int retval = 0;
2010
2011 SCOTCH_Num n_cells = cell_range[1] - cell_range[0];
2012 SCOTCH_Num *_cell_part = NULL;
2013
2014 /* Initialization */
2015
2016 start_time = cs_timer_wtime();
2017
2018 MPI_Comm_size(comm, &n_ranks);
2019
2020 if (sizeof(SCOTCH_Num) == sizeof(int))
2021 _cell_part = (SCOTCH_Num *)cell_part;
2022 else
2023 BFT_MALLOC(_cell_part, n_cells, SCOTCH_Num);
2024
2025 bft_printf(_("\n"
2026 " Partitioning %llu cells to %d domains on %d ranks\n"
2027 " (SCOTCH_dgraphPart).\n"),
2028 (unsigned long long)n_g_cells, n_parts, n_ranks);
2029
2030 /* Partition using libScotch */
2031
2032 retval = SCOTCH_dgraphInit(&grafdat, comm);
2033
2034 if (retval == 0) {
2035 retval = SCOTCH_dgraphBuild
2036 (&grafdat,
2037 0, /* baseval; 0 to n -1 numbering */
2038 n_cells, /* vertlocnbr */
2039 n_cells, /* vertlocmax (= vertlocnbr) */
2040 cell_idx, /* vertloctab */
2041 NULL, /* vendloctab: vertloctab + 1 or NULL */
2042 NULL, /* veloloctab: vertex weights */
2043 NULL, /* vlblloctab; vertex labels */
2044 cell_idx[n_cells], /* edgelocnbr */
2045 cell_idx[n_cells], /* edgelocsiz */
2046 cell_neighbors, /* edgeloctab */
2047 NULL, /* edgegstab */
2048 NULL); /* edloloctab */
2049 }
2050
2051 if (retval == 0) {
2052
2053 SCOTCH_stratInit(&stradat);
2054
2055 if (SCOTCH_dgraphCheck(&grafdat) == 0)
2056 retval = SCOTCH_dgraphPart(&grafdat, n_parts, &stradat, _cell_part);
2057
2058 SCOTCH_stratExit(&stradat);
2059 }
2060
2061 SCOTCH_dgraphExit(&grafdat);
2062
2063 /* Shift cell_part values to 1 to n numbering and free possible temporary */
2064
2065 if (sizeof(SCOTCH_Num) != sizeof(int)) {
2066 for (i = 0; i < n_cells; i++)
2067 cell_part[i] = _cell_part[i];
2068 BFT_FREE(_cell_part);
2069 }
2070
2071 /* Finalization */
2072
2073 end_time = cs_timer_wtime();
2074
2075 bft_printf(_("\n"
2076 " wall-clock time: %f s\n\n"),
2077 (double)(end_time - start_time));
2078
2079 cs_log_printf(CS_LOG_PERFORMANCE,
2080 " SCOTCH_dgraphPart: %.3g s\n",
2081 (double)(end_time - start_time));
2082 }
2083
2084 #endif /* defined(HAVE_PTSCOTCH) */
2085
2086 /*----------------------------------------------------------------------------
2087 * Prepare input from mesh builder for use by partitioner.
2088 *
2089 * parameters:
2090 * mesh <-- pointer to mesh structure
2091 * mb <-- pointer to mesh builder structure
2092 * rank_step <-- Step between active partitioning ranks
2093 * (1 in basic case, > 1 if we seek to partition on a
2094 * reduced number of ranks)
2095 * ignore_perio <-- ignore periodicity information if true
2096 * cell_range <-- first and past-the-last cell numbers for this rank
2097 * n_faces <-- number of local faces for current rank
2098 * g_face_cells <-> global face -> cells connectivity
2099 *----------------------------------------------------------------------------*/
2100
2101 static void
_prepare_input(const cs_mesh_t * mesh,const cs_mesh_builder_t * mb,int rank_step,bool ignore_perio,cs_gnum_t cell_range[2],cs_lnum_t * n_faces,cs_gnum_t ** g_face_cells)2102 _prepare_input(const cs_mesh_t *mesh,
2103 const cs_mesh_builder_t *mb,
2104 int rank_step,
2105 bool ignore_perio,
2106 cs_gnum_t cell_range[2],
2107 cs_lnum_t *n_faces,
2108 cs_gnum_t **g_face_cells)
2109 {
2110 int rank_id = cs_glob_rank_id;
2111 int n_ranks = cs_glob_n_ranks;
2112
2113 cs_gnum_t *_g_face_cells = NULL;
2114
2115 cs_block_dist_info_t cell_bi = cs_block_dist_compute_sizes(rank_id,
2116 n_ranks,
2117 rank_step,
2118 0,
2119 mesh->n_g_cells);
2120
2121 /* By default, the face -> cells connectivity is that of the mesh builder */
2122
2123 *g_face_cells = mb->face_cells;
2124
2125 /* In case of periodicity, update global face -> cells connectivity:
2126 if face Fi is periodic with face Fj, and face Fi is connected
2127 to cell Ci, while face Fj is connected to face Cj, we add Cj
2128 to the cells connected to Fi, and Ci to the cells connected to Fj,
2129 just as if Fi and Fj were regular interior faces (this ignores
2130 the geometric transformation, but is sufficient to build the
2131 cells -> cells graph for domain partitioning). */
2132
2133 if (ignore_perio == false && mb->n_perio > 0) {
2134
2135 int perio_id;
2136 cs_lnum_t n_per_face_couples = 0;
2137 cs_gnum_t n_g_per_face_couples = 0;
2138 cs_gnum_t _n_b_faces = 0;
2139 cs_gnum_t *_per_face_couples = NULL;
2140 const cs_gnum_t *per_face_couples = NULL;
2141
2142 if (mb->face_bi.gnum_range[1] > mb->face_bi.gnum_range[0])
2143 _n_b_faces = mb->face_bi.gnum_range[1] - mb->face_bi.gnum_range[0];
2144
2145 BFT_MALLOC(_g_face_cells, _n_b_faces*2, cs_gnum_t);
2146 *g_face_cells = _g_face_cells;
2147
2148 memcpy(_g_face_cells, mb->face_cells, sizeof(cs_gnum_t)*_n_b_faces*2);
2149
2150 if (mb->n_perio > 1) {
2151
2152 /* Assemble faces from multiple periodicities into one */
2153
2154 for (perio_id = 0; perio_id < mb->n_perio; perio_id++) {
2155 n_per_face_couples += mb->n_per_face_couples[perio_id];
2156 n_g_per_face_couples += mb->n_g_per_face_couples[perio_id];
2157 }
2158
2159 BFT_MALLOC(_per_face_couples, n_per_face_couples*2, cs_gnum_t);
2160 per_face_couples = _per_face_couples;
2161
2162 n_per_face_couples = 0;
2163
2164 for (perio_id = 0; perio_id < mb->n_perio; perio_id++) {
2165 cs_gnum_t *per_face_couples_p = _per_face_couples + 2*n_per_face_couples;
2166 memcpy(per_face_couples_p,
2167 mb->per_face_couples[perio_id],
2168 sizeof(cs_gnum_t)*mb->n_per_face_couples[perio_id]*2);
2169 n_per_face_couples += mb->n_per_face_couples[perio_id];
2170 }
2171
2172 }
2173 else { /* if mb->n_perio == 1 */
2174 n_per_face_couples = mb->n_per_face_couples[0];
2175 n_g_per_face_couples = mb->n_g_per_face_couples[0];
2176 per_face_couples = mb->per_face_couples[0];
2177 }
2178
2179 if (n_g_per_face_couples > 0) {
2180
2181 #if defined(HAVE_MPI)
2182 if (cs_glob_n_ranks > 1)
2183 _add_perio_to_face_cells_g(mb->face_bi,
2184 _g_face_cells,
2185 n_per_face_couples,
2186 per_face_couples,
2187 cs_glob_mpi_comm);
2188 #endif
2189
2190 if (n_ranks == 1)
2191 _add_perio_to_face_cells_l(_g_face_cells,
2192 n_per_face_couples,
2193 per_face_couples);
2194
2195 }
2196
2197 if (_per_face_couples != NULL)
2198 BFT_FREE(_per_face_couples);
2199 }
2200
2201 else if (mb->n_perio > 0)
2202 bft_printf(_("\n"
2203 " Ignoring periodicity for graph-based partitioning.\n"));
2204
2205 /* Distribute faces if necessary */
2206
2207 #if defined(HAVE_MPI)
2208
2209 if (cs_glob_n_ranks > 1) {
2210
2211 cs_datatype_t gnum_type
2212 = (sizeof(cs_gnum_t) == 8) ? CS_UINT64 : CS_UINT32;
2213 cs_gnum_t *g_face_cells_tmp = NULL;
2214
2215 cs_all_to_all_t
2216 *d = cs_block_to_part_create_by_adj_s(cs_glob_mpi_comm,
2217 mb->face_bi,
2218 cell_bi,
2219 2,
2220 *g_face_cells,
2221 NULL,
2222 NULL,
2223 n_faces,
2224 NULL);
2225
2226 BFT_MALLOC(g_face_cells_tmp, (*n_faces)*2, cs_gnum_t);
2227
2228 /* Face -> cell connectivity */
2229
2230 cs_all_to_all_copy_array(d,
2231 gnum_type,
2232 2,
2233 true, /* reverse */
2234 *g_face_cells,
2235 g_face_cells_tmp);
2236
2237 if (_g_face_cells != NULL) /* in case of periodicity */
2238 BFT_FREE(_g_face_cells);
2239
2240 *g_face_cells = g_face_cells_tmp;
2241
2242 cs_all_to_all_destroy(&d);
2243 }
2244
2245 #endif /* defined(HAVE_MPI) */
2246
2247 if (cs_glob_n_ranks == 1)
2248 *n_faces = mb->n_g_faces;
2249
2250 /* Prepare auxiliary return values */
2251
2252 cell_range[0] = cell_bi.gnum_range[0];
2253 cell_range[1] = cell_bi.gnum_range[1];
2254 }
2255
2256 #if defined(HAVE_METIS) || defined(HAVE_PARMETIS) \
2257 || defined(HAVE_SCOTCH) || defined(HAVE_PTSCOTCH)
2258
2259 /*----------------------------------------------------------------------------
2260 * Distribute partitioning info so as to match mesh builder block info.
2261 *
2262 * parameters:
2263 * mb <-- pointer to mesh builder structure
2264 * rank_step <-- Step between active partitioning ranks
2265 * (1 in basic case, > 1 if we seek to partition on a
2266 * reduced number of ranks)
2267 * cell_range <-- first and past-the-last cell numbers for this rank
2268 * cell_rank <-> pointer to pointer to cell rank
2269 *----------------------------------------------------------------------------*/
2270
2271 static void
_distribute_output(const cs_mesh_builder_t * mb,int rank_step,const cs_gnum_t cell_range[2],int ** cell_rank)2272 _distribute_output(const cs_mesh_builder_t *mb,
2273 int rank_step,
2274 const cs_gnum_t cell_range[2],
2275 int **cell_rank)
2276 {
2277 /* Distribute partitioning info if necessary */
2278
2279 #if defined(HAVE_MPI)
2280
2281 if (cs_glob_n_ranks > 1 && (mb->cell_bi.rank_step != rank_step)) {
2282
2283 cs_gnum_t i;
2284 cs_gnum_t n_b_cells = 0, n_p_cells = 0;
2285 cs_datatype_t int_type = (sizeof(int) == 8) ? CS_INT64 : CS_INT32;
2286
2287 cs_part_to_block_t *d = NULL;
2288 cs_gnum_t *global_cell_num = NULL;
2289
2290 int *_cell_rank = NULL;
2291
2292 if (mb->cell_bi.gnum_range[1] > mb->cell_bi.gnum_range[0])
2293 n_b_cells = mb->cell_bi.gnum_range[1] - mb->cell_bi.gnum_range[0];
2294
2295 if (cell_range[1] > cell_range[0])
2296 n_p_cells = cell_range[1] - cell_range[0];
2297
2298 BFT_MALLOC(_cell_rank, n_b_cells, int);
2299 BFT_MALLOC(global_cell_num, n_p_cells, cs_gnum_t);
2300
2301 for (i = 0; i < n_p_cells; i++)
2302 global_cell_num[i] = cell_range[0] + i;
2303
2304 d = cs_part_to_block_create_by_gnum(cs_glob_mpi_comm,
2305 mb->cell_bi,
2306 n_p_cells,
2307 global_cell_num);
2308 cs_part_to_block_transfer_gnum(d, global_cell_num);
2309 global_cell_num = NULL;
2310
2311 /* Cell rank id */
2312
2313 cs_part_to_block_copy_array(d,
2314 int_type,
2315 1,
2316 *cell_rank,
2317 _cell_rank);
2318
2319 cs_part_to_block_destroy(&d);
2320
2321 BFT_FREE(*cell_rank);
2322 *cell_rank = _cell_rank;
2323 }
2324
2325 #endif /* defined(HAVE_MPI) */
2326 }
2327
2328 #endif /* defined(HAVE_METIS) || defined(HAVE_PARMETIS) \
2329 || defined(HAVE_SCOTCH) || defined(HAVE_PTSCOTCH) */
2330
2331 /*----------------------------------------------------------------------------
2332 * Write output file.
2333 *
2334 * parameters:
2335 * n_g_cells <-- global number of cells
2336 * cell_range <-- first and past-the-last cell numbers for this rank
2337 * n_ranks <-- number of ranks corresonding to output file
2338 * domain_rank <-- domain rank array (size: n_cells)
2339 *----------------------------------------------------------------------------*/
2340
2341 static void
_write_output(cs_gnum_t n_g_cells,cs_gnum_t cell_range[2],int n_ranks,const int domain_rank[])2342 _write_output(cs_gnum_t n_g_cells,
2343 cs_gnum_t cell_range[2],
2344 int n_ranks,
2345 const int domain_rank[])
2346 {
2347 size_t i;
2348 int n_ranks_size;
2349 cs_file_access_t method;
2350 char *filename = NULL;
2351 cs_io_t *fh = NULL;
2352 int *domain_num = NULL;
2353 cs_datatype_t datatype_gnum = CS_DATATYPE_NULL;
2354 cs_datatype_t datatype_int = CS_DATATYPE_NULL;
2355
2356 const char dir[] = "partition_output";
2357 const char magic_string[] = "Domain partitioning, R0";
2358
2359 if (sizeof(int) == 4)
2360 datatype_int = CS_INT32;
2361 else if (sizeof(int) == 8)
2362 datatype_int = CS_INT64;
2363 else {
2364 assert(0);
2365 }
2366
2367 if (sizeof(cs_gnum_t) == 4)
2368 datatype_gnum = CS_UINT32;
2369 else if (sizeof(cs_gnum_t) == 8)
2370 datatype_gnum = CS_UINT64;
2371 else {
2372 assert(0);
2373 }
2374
2375 if (cell_range[1] > cell_range[0]) {
2376 cs_gnum_t _n_cells = cell_range[1] - cell_range[0];
2377 BFT_MALLOC(domain_num, _n_cells, int);
2378 for (i = 0; i < _n_cells; i++)
2379 domain_num[i] = domain_rank[i] + 1;
2380 }
2381
2382 /* Create directory if required */
2383
2384 if (cs_glob_rank_id < 1) {
2385 if (cs_file_isdir(dir) != 1) {
2386 if (cs_file_mkdir_default(dir) != 0)
2387 bft_error(__FILE__, __LINE__, errno,
2388 _("The partitioning directory cannot be created"));
2389 }
2390 }
2391 #if defined(HAVE_MPI)
2392 if (cs_glob_n_ranks > 1)
2393 MPI_Barrier(cs_glob_mpi_comm);
2394 #endif
2395
2396 /* Open file */
2397
2398 for (i = n_ranks, n_ranks_size = 1;
2399 i >= 10;
2400 i /= 10, n_ranks_size += 1);
2401
2402 BFT_MALLOC(filename,
2403 strlen(dir) + strlen("domain_number_") + n_ranks_size + 2,
2404 char);
2405
2406 sprintf(filename,
2407 "%s%cdomain_number_%d",
2408 dir, _dir_separator, n_ranks);
2409
2410 #if defined(HAVE_MPI)
2411 {
2412 MPI_Info hints;
2413 MPI_Comm block_comm, comm;
2414 cs_file_get_default_access(CS_FILE_MODE_WRITE, &method, &hints);
2415 cs_file_get_default_comm(NULL, &block_comm, &comm);
2416 assert(comm == cs_glob_mpi_comm || comm == MPI_COMM_NULL);
2417 fh = cs_io_initialize(filename,
2418 magic_string,
2419 CS_IO_MODE_WRITE,
2420 method,
2421 CS_IO_ECHO_OPEN_CLOSE,
2422 hints,
2423 block_comm,
2424 comm);
2425 }
2426 #else
2427 {
2428 cs_file_get_default_access(CS_FILE_MODE_WRITE, &method);
2429 fh = cs_io_initialize(filename,
2430 magic_string,
2431 CS_IO_MODE_WRITE,
2432 method,
2433 CS_IO_ECHO_OPEN_CLOSE);
2434 }
2435 #endif
2436
2437 BFT_FREE(filename);
2438
2439 /* Write headers */
2440
2441 cs_io_write_global("n_cells",
2442 1,
2443 1,
2444 0,
2445 1,
2446 datatype_gnum,
2447 &n_g_cells,
2448 fh);
2449
2450 cs_io_write_global("n_ranks",
2451 1,
2452 1,
2453 0,
2454 1,
2455 datatype_int,
2456 &n_ranks,
2457 fh);
2458
2459 cs_io_write_block_buffer("cell:domain number",
2460 n_g_cells,
2461 cell_range[0],
2462 cell_range[1],
2463 1,
2464 0,
2465 1,
2466 datatype_int,
2467 domain_num,
2468 fh);
2469
2470 cs_io_finalize(&fh);
2471
2472 BFT_FREE(domain_num);
2473 }
2474
2475 /*----------------------------------------------------------------------------
2476 * Read cell rank if available
2477 *
2478 * parameters:
2479 * mesh <-- pointer to mesh structure
2480 * mb <-> pointer to mesh builder helper structure
2481 * echo <-- echo (verbosity) level
2482 *----------------------------------------------------------------------------*/
2483
2484 static void
_read_cell_rank(cs_mesh_t * mesh,cs_mesh_builder_t * mb,long echo)2485 _read_cell_rank(cs_mesh_t *mesh,
2486 cs_mesh_builder_t *mb,
2487 long echo)
2488 {
2489 char file_name[64]; /* more than enough for
2490 "partition_input/domain_number_<n_ranks>" */
2491 size_t i;
2492 cs_file_access_t method;
2493 cs_io_sec_header_t header;
2494
2495 cs_io_t *rank_pp_in = NULL;
2496 cs_lnum_t n_ranks = 0;
2497 cs_gnum_t n_elts = 0;
2498 cs_gnum_t n_g_cells = 0;
2499
2500 const char magic_string[] = "Domain partitioning, R0";
2501 const char *unexpected_msg = N_("Section of type <%s> on <%s>\n"
2502 "unexpected or of incorrect size");
2503
2504 if (n_ranks == 1)
2505 return;
2506
2507 #if (__STDC_VERSION__ < 199901L)
2508 sprintf(file_name,
2509 "partition_input%cdomain_number_%d",
2510 _dir_separator, cs_glob_n_ranks);
2511 #else
2512 snprintf(file_name, 64,
2513 "partition_input%cdomain_number_%d",
2514 _dir_separator, cs_glob_n_ranks);
2515 #endif
2516 file_name[63] = '\0'; /* Just in case; processor counts would need to be
2517 in the exa-range for this to be necessary. */
2518
2519 /* Test if file exists */
2520
2521 if (! cs_file_isreg(file_name)) {
2522 bft_printf(_(" No \"%s\" file available;\n"), file_name);
2523 return;
2524 }
2525
2526 /* Open file */
2527
2528 #if defined(HAVE_MPI)
2529 {
2530 MPI_Info hints;
2531 MPI_Comm block_comm, comm;
2532 cs_file_get_default_access(CS_FILE_MODE_WRITE, &method, &hints);
2533 cs_file_get_default_comm(NULL, &block_comm, &comm);
2534 assert(comm == cs_glob_mpi_comm || comm == MPI_COMM_NULL);
2535 rank_pp_in = cs_io_initialize(file_name,
2536 magic_string,
2537 CS_IO_MODE_READ,
2538 method,
2539 echo,
2540 hints,
2541 block_comm,
2542 comm);
2543 }
2544 #else
2545 {
2546 cs_file_get_default_access(CS_FILE_MODE_WRITE, &method);
2547 rank_pp_in = cs_io_initialize(file_name,
2548 magic_string,
2549 CS_IO_MODE_READ,
2550 method,
2551 echo);
2552 }
2553 #endif
2554
2555 if (echo > 0)
2556 bft_printf("\n");
2557
2558 /* Loop on read sections */
2559
2560 while (rank_pp_in != NULL) {
2561
2562 /* Receive headers */
2563
2564 cs_io_read_header(rank_pp_in, &header);
2565
2566 /* Treatment according to the header name */
2567
2568 if (strncmp(header.sec_name, "n_cells",
2569 CS_IO_NAME_LEN) == 0) {
2570
2571 if (header.n_vals != 1)
2572 bft_error(__FILE__, __LINE__, 0,
2573 _(unexpected_msg), header.sec_name,
2574 cs_io_get_name(rank_pp_in));
2575 else {
2576 cs_io_set_cs_gnum(&header, rank_pp_in);
2577 cs_io_read_global(&header, &n_g_cells, rank_pp_in);
2578 if (n_g_cells != mesh->n_g_cells)
2579 bft_error(__FILE__, __LINE__, 0,
2580 _("The number of cells reported by file\n"
2581 "\"%s\" (%llu)\n"
2582 "does not correspond to those of the mesh (%llu)."),
2583 cs_io_get_name(rank_pp_in),
2584 (unsigned long long)(n_g_cells),
2585 (unsigned long long)(mesh->n_g_cells));
2586 }
2587
2588 }
2589 else if (strncmp(header.sec_name, "n_ranks",
2590 CS_IO_NAME_LEN) == 0) {
2591
2592 if (header.n_vals != 1)
2593 bft_error(__FILE__, __LINE__, 0,
2594 _(unexpected_msg), header.sec_name,
2595 cs_io_get_name(rank_pp_in));
2596 else {
2597 cs_io_set_cs_lnum(&header, rank_pp_in);
2598 cs_io_read_global(&header, &n_ranks, rank_pp_in);
2599 if (n_ranks != cs_glob_n_ranks)
2600 bft_error(__FILE__, __LINE__, 0,
2601 _("The number of ranks reported by file\n"
2602 "\"%s\" (%d) does not\n"
2603 "correspond to the current number of ranks (%d)."),
2604 cs_io_get_name(rank_pp_in), (int)n_ranks,
2605 (int)cs_glob_n_ranks);
2606 }
2607
2608 }
2609 else if (strncmp(header.sec_name, "cell:domain number",
2610 CS_IO_NAME_LEN) == 0) {
2611
2612 n_elts = mesh->n_g_cells;
2613 if (header.n_vals != (cs_file_off_t)n_elts)
2614 bft_error(__FILE__, __LINE__, 0,
2615 _(unexpected_msg), header.sec_name,
2616 cs_io_get_name(rank_pp_in));
2617 else {
2618 mb->have_cell_rank = true;
2619 cs_io_set_cs_lnum(&header, rank_pp_in);
2620 if (mb->cell_bi.gnum_range[0] > 0)
2621 n_elts = mb->cell_bi.gnum_range[1] - mb->cell_bi.gnum_range[0];
2622 BFT_MALLOC(mb->cell_rank, n_elts, int);
2623 cs_io_read_block(&header,
2624 mb->cell_bi.gnum_range[0],
2625 mb->cell_bi.gnum_range[1],
2626 mb->cell_rank, rank_pp_in);
2627 for (i = 0; i < n_elts; i++) /* Convert 1 to n to 0 to n-1 */
2628 mb->cell_rank[i] -= 1;
2629 }
2630 cs_io_finalize(&rank_pp_in);
2631 rank_pp_in = NULL;
2632
2633 }
2634
2635 else
2636 bft_error(__FILE__, __LINE__, 0,
2637 _("Section of type <%s> on <%s> is unexpected."),
2638 header.sec_name, cs_io_get_name(rank_pp_in));
2639 }
2640
2641 if (rank_pp_in != NULL)
2642 cs_io_finalize(&rank_pp_in);
2643 }
2644
2645 /*----------------------------------------------------------------------------*
2646 * Define a naive partitioning by blocks.
2647 *
2648 * parameters:
2649 * mesh <-- pointer to mesh structure
2650 * mb <-- pointer to mesh builder structure
2651 * cell_part --> assigned cell partition
2652 *----------------------------------------------------------------------------*/
2653
2654 static void
_block_partititioning(const cs_mesh_t * mesh,const cs_mesh_builder_t * mb,int * cell_part)2655 _block_partititioning(const cs_mesh_t *mesh,
2656 const cs_mesh_builder_t *mb,
2657 int *cell_part)
2658 {
2659 cs_lnum_t i;
2660
2661 int n_ranks = cs_glob_n_ranks;
2662 cs_lnum_t block_size = mesh->n_g_cells / n_ranks;
2663 cs_lnum_t n_cells = mb->cell_bi.gnum_range[1] - mb->cell_bi.gnum_range[0];
2664
2665 if (mesh->n_g_cells % n_ranks)
2666 block_size += 1;
2667
2668 /* Use variable block size if necessary so that all ranks have some
2669 cells assigned if possible */
2670
2671 if ((cs_lnum_t)((mesh->n_g_cells - 1) % block_size) < n_ranks - 1) {
2672
2673 cs_gnum_t cells_per_rank = mesh->n_g_cells / n_ranks;
2674 cs_lnum_t rmdr = mesh->n_g_cells - cells_per_rank * (cs_gnum_t)n_ranks;
2675
2676 if (rmdr == 0) {
2677 for (i = 0; i < n_cells; i++) {
2678 cs_gnum_t cell_num = mb->cell_bi.gnum_range[0] + i;
2679 cell_part[i] = (cell_num - 1) / cells_per_rank;
2680 }
2681 }
2682 else {
2683 cs_gnum_t n_ranks_rmdr = n_ranks - rmdr;
2684 cs_gnum_t n_ranks_cells_per_rank = n_ranks_rmdr * cells_per_rank;
2685 for (i = 0; i < n_cells; i++) {
2686 cs_gnum_t cell_num = mb->cell_bi.gnum_range[0] + i;
2687 if ((cell_num - 1) < n_ranks_cells_per_rank)
2688 cell_part[i] = (cell_num - 1) / cells_per_rank;
2689 else
2690 cell_part[i] = (cell_num + n_ranks_rmdr - 1) / (cells_per_rank + 1);
2691 }
2692 }
2693
2694 }
2695
2696 else {
2697 for (i = 0; i < n_cells; i++) {
2698 cs_gnum_t cell_num = mb->cell_bi.gnum_range[0] + i;
2699 cell_part[i] = ((cell_num - 1) / block_size);
2700 }
2701 }
2702
2703 }
2704
2705 /*----------------------------------------------------------------------------*
2706 * Indicate if a usable graph partitioning algorithm is available
2707 * for a given partitioning stage.
2708 *
2709 * If a non-default partitioning algorithm for this stage has ben defined,
2710 * it is considered usable here (checking may be done later).
2711 *
2712 * parameters:
2713 * stage <-- associated partitioning stage
2714 *
2715 * returns:
2716 * true if graph partitioning is available, false otherwise.
2717 *----------------------------------------------------------------------------*/
2718
2719 static bool
_have_usable_graph_partitioning(cs_partition_stage_t stage)2720 _have_usable_graph_partitioning(cs_partition_stage_t stage)
2721 {
2722 bool retval = false;
2723
2724 cs_partition_algorithm_t a = _part_algorithm[stage];
2725
2726 int n_part_ranks = cs_glob_n_ranks / _part_rank_step[stage];
2727 if (n_part_ranks < 1)
2728 n_part_ranks = 1;
2729
2730 if (a >= CS_PARTITION_SCOTCH && a <= CS_PARTITION_METIS)
2731 retval = true;
2732
2733 #if defined(HAVE_PTSCOTCH)
2734 if (a == CS_PARTITION_DEFAULT)
2735 retval = true;
2736 #elif defined(HAVE_SCOTCH)
2737 if (n_part_ranks == 1 && a == CS_PARTITION_DEFAULT)
2738 retval = true;
2739 #elif defined(HAVE_PARMETIS)
2740 if (a == CS_PARTITION_DEFAULT)
2741 retval = true;
2742 #elif defined(HAVE_METIS)
2743 if (n_part_ranks == 1 && a == CS_PARTITION_DEFAULT)
2744 retval = true;
2745 #endif
2746
2747 return retval;
2748 }
2749
2750 /*----------------------------------------------------------------------------*
2751 * Return default algorithm for domain partitioning.
2752 *
2753 * parameters:
2754 * stage <-- associated partitioning stage
2755 *
2756 * returns:
2757 * default partitioning algorithm for this stage
2758 *----------------------------------------------------------------------------*/
2759
2760 static cs_partition_algorithm_t
_select_algorithm(cs_partition_stage_t stage)2761 _select_algorithm(cs_partition_stage_t stage)
2762 {
2763 cs_partition_algorithm_t retval = _part_algorithm[stage];
2764
2765 if (retval == CS_PARTITION_DEFAULT) {
2766
2767 int n_part_ranks;
2768
2769 n_part_ranks = cs_glob_n_ranks / _part_rank_step[stage];
2770 if (n_part_ranks < 1)
2771 n_part_ranks = 1;
2772
2773 /* last stage of 1 or 2 */
2774
2775 #if defined(HAVE_PTSCOTCH)
2776 if (retval == CS_PARTITION_DEFAULT)
2777 retval = CS_PARTITION_SCOTCH;
2778 #elif defined(HAVE_SCOTCH)
2779 if (n_part_ranks == 1 && retval == CS_PARTITION_DEFAULT)
2780 retval = CS_PARTITION_SCOTCH;
2781 #elif defined(HAVE_PARMETIS)
2782 retval = CS_PARTITION_METIS;
2783 #elif defined(HAVE_METIS)
2784 if (n_part_ranks == 1 && retval == CS_PARTITION_DEFAULT)
2785 retval = CS_PARTITION_METIS;
2786 #endif
2787 if (retval == CS_PARTITION_DEFAULT)
2788 retval = CS_PARTITION_SFC_MORTON_BOX;
2789
2790 /* 1st stage of 2:
2791 If 2nd stage uses a space-filling curve, use same curve by default;
2792 otherwise, use default space-filling curve. */
2793
2794 if (stage == CS_PARTITION_FOR_PREPROCESS) {
2795
2796 if ( _part_algorithm[CS_PARTITION_MAIN] >= CS_PARTITION_SFC_MORTON_BOX
2797 && _part_algorithm[CS_PARTITION_MAIN] <= CS_PARTITION_SFC_HILBERT_CUBE)
2798 retval = _part_algorithm[CS_PARTITION_MAIN];
2799 else
2800 retval = CS_PARTITION_SFC_MORTON_BOX;
2801
2802 }
2803
2804 }
2805
2806 return retval;
2807 }
2808
2809 /*! (DOXYGEN_SHOULD_SKIP_THIS) \endcond */
2810
2811 /*============================================================================
2812 * Public function definitions
2813 *============================================================================*/
2814
2815 /*----------------------------------------------------------------------------*/
2816 /*!
2817 * \brief Print information on external libraries
2818 *
2819 * \param[in] log_type log type
2820 */
2821 /*----------------------------------------------------------------------------*/
2822
2823 void
cs_partition_external_library_info(cs_log_t log_type)2824 cs_partition_external_library_info(cs_log_t log_type)
2825 {
2826 #if defined(HAVE_METIS)
2827 #if defined(HAVE_METIS_H) && defined(METIS_VER_MAJOR)
2828 cs_log_printf(log_type,
2829 " METIS %d.%d.%d\n",
2830 METIS_VER_MAJOR, METIS_VER_MINOR, METIS_VER_SUBMINOR);
2831 #else
2832 cs_log_printf(log_type,
2833 " METIS\n");
2834 #endif
2835 #elif defined(HAVE_PARMETIS)
2836 #if defined(PARMETIS_MAJOR_VERSION) && defined(PARMETIS_SUBMINOR_VERSION)
2837 cs_log_printf(log_type,
2838 " ParMETIS %d.%d.%d\n",
2839 PARMETIS_MAJOR_VERSION, PARMETIS_MINOR_VERSION,
2840 PARMETIS_SUBMINOR_VERSION);
2841 #elif defined(PARMETIS_MAJOR_VERSION)
2842 cs_log_printf(log_type,
2843 " ParMETIS %d.%d\n",
2844 PARMETIS_MAJOR_VERSION, PARMETIS_MINOR_VERSION);
2845 #else
2846 cs_log_printf(log_type,
2847 " ParMETIS\n");
2848 #endif
2849 #endif
2850
2851 #if defined(HAVE_SCOTCH)
2852 #if defined(SCOTCH_VERSION) && defined(SCOTCH_RELEASE)
2853 cs_log_printf(log_type,
2854 " SCOTCH %d.%d.%d\n",
2855 SCOTCH_VERSION, SCOTCH_RELEASE, SCOTCH_PATCHLEVEL);
2856 #else
2857 cs_log_printf(log_type,
2858 " SCOTCH\n");
2859 #endif
2860 #elif defined(HAVE_PTSCOTCH)
2861 #if defined(SCOTCH_VERSION) && defined(SCOTCH_RELEASE)
2862 cs_log_printf(log_type,
2863 " PT-SCOTCH %d.%d.%d\n",
2864 SCOTCH_VERSION, SCOTCH_RELEASE, SCOTCH_PATCHLEVEL);
2865 #else
2866 cs_log_printf(log_type,
2867 " PT-SCOTCH\n");
2868 #endif
2869 #endif
2870 }
2871
2872 /*----------------------------------------------------------------------------*/
2873 /*!
2874 * \brief Set algorithm for domain partitioning.
2875 *
2876 * \param[in] stage associated partitioning stage
2877 * \param[in] algorithm partitioning algorithm choice
2878 * \param[in] rank_step if > 1, partitioning done on at most
2879 * n_ranks / rank_step processes
2880 * (for graph-based partitioning only)
2881 * \param[in] ignore_perio if true, ignore periodicity information when
2882 * present (for graph-based partitioning only)
2883 */
2884 /*----------------------------------------------------------------------------*/
2885
2886 void
cs_partition_set_algorithm(cs_partition_stage_t stage,cs_partition_algorithm_t algorithm,int rank_step,bool ignore_perio)2887 cs_partition_set_algorithm(cs_partition_stage_t stage,
2888 cs_partition_algorithm_t algorithm,
2889 int rank_step,
2890 bool ignore_perio)
2891 {
2892 int n_part_ranks = cs_glob_n_ranks / rank_step;
2893
2894 if (n_part_ranks < 1) {
2895 rank_step = cs_glob_n_ranks;
2896 n_part_ranks = 1;
2897 }
2898
2899 /* Check consistency of choice */
2900
2901 #if !defined(HAVE_PTSCOTCH)
2902 if (algorithm == CS_PARTITION_SCOTCH) {
2903 #if defined(HAVE_SCOTCH)
2904 if (n_part_ranks > 1) {
2905 rank_step = cs_glob_n_ranks;
2906 cs_base_warn(__FILE__, __LINE__);
2907 bft_printf(
2908 _("Partitioning with %s requested, but %s is not available,\n"
2909 "so serial %s will be used."), "LibSCOTCH", "PT-SCOTCH", "SCOTCH");
2910 }
2911 #else
2912 bft_error(__FILE__, __LINE__, 0,
2913 _("Partitioning with %s required but neither\n"
2914 "%s nor %s is available."), "LibSCOTCH", "PT-SCOTCH", "SCOTCH");
2915 #endif
2916 }
2917 #endif /* defined(HAVE_PTSCOTCH) */
2918
2919 #if !defined(HAVE_PARMETIS)
2920 if (algorithm == CS_PARTITION_METIS) {
2921 #if defined(HAVE_METIS)
2922 if (n_part_ranks > 1) {
2923 rank_step = cs_glob_n_ranks;
2924 cs_base_warn(__FILE__, __LINE__);
2925 bft_printf(
2926 _("Partitioning with %s requested, but %s is not available,\n"
2927 "so serial %s will be used."), "METIS", "ParMETIS", "METIS");
2928 }
2929 #else
2930 bft_error(__FILE__, __LINE__, 0,
2931 _("Partitioning with %s required but neither\n"
2932 "%s nor %s is available."), "METIS", "ParMETIS", "METIS");
2933 #endif
2934 }
2935 #endif /* defined(HAVE_PARMETIS) */
2936
2937 /* Set options */
2938
2939 _part_algorithm[stage] = algorithm;
2940 _part_rank_step[stage] = rank_step;
2941 _part_ignore_perio[stage] = ignore_perio;
2942 }
2943
2944 /*----------------------------------------------------------------------------*/
2945 /*!
2946 * \brief Set partitioning write to file option.
2947 *
2948 * Partitioning information for subsequent calculations is written to file
2949 * after the last partitioning stage depending on the output level.
2950 *
2951 * Note that partitioning information for additional partitionings is
2952 * always written to file, regardless of this option.
2953 *
2954 * \param[in] write_flag option to save partitioning information:
2955 * 0: never
2956 * 1: for graph-based partitioning only
2957 * 2: always
2958 */
2959 /*----------------------------------------------------------------------------*/
2960
2961 void
cs_partition_set_write_level(int write_flag)2962 cs_partition_set_write_level(int write_flag)
2963 {
2964 _part_write_output = write_flag;
2965 }
2966
2967 /*----------------------------------------------------------------------------*/
2968 /*!
2969 * \brief Define hints indicating if initial partitioning fo a preprocessing
2970 * stage is required.
2971 *
2972 * \param[in] join true if a mesh joining operation is planned.
2973 * \param[in] join_periodic true if a mesh periodic matching operation
2974 * is planned
2975 */
2976 /*----------------------------------------------------------------------------*/
2977
2978 void
cs_partition_set_preprocess_hints(bool join,bool join_periodic)2979 cs_partition_set_preprocess_hints(bool join,
2980 bool join_periodic)
2981 {
2982 _part_compute_join_hint = join;
2983 _part_compute_perio_hint = join_periodic;
2984 }
2985
2986 /*----------------------------------------------------------------------------*/
2987 /*!
2988 * \brief Activate or deactivate initial partitioning for preprocessing.
2989 *
2990 * \param[in] active true to activate pre-partitiong for the preprocessing
2991 * stage, false to de-activate it
2992 */
2993 /*----------------------------------------------------------------------------*/
2994
2995 void
cs_partition_set_preprocess(bool active)2996 cs_partition_set_preprocess(bool active)
2997 {
2998 if (active)
2999 _part_preprocess_active = 2;
3000 else
3001 _part_preprocess_active = 0;
3002 }
3003
3004 /*----------------------------------------------------------------------------*/
3005 /*!
3006 * \brief Indicate if re-partitiong for the computation stage is required.
3007 *
3008 * \return true if initial partitioning for preprocessing is active,
3009 * false otherwise
3010 */
3011 /*----------------------------------------------------------------------------*/
3012
3013 bool
cs_partition_get_preprocess(void)3014 cs_partition_get_preprocess(void)
3015 {
3016 bool retval = false;
3017
3018 if (_part_preprocess_active < 1)
3019 retval = false;
3020
3021 else if (_part_preprocess_active == 1) {
3022
3023 if (_have_usable_graph_partitioning(CS_PARTITION_MAIN)) {
3024 if (_part_compute_join_hint)
3025 retval = true;
3026 if ( _part_compute_perio_hint
3027 && _part_ignore_perio[CS_PARTITION_MAIN] == false)
3028 retval = true;
3029 }
3030 }
3031
3032 else
3033 retval = true;
3034
3035 if (cs_glob_n_ranks < 2)
3036 retval = false;
3037
3038 return retval;
3039 }
3040
3041 /*----------------------------------------------------------------------------*/
3042 /*!
3043 * \brief Define list of extra partitionings to build.
3044 *
3045 * Partitionings in this list will be output to file, and may be used for
3046 * subsequent calculations.
3047 *
3048 * When partitioning for both preprocessing and calculation stages, output to
3049 * file of partioning data or generation of additional partitionings
3050 * (see \ref cs_partition_add_partitions) will only be done for the
3051 * second stage.
3052 *
3053 * \param[in] n_extra_partitions number of extra partitionings to compute
3054 * \param[in] extra_partitions_list list of extra partitions to compute
3055 */
3056 /*----------------------------------------------------------------------------*/
3057
3058 void
cs_partition_add_partitions(int n_extra_partitions,int extra_partitions_list[])3059 cs_partition_add_partitions(int n_extra_partitions,
3060 int extra_partitions_list[])
3061 {
3062 _part_n_extra_partitions = n_extra_partitions;
3063
3064 BFT_REALLOC(_part_extra_partitions_list, n_extra_partitions, int);
3065
3066 if (n_extra_partitions > 0)
3067 memcpy(_part_extra_partitions_list,
3068 extra_partitions_list,
3069 sizeof(int)*n_extra_partitions);
3070 }
3071
3072 /*----------------------------------------------------------------------------*/
3073 /*!
3074 * \brief Partition mesh based on current options.
3075 *
3076 * \param[in] mesh pointer to mesh structure
3077 * \param[in, out] mb pointer to mesh builder structure
3078 * \param[in] stage associated partitioning stage
3079 */
3080 /*----------------------------------------------------------------------------*/
3081
3082 void
cs_partition(cs_mesh_t * mesh,cs_mesh_builder_t * mb,cs_partition_stage_t stage)3083 cs_partition(cs_mesh_t *mesh,
3084 cs_mesh_builder_t *mb,
3085 cs_partition_stage_t stage)
3086 {
3087 cs_timer_t t0, t1;
3088 cs_timer_counter_t dt;
3089
3090 int n_part_ranks = cs_glob_n_ranks / _part_rank_step[stage];
3091 cs_partition_algorithm_t _algorithm = _select_algorithm(stage);
3092
3093 bool write_output = false;
3094 int n_extra_partitions = 0;
3095
3096 int *cell_part = NULL;
3097
3098 cs_gnum_t cell_range[2] = {0, 0};
3099 cs_lnum_t n_cells = 0;
3100 cs_lnum_t n_faces = 0;
3101 cs_gnum_t *face_cells = NULL;
3102
3103 /* Initialize local options */
3104
3105 if (stage == CS_PARTITION_MAIN) {
3106 if ( ( _algorithm == CS_PARTITION_METIS
3107 || _algorithm == CS_PARTITION_SCOTCH)
3108 && _part_write_output > 0)
3109 write_output = true;
3110 else if (_part_write_output > 1)
3111 write_output = true;
3112 n_extra_partitions = _part_n_extra_partitions;
3113 }
3114
3115 if (n_part_ranks < 1)
3116 n_part_ranks = 1;
3117
3118 /* Free previous cell rank info if present */
3119
3120 if (mb->cell_rank != NULL)
3121 BFT_FREE(mb->cell_rank);
3122
3123 /* Read cell rank data if available */
3124
3125 if (cs_glob_n_ranks > 1) {
3126 if ( stage != CS_PARTITION_MAIN
3127 || cs_partition_get_preprocess() == false) {
3128 _read_cell_rank(mesh, mb, CS_IO_ECHO_OPEN_CLOSE);
3129 if (mb->have_cell_rank)
3130 return;
3131 }
3132 }
3133 else { /* if (cs_glob_n_ranks == 1) */
3134 if (stage != CS_PARTITION_MAIN || n_extra_partitions < 1)
3135 return;
3136 }
3137
3138 (void)cs_timer_wtime();
3139
3140 /* Print header */
3141
3142 bft_printf("\n ----------------------------------------------------------\n");
3143
3144 cs_log_printf(CS_LOG_PERFORMANCE,
3145 _("\n"
3146 "Partitioning:\n\n"));
3147
3148 /* Start timer */
3149
3150 t0 = cs_timer_time();
3151
3152 /* Adapt builder data for partitioning */
3153
3154 if (_algorithm == CS_PARTITION_METIS || _algorithm == CS_PARTITION_SCOTCH) {
3155
3156 _prepare_input(mesh,
3157 mb,
3158 _part_rank_step[stage],
3159 _part_ignore_perio[stage],
3160 cell_range,
3161 &n_faces,
3162 &face_cells);
3163
3164 n_cells = cell_range[1] - cell_range[0];
3165
3166 }
3167 else {
3168
3169 n_part_ranks = cs_glob_n_ranks;
3170 n_cells = mb->cell_bi.gnum_range[1] - mb->cell_bi.gnum_range[0];
3171
3172 }
3173
3174 /* Build and partition graph */
3175
3176 #if defined(HAVE_METIS) || defined(HAVE_PARMETIS)
3177
3178 if (_algorithm == CS_PARTITION_METIS) {
3179
3180 int i;
3181 cs_timer_t t2;
3182 idx_t *cell_idx = NULL, *cell_neighbors = NULL;
3183
3184 _metis_cell_cells(n_cells,
3185 n_faces,
3186 cell_range[0],
3187 face_cells,
3188 &cell_idx,
3189 &cell_neighbors);
3190
3191 if (face_cells != mb->face_cells)
3192 BFT_FREE(face_cells);
3193
3194 t2 = cs_timer_time();
3195 dt = cs_timer_diff(&t0, &t2);
3196
3197 cs_log_printf(CS_LOG_PERFORMANCE,
3198 _(" preparing graph: %.3g s\n"),
3199 (double)(dt.nsec)/1.e9);
3200
3201 #if defined(HAVE_PARMETIS)
3202
3203 if (n_part_ranks > 1) {
3204
3205 MPI_Comm part_comm = cs_glob_mpi_comm;
3206
3207 if (_part_rank_step[stage] > 1)
3208 part_comm = _init_reduced_communicator(_part_rank_step[stage]);
3209
3210 for (i = 0; i < n_extra_partitions + 1; i++) {
3211
3212 int n_ranks = cs_glob_n_ranks;
3213
3214 if (i < n_extra_partitions) {
3215 n_ranks = _part_extra_partitions_list[i];
3216 if (n_ranks == cs_glob_n_ranks) {
3217 write_output = true;
3218 continue;
3219 }
3220 }
3221
3222 if (n_ranks < 2)
3223 continue;
3224
3225 BFT_REALLOC(cell_part, n_cells, int);
3226
3227 if (cs_glob_rank_id % _part_rank_step[stage] == 0)
3228 _part_parmetis(mesh->n_g_cells,
3229 cell_range,
3230 n_ranks,
3231 cell_idx,
3232 cell_neighbors,
3233 cell_part,
3234 part_comm);
3235
3236 _distribute_output(mb,
3237 _part_rank_step[stage],
3238 cell_range,
3239 &cell_part);
3240
3241 _cell_part_histogram(mb->cell_bi.gnum_range, n_ranks, cell_part);
3242
3243 if (write_output || i < n_extra_partitions)
3244 _write_output(mesh->n_g_cells,
3245 mb->cell_bi.gnum_range,
3246 n_ranks,
3247 cell_part);
3248 }
3249
3250 if (part_comm != cs_glob_mpi_comm && part_comm != MPI_COMM_NULL)
3251 MPI_Comm_free(&part_comm);
3252 }
3253
3254 #endif
3255
3256 if (n_part_ranks == 1) {
3257
3258 for (i = 0; i < n_extra_partitions + 1; i++) {
3259
3260 int n_ranks = cs_glob_n_ranks;
3261
3262 if (i < n_extra_partitions) {
3263 n_ranks = _part_extra_partitions_list[i];
3264 if (n_ranks == cs_glob_n_ranks) {
3265 write_output = true;
3266 continue;
3267 }
3268 }
3269
3270 if (n_ranks < 2)
3271 continue;
3272
3273 BFT_REALLOC(cell_part, n_cells, int);
3274
3275 if (cs_glob_rank_id < 0 || (cs_glob_rank_id % _part_rank_step[stage] == 0))
3276 _part_metis(n_cells,
3277 n_ranks,
3278 cell_idx,
3279 cell_neighbors,
3280 cell_part);
3281
3282 _distribute_output(mb,
3283 _part_rank_step[stage],
3284 cell_range,
3285 &cell_part);
3286
3287 _cell_part_histogram(mb->cell_bi.gnum_range, n_ranks, cell_part);
3288
3289 if (write_output || i < n_extra_partitions)
3290 _write_output(mesh->n_g_cells,
3291 mb->cell_bi.gnum_range,
3292 n_ranks,
3293 cell_part);
3294 }
3295 }
3296
3297 BFT_FREE(cell_idx);
3298 BFT_FREE(cell_neighbors);
3299 }
3300
3301 #endif /* defined(HAVE_METIS) || defined(HAVE_PARMETIS) */
3302
3303 #if defined(HAVE_SCOTCH) || defined(HAVE_PTSCOTCH)
3304
3305 if (_algorithm == CS_PARTITION_SCOTCH) {
3306
3307 int i;
3308 cs_timer_t t2;
3309 SCOTCH_Num *cell_idx = NULL, *cell_neighbors = NULL;
3310
3311 _scotch_cell_cells(n_cells,
3312 n_faces,
3313 cell_range[0],
3314 face_cells,
3315 &cell_idx,
3316 &cell_neighbors);
3317
3318 if (face_cells != mb->face_cells)
3319 BFT_FREE(face_cells);
3320
3321 t2 = cs_timer_time();
3322 dt = cs_timer_diff(&t0, &t2);
3323
3324 cs_log_printf(CS_LOG_PERFORMANCE,
3325 _(" preparing graph: %.3g s\n"),
3326 (double)(dt.nsec)/1.e9);
3327
3328 #if defined(HAVE_PTSCOTCH)
3329
3330 if (n_part_ranks > 1) {
3331
3332 MPI_Comm part_comm = cs_glob_mpi_comm;
3333
3334 if (_part_rank_step[stage] > 1)
3335 part_comm = _init_reduced_communicator(_part_rank_step[stage]);
3336
3337 for (i = 0; i < n_extra_partitions + 1; i++) {
3338
3339 int n_ranks = cs_glob_n_ranks;
3340
3341 if (i < n_extra_partitions) {
3342 n_ranks = _part_extra_partitions_list[i];
3343 if (n_ranks == cs_glob_n_ranks) {
3344 write_output = true;
3345 continue;
3346 }
3347 }
3348
3349 if (n_ranks < 2)
3350 continue;
3351
3352 BFT_REALLOC(cell_part, n_cells, int);
3353
3354 if (cs_glob_rank_id % _part_rank_step[stage] == 0)
3355 _part_ptscotch(mesh->n_g_cells,
3356 cell_range,
3357 n_ranks,
3358 cell_idx,
3359 cell_neighbors,
3360 cell_part,
3361 part_comm);
3362
3363 _distribute_output(mb,
3364 _part_rank_step[stage],
3365 cell_range,
3366 &cell_part);
3367
3368 _cell_part_histogram(mb->cell_bi.gnum_range, n_ranks, cell_part);
3369
3370 if (write_output || i < n_extra_partitions)
3371 _write_output(mesh->n_g_cells,
3372 mb->cell_bi.gnum_range,
3373 n_ranks,
3374 cell_part);
3375 }
3376
3377 if (part_comm != cs_glob_mpi_comm && part_comm != MPI_COMM_NULL)
3378 MPI_Comm_free(&part_comm);
3379 }
3380
3381 #endif
3382
3383 if (n_part_ranks == 1) {
3384
3385 for (i = 0; i < n_extra_partitions + 1; i++) {
3386
3387 int n_ranks = cs_glob_n_ranks;
3388
3389 if (i < n_extra_partitions) {
3390 n_ranks = _part_extra_partitions_list[i];
3391 if (n_ranks == cs_glob_n_ranks) {
3392 write_output = true;
3393 continue;
3394 }
3395 }
3396
3397 if (n_ranks < 2)
3398 continue;
3399
3400 BFT_REALLOC(cell_part, n_cells, int);
3401
3402 if (cs_glob_rank_id < 0 || (cs_glob_rank_id % _part_rank_step[stage] == 0))
3403 _part_scotch(n_cells,
3404 n_ranks,
3405 cell_idx,
3406 cell_neighbors,
3407 cell_part);
3408
3409 _distribute_output(mb,
3410 _part_rank_step[stage],
3411 cell_range,
3412 &cell_part);
3413
3414 _cell_part_histogram(mb->cell_bi.gnum_range, n_ranks, cell_part);
3415
3416 if (write_output || i < n_extra_partitions)
3417 _write_output(mesh->n_g_cells,
3418 mb->cell_bi.gnum_range,
3419 n_ranks,
3420 cell_part);
3421 }
3422 }
3423
3424 BFT_FREE(cell_idx);
3425 BFT_FREE(cell_neighbors);
3426 }
3427
3428 #endif /* defined(HAVE_SCOTCH) || defined(HAVE_PTSCOTCH) */
3429
3430 if ( _algorithm >= CS_PARTITION_SFC_MORTON_BOX
3431 && _algorithm <= CS_PARTITION_SFC_HILBERT_CUBE) {
3432
3433 int i;
3434 fvm_io_num_sfc_t sfc_type = _algorithm - CS_PARTITION_SFC_MORTON_BOX;
3435
3436 BFT_MALLOC(cell_part, n_cells, int);
3437
3438 for (i = 0; i < n_extra_partitions + 1; i++) {
3439
3440 int n_ranks = cs_glob_n_ranks;
3441
3442 if (i < n_extra_partitions) {
3443 n_ranks = _part_extra_partitions_list[i];
3444 if (n_ranks == cs_glob_n_ranks) {
3445 write_output = true;
3446 continue;
3447 }
3448 }
3449
3450 if (n_ranks < 2)
3451 continue;
3452
3453 #if defined(HAVE_MPI)
3454 _cell_rank_by_sfc(mesh->n_g_cells,
3455 n_ranks,
3456 mb,
3457 sfc_type,
3458 cell_part,
3459 cs_glob_mpi_comm);
3460 #else
3461 _cell_rank_by_sfc(mesh->n_g_cells, n_ranks, mb, sfc_type, cell_part);
3462 #endif
3463
3464 _cell_part_histogram(mb->cell_bi.gnum_range, n_ranks, cell_part);
3465
3466 if (write_output || i < n_extra_partitions)
3467 _write_output(mesh->n_g_cells,
3468 mb->cell_bi.gnum_range,
3469 n_ranks,
3470 cell_part);
3471 }
3472
3473 }
3474
3475 /* Naive partitioner */
3476
3477 else if (_algorithm == CS_PARTITION_BLOCK) {
3478
3479 BFT_MALLOC(cell_part, n_cells, int);
3480
3481 _block_partititioning(mesh, mb, cell_part);
3482
3483 }
3484
3485 /* Reset extra partitions list if used */
3486
3487 if (n_extra_partitions > 0) {
3488 BFT_FREE(_part_extra_partitions_list);
3489 _part_n_extra_partitions = 0;
3490 }
3491
3492 /* Copy to mesh builder */
3493
3494 mb->have_cell_rank = true;
3495 mb->cell_rank = cell_part;
3496
3497 n_cells = mb->cell_bi.gnum_range[1] - mb->cell_bi.gnum_range[0];
3498
3499 /* End of this section for log file */
3500
3501 t1 = cs_timer_time();
3502
3503 dt = cs_timer_diff(&t0, &t1);
3504
3505 bft_printf(_("\n"
3506 " Partitioning finished (%.3g s)\n"),
3507 (double)(dt.nsec)/1.e9);
3508 bft_printf_flush();
3509
3510 cs_log_printf(CS_LOG_PERFORMANCE,
3511 _(" wall clock time: %.3g s\n\n"),
3512 (double)(dt.nsec)/1.e9);
3513
3514 cs_log_separator(CS_LOG_PERFORMANCE);
3515 }
3516
3517 /*----------------------------------------------------------------------------*/
3518
3519 END_C_DECLS
3520