1 /*============================================================================
2 * Convert between block distribution and general domain partition.
3 *============================================================================*/
4
5 /*
6 This file is part of Code_Saturne, a general-purpose CFD tool.
7
8 Copyright (C) 1998-2021 EDF S.A.
9
10 This program is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free Software
12 Foundation; either version 2 of the License, or (at your option) any later
13 version.
14
15 This program is distributed in the hope that it will be useful, but WITHOUT
16 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
18 details.
19
20 You should have received a copy of the GNU General Public License along with
21 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
22 Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 */
24
25 /*----------------------------------------------------------------------------*/
26
27 #include "cs_defs.h"
28
29 /*----------------------------------------------------------------------------
30 * Standard C library headers
31 *----------------------------------------------------------------------------*/
32
33 #include <assert.h>
34 #include <errno.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38
39 /*----------------------------------------------------------------------------
40 * Local headers
41 *----------------------------------------------------------------------------*/
42
43 #include "bft_mem.h"
44 #include "bft_error.h"
45
46 #include "cs_all_to_all.h"
47 #include "cs_block_dist.h"
48 #include "cs_order.h"
49
50 /*----------------------------------------------------------------------------
51 * Header for the current file
52 *----------------------------------------------------------------------------*/
53
54 #include "cs_block_to_part.h"
55
56 /*----------------------------------------------------------------------------*/
57
58 BEGIN_C_DECLS
59
60 /*! \cond DOXYGEN_SHOULD_SKIP_THIS */
61
62 /*=============================================================================
63 * Macro definitions
64 *============================================================================*/
65
66 /*=============================================================================
67 * Local type definitions
68 *============================================================================*/
69
70 /*============================================================================
71 * Local function defintions
72 *============================================================================*/
73
74 /*! (DOXYGEN_SHOULD_SKIP_THIS) \endcond */
75
76 /*============================================================================
77 * Public function definitions
78 *============================================================================*/
79
80 #if defined(HAVE_MPI)
81
82 /*----------------------------------------------------------------------------*/
83 /*!
84 * \brief Initialize block to partition distributor with block data using
85 * strided adjacency array.
86 *
87 * The adjacency array uses 1-n based global numbers. 0 values are
88 * allowed and may be used to represent empty adjacencies.
89 *
90 * For example, in a face -> element adjacency relation, each face
91 * is adjacent to 2 elements (thus a stride of 2), except for
92 * boundary faces which are adjacent to only 1 element; in this case,
93 * the adjacent element number for the exterior side of the face is 0.
94 *
95 * It is also possible to define a default destination rank,
96 * so that elements with no adjacency are redistributed.
97 * If the default rank for a given element is < 0, or no default
98 * ranks are defined, elements with no adjacency are not distributed.
99 *
100 * The returned all to all distributor should be used in reverse
101 * mode to copy data from the block to partition distribution.
102 *
103 * If the part_gnum array is requested (i.e. passed an non-NULL pointer),
104 * the caller is responsible for freeing it.
105 *
106 * \param[in] comm communicator
107 * \param[in] block block size and range info
108 * \param[in] adjacent_block block info for adjacent entities
109 * \param[in] stride stride of adjacency array (1 or 2)
110 * \param[in] adjacency entity adjacency (1 to n numbering)
111 * \param[in] adjacent_ent_rank destination rank for adjacent entities, or
112 * NULL if based on block size and range only.
113 * \param[in] default_rank default rank in case there is no adjacency,
114 * or NULL
115 * \param[out] n_part_elts number of elements in partition, or NULL
116 * \param[out] part_gnum global elements numbers in partition,
117 * or NULL
118 *
119 * \return initialized all to all block to partition distributor
120 */
121 /*----------------------------------------------------------------------------*/
122
123 cs_all_to_all_t *
cs_block_to_part_create_by_adj_s(MPI_Comm comm,cs_block_dist_info_t block,cs_block_dist_info_t adjacent_block,int stride,const cs_gnum_t adjacency[],const int adjacent_ent_rank[],const int default_rank[],cs_lnum_t * n_part_elts,cs_gnum_t ** part_gnum)124 cs_block_to_part_create_by_adj_s(MPI_Comm comm,
125 cs_block_dist_info_t block,
126 cs_block_dist_info_t adjacent_block,
127 int stride,
128 const cs_gnum_t adjacency[],
129 const int adjacent_ent_rank[],
130 const int default_rank[],
131 cs_lnum_t *n_part_elts,
132 cs_gnum_t **part_gnum)
133 {
134 const cs_lnum_t n_ents = block.gnum_range[1] - block.gnum_range[0];
135
136 int rank = -1;
137 MPI_Comm_rank(comm, &rank);
138
139 /* Determine an entity's adjacent entities, and use their
140 global number to determine by which ranks they were read;
141 using this knowledge, we can query these ranks to know to
142 which ranks these adjacent entities were sent
143 --------------------------------------------------------- */
144
145 /* Count values to send and receive to each processor */
146
147 cs_lnum_t _n_ents = n_ents*stride;
148
149 int *query_rank;
150 BFT_MALLOC(query_rank, _n_ents, int);
151
152 for (cs_lnum_t j = 0; j < _n_ents; j++) {
153 cs_gnum_t adj_g_num = adjacency[j];
154 if (adj_g_num > 0) {
155 int adj_ent_rank = ((adj_g_num-1) / adjacent_block.block_size)
156 * adjacent_block.rank_step;
157 query_rank[j] = adj_ent_rank;
158 }
159 else
160 query_rank[j] = rank; /* leave on current rank */
161 }
162
163 cs_all_to_all_t *qd = cs_all_to_all_create(_n_ents,
164 0, /* flags */
165 NULL,
166 query_rank,
167 comm);
168
169 cs_all_to_all_transfer_dest_rank(qd, &query_rank);
170
171 cs_gnum_t *adj_query
172 = cs_all_to_all_copy_array(qd,
173 CS_GNUM_TYPE,
174 1,
175 false, /* reverse */
176 adjacency,
177 NULL);
178
179 cs_lnum_t n_elts_query = cs_all_to_all_n_elts_dest(qd);
180
181 int *sent_rank;
182 BFT_MALLOC(sent_rank, n_elts_query, int);
183
184 if (adjacent_ent_rank != NULL) {
185 for (cs_lnum_t j = 0; j < n_elts_query; j++) {
186 if (adj_query[j] > 0) {
187 cs_lnum_t adj_l_id = (adj_query[j] - 1) % adjacent_block.block_size;
188 sent_rank[j] = adjacent_ent_rank[adj_l_id];
189 }
190 else
191 sent_rank[j] = -1;
192 }
193 }
194 else {
195 for (cs_lnum_t j = 0; j < n_elts_query; j++) {
196 if (adj_query[j] > 0)
197 sent_rank[j] = rank;
198 else
199 sent_rank[j] = -1;
200 }
201 }
202
203 BFT_FREE(adj_query);
204
205 /* Return communication */
206
207 int *dest_rank
208 = cs_all_to_all_copy_array(qd,
209 CS_INT_TYPE,
210 1,
211 true, /* reverse */
212 sent_rank,
213 NULL);
214
215 BFT_FREE(sent_rank);
216
217 cs_all_to_all_destroy(&qd);
218
219 /* We now need to extract the ranks to which each entity will be sent,
220 based on where its adjacent entities were sent (and thus where
221 it will be needed).
222 -------------------------------------------------------------------- */
223
224 int *send_rank;
225 BFT_MALLOC(send_rank, _n_ents, int);
226 cs_gnum_t *send_gnum;
227 BFT_MALLOC(send_gnum, _n_ents, cs_gnum_t);
228
229 cs_lnum_t n_send = 0;
230
231 if (stride == 1) {
232 for (cs_lnum_t j = 0; j < n_ents; j++) {
233 int _send_rank = dest_rank[j];
234 if (_send_rank != -1) {
235 send_rank[n_send] = _send_rank;
236 send_gnum[n_send] = block.gnum_range[0] + j;
237 n_send++;
238 }
239 else if (default_rank != NULL) {
240 send_rank[n_send] = default_rank[j];
241 send_gnum[n_send] = block.gnum_range[0] + j;
242 n_send++;
243 }
244 }
245 }
246
247 else if (stride == 2) {
248 for (cs_lnum_t j = 0; j < n_ents; j++) {
249 int _send_rank = -1;
250 int prev_rank = -1;
251 for (cs_lnum_t k = 0; k < 2; k++) {
252 _send_rank = dest_rank[j*2 + k];
253 if (_send_rank != -1 && _send_rank != prev_rank) {
254 send_rank[n_send] = _send_rank;
255 send_gnum[n_send] = block.gnum_range[0] + j;
256 n_send++;
257 prev_rank = _send_rank;
258 }
259 }
260 if (prev_rank == -1 && default_rank != NULL) {
261 send_rank[n_send] = default_rank[j];
262 send_gnum[n_send] = block.gnum_range[0] + j;
263 n_send++;
264 }
265 }
266 }
267
268 else
269 bft_error(__FILE__, __LINE__, 0,
270 "%s currently only allows stride 1 or 2, not %d.",
271 __func__, (int)stride);
272
273 BFT_FREE(dest_rank);
274
275 /* Exchange send count and global number */
276
277 /* Remark: we could save a global cs_all_to_all exchange by adding a
278 source element list argument to the cs_all_to_all_create_*
279 functions (such as the src_id in cs_crystal_router_create*),
280 or adding a creation function with such a list;
281 such a list is used internally for reverse cs_all_to_all
282 exchanges, but allowing it would probably imply
283 CS_ALL_TO_ALL_NO_REVERSE to avoid a too complex logic. */
284
285 cs_all_to_all_t *d
286 = cs_all_to_all_create(n_send,
287 CS_ALL_TO_ALL_ORDER_BY_SRC_RANK,
288 NULL,
289 send_rank,
290 comm);
291
292 cs_gnum_t *recv_gnum
293 = cs_all_to_all_copy_array(d,
294 CS_GNUM_TYPE,
295 1,
296 false, /* reverse */
297 send_gnum,
298 NULL);
299
300 cs_lnum_t _n_part_elts = cs_all_to_all_n_elts_dest(d);
301
302 BFT_FREE(send_rank);
303 BFT_FREE(send_gnum);
304
305 cs_all_to_all_destroy(&d);
306
307 /* Now build final distributor */
308
309 d = cs_all_to_all_create_from_block(_n_part_elts,
310 CS_ALL_TO_ALL_USE_DEST_ID,
311 recv_gnum,
312 block,
313 comm);
314
315 if (n_part_elts != NULL)
316 *n_part_elts = _n_part_elts;
317
318 if (part_gnum != NULL)
319 *part_gnum = recv_gnum;
320 else
321 BFT_FREE(recv_gnum);
322
323 /* Return initialized structure */
324
325 return d;
326 }
327
328 #endif /* defined(HAVE_MPI) */
329
330 /*----------------------------------------------------------------------------*/
331 /*!
332 * \brief Determine local references from references to global numbers.
333 *
334 * This is based on finding the local id of a given global number
335 * using a binary search.
336 *
337 * Global numbers use a 1 to n numbering, while local numbers use a
338 * 0+base to n-1+base numbering. If an entity's global number does not
339 * appear in the global list, base-1 is assigned for that entity's
340 * local list.
341 *
342 * If list contains duplicate values, any local id having a multiple
343 * global number (i.e not necessarily the smallest one) may be
344 * assigned to the corresponding local_number[] entry.
345 *
346 * \param[in] n_ents number of entities
347 * \param[in] base base numbering (typically 0 or 1)
348 * \param[in] global_list_size size of global entity list
349 * \param[in] global_list_is_sorted true if global entity list is
350 * guaranteed to be sorted
351 * \param[in] global_list global entity list
352 * \param[in] global_number entity global numbers (size: n_ents)
353 * \param[out] local_number entity local numbers (size: n_ents)
354 */
355 /*----------------------------------------------------------------------------*/
356
357 void
cs_block_to_part_global_to_local(cs_lnum_t n_ents,cs_lnum_t base,cs_lnum_t global_list_size,bool global_list_is_sorted,const cs_gnum_t global_list[],const cs_gnum_t global_number[],cs_lnum_t local_number[])358 cs_block_to_part_global_to_local(cs_lnum_t n_ents,
359 cs_lnum_t base,
360 cs_lnum_t global_list_size,
361 bool global_list_is_sorted,
362 const cs_gnum_t global_list[],
363 const cs_gnum_t global_number[],
364 cs_lnum_t local_number[])
365 {
366 cs_lnum_t i;
367 cs_lnum_t *order = NULL;
368 cs_gnum_t *_g_list = NULL;
369 const cs_gnum_t *g_list = global_list;
370
371 if (n_ents == 0)
372 return;
373
374 #if defined(DEBUG) && !defined(NDEBUG)
375 if (global_list_is_sorted) {
376 for (i = 1; i < global_list_size; i++)
377 assert(global_list[i] > global_list[i-1]);
378 }
379 #endif
380
381 if (global_list_is_sorted == false) {
382 BFT_MALLOC(_g_list, global_list_size, cs_gnum_t);
383 order = cs_order_gnum(NULL, global_list, global_list_size);
384 for (i = 0; i < global_list_size; i++)
385 _g_list[i] = global_list[order[i]];
386 g_list = _g_list;
387 }
388
389 for (i = 0; i < n_ents; i++) {
390
391 cs_lnum_t start_id = 0;
392 cs_lnum_t end_id = global_list_size;
393
394 const cs_gnum_t num_1 = global_number[i];
395
396 /* Use binary search */
397
398 while (start_id < end_id) {
399 cs_lnum_t mid_id = start_id + ((end_id - start_id) / 2);
400 if (g_list[mid_id] < num_1)
401 start_id = mid_id + 1;
402 else
403 end_id = mid_id; /* Can't be end_id = mid_id -1;
404 g_list[mid_id] >= num_1, so
405 end_id must not be < mid_id in case
406 g_list[mid_id] == num_1 */
407 }
408
409 /* start_id == end_id at this stage; */
410
411 if (start_id < global_list_size && g_list[start_id] == num_1)
412 local_number[i] = start_id + base;
413 else
414 local_number[i] = base - 1;
415
416 }
417
418 BFT_FREE(_g_list);
419
420 if (order != NULL) {
421 for (i = 0 ; i < n_ents ; i++)
422 local_number[i] = order[local_number[i] - base] + base;
423 BFT_FREE(order);
424 }
425
426 }
427
428 /*----------------------------------------------------------------------------*/
429
430 END_C_DECLS
431