1 /*============================================================================
2  * Convert between block distribution and general domain partition.
3  *============================================================================*/
4 
5 /*
6   This file is part of Code_Saturne, a general-purpose CFD tool.
7 
8   Copyright (C) 1998-2021 EDF S.A.
9 
10   This program is free software; you can redistribute it and/or modify it under
11   the terms of the GNU General Public License as published by the Free Software
12   Foundation; either version 2 of the License, or (at your option) any later
13   version.
14 
15   This program is distributed in the hope that it will be useful, but WITHOUT
16   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
18   details.
19 
20   You should have received a copy of the GNU General Public License along with
21   this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
22   Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 */
24 
25 /*----------------------------------------------------------------------------*/
26 
27 #include "cs_defs.h"
28 
29 /*----------------------------------------------------------------------------
30  * Standard C library headers
31  *----------------------------------------------------------------------------*/
32 
33 #include <assert.h>
34 #include <errno.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 
39 /*----------------------------------------------------------------------------
40  *  Local headers
41  *----------------------------------------------------------------------------*/
42 
43 #include "bft_mem.h"
44 #include "bft_error.h"
45 
46 #include "cs_all_to_all.h"
47 #include "cs_block_dist.h"
48 #include "cs_order.h"
49 
50 /*----------------------------------------------------------------------------
51  *  Header for the current file
52  *----------------------------------------------------------------------------*/
53 
54 #include "cs_block_to_part.h"
55 
56 /*----------------------------------------------------------------------------*/
57 
58 BEGIN_C_DECLS
59 
60 /*! \cond DOXYGEN_SHOULD_SKIP_THIS */
61 
62 /*=============================================================================
63  * Macro definitions
64  *============================================================================*/
65 
66 /*=============================================================================
67  * Local type definitions
68  *============================================================================*/
69 
70 /*============================================================================
71  * Local function defintions
72  *============================================================================*/
73 
74 /*! (DOXYGEN_SHOULD_SKIP_THIS) \endcond */
75 
76 /*============================================================================
77  * Public function definitions
78  *============================================================================*/
79 
80 #if defined(HAVE_MPI)
81 
82 /*----------------------------------------------------------------------------*/
83 /*!
84  * \brief Initialize block to partition distributor with block data using
85  *        strided adjacency array.
86  *
87  * The adjacency array uses 1-n based global numbers. 0 values are
88  * allowed and may be used to represent empty adjacencies.
89  *
90  * For example, in a face -> element adjacency relation, each face
91  * is adjacent to 2 elements (thus a stride of 2), except for
92  * boundary faces which are adjacent to only 1 element; in this case,
93  * the adjacent element number for the exterior side of the face is 0.
94  *
95  * It is also possible to define a default destination rank,
96  * so that elements with no adjacency are redistributed.
97  * If the default rank for a given element is < 0, or no default
98  * ranks are defined, elements with no adjacency are not distributed.
99  *
100  * The returned all to all distributor should be used in reverse
101  * mode to copy data from the block to partition distribution.
102  *
103  * If the part_gnum array is requested (i.e. passed an non-NULL pointer),
104  * the caller is responsible for freeing it.
105  *
106  * \param[in]   comm               communicator
107  * \param[in]   block              block size and range info
108  * \param[in]   adjacent_block     block info for adjacent entities
109  * \param[in]   stride             stride of adjacency array (1 or 2)
110  * \param[in]   adjacency          entity adjacency (1 to n numbering)
111  * \param[in]   adjacent_ent_rank  destination rank for adjacent entities, or
112  *                                 NULL if based on block size and range only.
113  * \param[in]   default_rank       default rank in case there is no adjacency,
114  *                                 or NULL
115  * \param[out]  n_part_elts        number of elements in partition, or NULL
116  * \param[out]  part_gnum          global elements numbers in partition,
117  *                                 or NULL
118  *
119  * \return  initialized all to all block to partition distributor
120  */
121 /*----------------------------------------------------------------------------*/
122 
123 cs_all_to_all_t *
cs_block_to_part_create_by_adj_s(MPI_Comm comm,cs_block_dist_info_t block,cs_block_dist_info_t adjacent_block,int stride,const cs_gnum_t adjacency[],const int adjacent_ent_rank[],const int default_rank[],cs_lnum_t * n_part_elts,cs_gnum_t ** part_gnum)124 cs_block_to_part_create_by_adj_s(MPI_Comm               comm,
125                                  cs_block_dist_info_t   block,
126                                  cs_block_dist_info_t   adjacent_block,
127                                  int                    stride,
128                                  const cs_gnum_t        adjacency[],
129                                  const int              adjacent_ent_rank[],
130                                  const int              default_rank[],
131                                  cs_lnum_t             *n_part_elts,
132                                  cs_gnum_t            **part_gnum)
133 {
134   const cs_lnum_t n_ents = block.gnum_range[1] - block.gnum_range[0];
135 
136   int rank = -1;
137   MPI_Comm_rank(comm, &rank);
138 
139   /* Determine an entity's adjacent entities, and use their
140      global number to determine by which ranks they were read;
141      using this knowledge, we can query these ranks to know to
142      which ranks these adjacent entities were sent
143      --------------------------------------------------------- */
144 
145   /* Count values to send and receive to each processor */
146 
147   cs_lnum_t _n_ents = n_ents*stride;
148 
149   int *query_rank;
150   BFT_MALLOC(query_rank, _n_ents, int);
151 
152   for (cs_lnum_t j = 0; j < _n_ents; j++) {
153     cs_gnum_t adj_g_num = adjacency[j];
154     if (adj_g_num > 0) {
155       int adj_ent_rank =   ((adj_g_num-1) / adjacent_block.block_size)
156                          * adjacent_block.rank_step;
157       query_rank[j] = adj_ent_rank;
158     }
159     else
160       query_rank[j] = rank; /* leave on current rank */
161   }
162 
163   cs_all_to_all_t *qd = cs_all_to_all_create(_n_ents,
164                                              0, /* flags */
165                                              NULL,
166                                              query_rank,
167                                              comm);
168 
169   cs_all_to_all_transfer_dest_rank(qd, &query_rank);
170 
171   cs_gnum_t *adj_query
172     = cs_all_to_all_copy_array(qd,
173                                CS_GNUM_TYPE,
174                                1,
175                                false, /* reverse */
176                                adjacency,
177                                NULL);
178 
179   cs_lnum_t n_elts_query = cs_all_to_all_n_elts_dest(qd);
180 
181   int *sent_rank;
182   BFT_MALLOC(sent_rank, n_elts_query, int);
183 
184   if (adjacent_ent_rank != NULL) {
185     for (cs_lnum_t j = 0; j < n_elts_query; j++) {
186       if (adj_query[j] > 0) {
187         cs_lnum_t adj_l_id = (adj_query[j] - 1) % adjacent_block.block_size;
188         sent_rank[j] = adjacent_ent_rank[adj_l_id];
189       }
190       else
191         sent_rank[j] = -1;
192     }
193   }
194   else {
195     for (cs_lnum_t j = 0; j < n_elts_query; j++) {
196       if (adj_query[j] > 0)
197         sent_rank[j] = rank;
198       else
199         sent_rank[j] = -1;
200     }
201   }
202 
203   BFT_FREE(adj_query);
204 
205   /* Return communication */
206 
207   int *dest_rank
208     = cs_all_to_all_copy_array(qd,
209                                CS_INT_TYPE,
210                                1,
211                                true, /* reverse */
212                                sent_rank,
213                                NULL);
214 
215   BFT_FREE(sent_rank);
216 
217   cs_all_to_all_destroy(&qd);
218 
219   /* We now need to extract the ranks to which each entity will be sent,
220      based on where its adjacent entities were sent (and thus where
221      it will be needed).
222      -------------------------------------------------------------------- */
223 
224   int  *send_rank;
225   BFT_MALLOC(send_rank, _n_ents, int);
226   cs_gnum_t *send_gnum;
227   BFT_MALLOC(send_gnum, _n_ents, cs_gnum_t);
228 
229   cs_lnum_t n_send = 0;
230 
231   if (stride == 1) {
232     for (cs_lnum_t j = 0; j < n_ents; j++) {
233       int _send_rank = dest_rank[j];
234       if (_send_rank != -1) {
235         send_rank[n_send] = _send_rank;
236         send_gnum[n_send] = block.gnum_range[0] + j;
237         n_send++;
238       }
239       else if (default_rank != NULL) {
240         send_rank[n_send] = default_rank[j];
241         send_gnum[n_send] = block.gnum_range[0] + j;
242         n_send++;
243       }
244     }
245   }
246 
247   else if (stride == 2) {
248     for (cs_lnum_t j = 0; j < n_ents; j++) {
249       int _send_rank = -1;
250       int prev_rank = -1;
251       for (cs_lnum_t k = 0; k < 2; k++) {
252         _send_rank = dest_rank[j*2 + k];
253         if (_send_rank != -1 && _send_rank != prev_rank) {
254           send_rank[n_send] = _send_rank;
255           send_gnum[n_send] = block.gnum_range[0] + j;
256           n_send++;
257           prev_rank = _send_rank;
258         }
259       }
260       if (prev_rank == -1 && default_rank != NULL) {
261         send_rank[n_send] = default_rank[j];
262         send_gnum[n_send] = block.gnum_range[0] + j;
263         n_send++;
264       }
265     }
266   }
267 
268   else
269     bft_error(__FILE__, __LINE__, 0,
270               "%s currently only allows stride 1 or 2, not %d.",
271               __func__, (int)stride);
272 
273   BFT_FREE(dest_rank);
274 
275   /* Exchange send count and global number */
276 
277   /* Remark: we could save a global cs_all_to_all exchange by adding a
278      source element list argument to the cs_all_to_all_create_*
279      functions (such as the src_id in cs_crystal_router_create*),
280      or adding a creation function with such a list;
281      such a list is used internally for reverse cs_all_to_all
282      exchanges, but allowing it would probably imply
283      CS_ALL_TO_ALL_NO_REVERSE to avoid a too complex logic. */
284 
285   cs_all_to_all_t *d
286     = cs_all_to_all_create(n_send,
287                            CS_ALL_TO_ALL_ORDER_BY_SRC_RANK,
288                            NULL,
289                            send_rank,
290                            comm);
291 
292   cs_gnum_t *recv_gnum
293     = cs_all_to_all_copy_array(d,
294                                CS_GNUM_TYPE,
295                                1,
296                                false, /* reverse */
297                                send_gnum,
298                                NULL);
299 
300   cs_lnum_t  _n_part_elts = cs_all_to_all_n_elts_dest(d);
301 
302   BFT_FREE(send_rank);
303   BFT_FREE(send_gnum);
304 
305   cs_all_to_all_destroy(&d);
306 
307   /* Now build final distributor */
308 
309   d = cs_all_to_all_create_from_block(_n_part_elts,
310                                       CS_ALL_TO_ALL_USE_DEST_ID,
311                                       recv_gnum,
312                                       block,
313                                       comm);
314 
315   if (n_part_elts != NULL)
316     *n_part_elts = _n_part_elts;
317 
318   if (part_gnum != NULL)
319     *part_gnum = recv_gnum;
320   else
321     BFT_FREE(recv_gnum);
322 
323   /* Return initialized structure */
324 
325   return d;
326 }
327 
328 #endif /* defined(HAVE_MPI) */
329 
330 /*----------------------------------------------------------------------------*/
331 /*!
332  * \brief Determine local references from references to global numbers.
333  *
334  * This is based on finding the local id of a given global number
335  * using a binary search.
336  *
337  * Global numbers use a 1 to n numbering, while local numbers use a
338  * 0+base to n-1+base numbering. If an entity's global number does not
339  * appear in the global list, base-1 is assigned for that entity's
340  * local list.
341  *
342  * If list contains duplicate values, any local id having a multiple
343  * global number (i.e not necessarily the smallest one) may be
344  * assigned to the corresponding local_number[] entry.
345  *
346  * \param[in]   n_ents                 number of entities
347  * \param[in]   base                   base numbering (typically 0 or 1)
348  * \param[in]   global_list_size       size of global entity list
349  * \param[in]   global_list_is_sorted  true if global entity list is
350  *                                     guaranteed to be sorted
351  * \param[in]   global_list            global entity list
352  * \param[in]   global_number          entity global numbers (size: n_ents)
353  * \param[out]  local_number           entity local numbers (size: n_ents)
354  */
355 /*----------------------------------------------------------------------------*/
356 
357 void
cs_block_to_part_global_to_local(cs_lnum_t n_ents,cs_lnum_t base,cs_lnum_t global_list_size,bool global_list_is_sorted,const cs_gnum_t global_list[],const cs_gnum_t global_number[],cs_lnum_t local_number[])358 cs_block_to_part_global_to_local(cs_lnum_t        n_ents,
359                                  cs_lnum_t        base,
360                                  cs_lnum_t        global_list_size,
361                                  bool             global_list_is_sorted,
362                                  const cs_gnum_t  global_list[],
363                                  const cs_gnum_t  global_number[],
364                                  cs_lnum_t        local_number[])
365 {
366   cs_lnum_t i;
367   cs_lnum_t *order = NULL;
368   cs_gnum_t *_g_list = NULL;
369   const cs_gnum_t *g_list = global_list;
370 
371   if (n_ents == 0)
372     return;
373 
374  #if defined(DEBUG) && !defined(NDEBUG)
375   if (global_list_is_sorted) {
376     for (i = 1; i < global_list_size; i++)
377       assert(global_list[i] > global_list[i-1]);
378   }
379 #endif
380 
381   if (global_list_is_sorted == false) {
382     BFT_MALLOC(_g_list, global_list_size, cs_gnum_t);
383     order = cs_order_gnum(NULL, global_list, global_list_size);
384     for (i = 0; i < global_list_size; i++)
385       _g_list[i] = global_list[order[i]];
386     g_list = _g_list;
387   }
388 
389   for (i = 0; i < n_ents; i++) {
390 
391     cs_lnum_t start_id = 0;
392     cs_lnum_t end_id = global_list_size;
393 
394     const cs_gnum_t num_1 = global_number[i];
395 
396     /* Use binary search */
397 
398     while (start_id < end_id) {
399       cs_lnum_t mid_id = start_id + ((end_id - start_id) / 2);
400       if (g_list[mid_id] < num_1)
401         start_id = mid_id + 1;
402       else
403         end_id = mid_id;  /* Can't be end_id = mid_id -1;
404                              g_list[mid_id] >= num_1, so
405                              end_id must not be < mid_id in case
406                              g_list[mid_id] == num_1 */
407     }
408 
409     /* start_id == end_id at this stage; */
410 
411     if (start_id < global_list_size && g_list[start_id] == num_1)
412       local_number[i] = start_id + base;
413     else
414       local_number[i] = base - 1;
415 
416   }
417 
418   BFT_FREE(_g_list);
419 
420   if (order != NULL) {
421     for (i = 0 ; i < n_ents ; i++)
422       local_number[i] = order[local_number[i] - base] + base;
423     BFT_FREE(order);
424   }
425 
426 }
427 
428 /*----------------------------------------------------------------------------*/
429 
430 END_C_DECLS
431