1 #ifndef __CS_HALO_H__
2 #define __CS_HALO_H__
3 
4 /*============================================================================
 * Structure and function headers for handling of ghost cells
6  *============================================================================*/
7 
8 /*
9   This file is part of Code_Saturne, a general-purpose CFD tool.
10 
11   Copyright (C) 1998-2021 EDF S.A.
12 
13   This program is free software; you can redistribute it and/or modify it under
14   the terms of the GNU General Public License as published by the Free Software
15   Foundation; either version 2 of the License, or (at your option) any later
16   version.
17 
18   This program is distributed in the hope that it will be useful, but WITHOUT
19   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
20   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
21   details.
22 
23   You should have received a copy of the GNU General Public License along with
24   this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
25   Street, Fifth Floor, Boston, MA 02110-1301, USA.
26 */
27 
28 /*----------------------------------------------------------------------------*/
29 
30 /*----------------------------------------------------------------------------
31  *  Local headers
32  *----------------------------------------------------------------------------*/
33 
34 #include "cs_defs.h"
35 #include "cs_base.h"
36 #include "cs_base_accel.h"
37 #include "cs_interface.h"
38 #include "cs_rank_neighbors.h"
39 
40 #include "fvm_periodicity.h"
41 
42 /*----------------------------------------------------------------------------*/
43 
44 BEGIN_C_DECLS
45 
46 /*============================================================================
47  * Macro definitions
48  *============================================================================*/
49 
50 /*=============================================================================
51  * Type definitions
52  *============================================================================*/
53 
/*! Halo type.
 *
 * The first two values are also used as indexes into the 2-entry
 * element count arrays of cs_halo_t (n_elts[] and n_send_elts[]). */

typedef enum {

  CS_HALO_STANDARD,   /*!< standard halo */
  CS_HALO_EXTENDED,   /*!< extended halo (vertex-adjacent cells) */
  CS_HALO_N_TYPES     /*!< number of halo types above (not itself a
                        halo type) */

} cs_halo_type_t;
63 
/*! Halo communication mode */

typedef enum {

  CS_HALO_COMM_P2P,      /*!< non-blocking point-to-point communication */
  CS_HALO_COMM_RMA_GET   /*!< MPI-3 one-sided with get semantics and
                           active target synchronization */

} cs_halo_comm_mode_t;
73 
74 /* Structure for halo management */
75 /* ----------------------------- */
76 
77 typedef struct {
78 
79   int       n_c_domains;     /* Number of communicating domains. */
80   int       n_transforms;    /* Number of periodic transformations */
81 
82   int       *c_domain_rank;  /* List of communicating ranks */
83 
84   const fvm_periodicity_t * periodicity; /* Pointer to periodicity
85                                             structure describing transforms */
86 
87   int       n_rotations;     /* Number of periodic transformations
88                                 involving rotations */
89 
90   cs_lnum_t  n_local_elts;   /* Number of local elements */
91 
92   /* send_halo features : send to distant ranks */
93 
94   cs_lnum_t  n_send_elts[2];   /* Numer of ghost elements in send_list
95                                 n_elts[0] = standard elements
96                                 n_elts[1] = extended + standard elements */
97 
98   cs_lnum_t  *send_list;       /* List of local elements in distant halos
99                                   (0 to n-1 numbering) */
100 
101   cs_lnum_t  *send_index;      /* Index on send_list
102                                   Size = 2*n_c_domains + 1. For each rank, we
103                                   have an index for standard halo and one
104                                   for extended halo. */
105 
106   cs_lnum_t  *send_perio_lst ; /* For each transformation and for each type of
107                                   halo on each communicating rank, we store
108                                   2 values:
109                                    - start index,
110                                    - number of elements. */
111 
112   /* halo features : receive from distant ranks */
113 
114   cs_lnum_t  n_elts[2];       /* Numer of ghost elements in halo
115                                  n_elts[0] = standard elements
116                                  n_elts[1] = extended + standard elements */
117 
118   cs_lnum_t  *index;        /* Index on halo sections;
119                                Size = 2*n_c_domains. For each rank, we
120                                have an index for the standard halo and one
121                                for the extended halo. */
122 
123   cs_lnum_t  *perio_lst;    /* For each transformation and for each type of halo
124                                on each communicating rank, we store 2 values:
125                                  - start index,
126                                  - number of elements. */
127 
128   /* Organisation of perio_lst:
129 
130          -------------------------------------------------
131     T1:  |   |   |   |   |   |   |   |   |   |   |   |   |
132          -------------------------------------------------
133           idx  n  idx  n  idx  n  idx  n  idx  n  idx  n
134           ______  ______  ______  ______  ______  ______
135            std     ext     std     ext     std     ext
136            ___________     ___________     ___________
137              rank 0          rank 1          rank 2
138 
139          -------------------------------------------------
140     T2:  |   |   |   |   |   |   |   |   |   |   |   |   |
141          -------------------------------------------------
142           idx  n  idx  n  idx  n  idx  n  idx  n  idx  n
143           ______  ______  ______  ______  ______  ______
144            std     ext     std     ext     std     ext
145            ___________     ___________     ___________
146              rank 0          rank 1          rank 2
147 
148          -------------------------------------------------
149     T3:  |   |   |   |   |   |   |   |   |   |   |   |   |
150          -------------------------------------------------
151           idx  n  idx  n  idx  n  idx  n  idx  n  idx  n
152           ______  ______  ______  ______  ______  ______
153            std     ext     std     ext     std     ext
154            ___________     ___________     ___________
155              rank 0          rank 1          rank 2
156 
157   etc...
158 
159   */
160 
161 #if defined(HAVE_MPI)
162 
163   MPI_Group   c_domain_group;    /* Group of connected domains */
164   cs_lnum_t  *c_domain_s_shift;  /* Target buffer shift for distant
165                                     ranks using one-sided get */
166 #endif
167 
168 } cs_halo_t;
169 
170 /*! Structure to maintain halo exchange state */
171 
172 typedef struct _cs_halo_state_t  cs_halo_state_t;
173 
174 /*=============================================================================
175  * Global static variables
176  *============================================================================*/
177 
178 /*============================================================================
179  *  Public function header for Fortran API
180  *============================================================================*/
181 
182 /*=============================================================================
183  * Public function prototypes
184  *============================================================================*/
185 
186 /*----------------------------------------------------------------------------*/
187 /*!
188  * \brief Create a halo structure given an interface set.
189  *
190  * \param[in]  ifs  pointer to a cs_interface_set structure
191  *
192  * \return  pointer to created cs_halo_t structure
193  */
194 /*----------------------------------------------------------------------------*/
195 
196 cs_halo_t *
197 cs_halo_create(const cs_interface_set_t  *ifs);
198 
199 /*----------------------------------------------------------------------------*/
200 /*!
201  * \brief Ready halo for use.
202  *
 * This function should be called after building a halo which was created
 * using the cs_halo_create function and defined locally.
 * It is called automatically by cs_halo_create_from_ref and
 * cs_halo_create_from_rank_neighbors, so it does not need to be called
 * again when using these functions.
208  *
209  * \param[in]  halo  pointer to halo structure
210  */
211 /*----------------------------------------------------------------------------*/
212 
213 void
214 cs_halo_create_complete(cs_halo_t  *halo);
215 
216 /*----------------------------------------------------------------------------*/
217 /*!
218  * \brief Create a halo structure, given a reference halo.
219  *
220  * \param[in]  ref  pointer to reference halo
221  *
222  * \return  pointer to created cs_halo_t structure
223  */
224 /*----------------------------------------------------------------------------*/
225 
226 cs_halo_t *
227 cs_halo_create_from_ref(const cs_halo_t  *ref);
228 
229 #if defined(HAVE_MPI)
230 
231 /*----------------------------------------------------------------------------*/
232 /*!
233  * \brief Create a halo structure from distant element distant ranks and ids.
234  *
 * \remark  This function does not handle periodicity. For most matrix-vector
 *          products and similar operations, periodicity of translation and
 *          even rotation could be handled with no specific halo information,
 *          simply by assigning an equivalence between two periodic elements.
239  *          For rotation, this would require also applying a rotation through
240  *          the matrix coefficients (this would have the advantage of being
241  *          compatible with external libraries). An alternative would be
242  *          to add rotation information to a given halo as a second stage,
243  *          through a specialized operator which can be added in the future.
244  *
245  * \param[in]  rn              associated rank neighbors info
246  * \param[in]  n_local_elts    number of elements for local rank
247  * \param[in]  n_distant_elts  number of distant elements for local rank
248  * \param[in]  elt_rank_id     distant element rank index in rank neighbors,
249  *                             ordered by rank (size: n_distant_elts)
250  * \param[in]  elt_id          distant element id (at distant rank),
251  *                             ordered by rank (size: n_distant_elts)
252  *
253  * \return  pointer to created cs_halo_t structure
254  */
255 /*----------------------------------------------------------------------------*/
256 
257 cs_halo_t *
258 cs_halo_create_from_rank_neighbors(const cs_rank_neighbors_t  *rn,
259                                    cs_lnum_t                   n_local_elts,
260                                    cs_lnum_t                   n_distant_elts,
261                                    const int                   elt_rank_id[],
262                                    const cs_lnum_t             elt_id[]);
263 
264 #endif /* HAVE_MPI */
265 
266 /*----------------------------------------------------------------------------*/
267 /*!
 * \brief Destroy a halo structure.
269  *
270  * \param[in, out]  halo  pointer to pointer to cs_halo structure to destroy.
271  */
272 /*----------------------------------------------------------------------------*/
273 
274 void
275 cs_halo_destroy(cs_halo_t  **halo);
276 
277 /*----------------------------------------------------------------------------*/
278 /*!
279  * \brief Create a halo state structure.
280  *
281  * \return  pointer to created cs_halo_state_t structure.
282  */
283 /*----------------------------------------------------------------------------*/
284 
285 cs_halo_state_t *
286 cs_halo_state_create(void);
287 
288 /*----------------------------------------------------------------------------*/
289 /*!
290  * \brief Destroy a halo state structure.
291  *
292  * \param[in, out]  halo_state  pointer to pointer to cs_halo_state
293  *                              structure to destroy.
294  */
295 /*----------------------------------------------------------------------------*/
296 
297 void
298 cs_halo_state_destroy(cs_halo_state_t  **halo_state);
299 
300 /*----------------------------------------------------------------------------*/
301 /*!
302  * \brief Get pointer to default halo state structure.
303  *
 * \return  pointer to default halo state structure.
305  */
306 /*----------------------------------------------------------------------------*/
307 
308 cs_halo_state_t *
309 cs_halo_state_get_default(void);
310 
311 /*----------------------------------------------------------------------------*/
312 /*!
313  * \brief Compute required size for packing send data into dense buffer.
314  *
315  * \param[in]   halo        pointer to halo structure
316  * \param[in]   data_type   data type
317  * \param[in]   stride       number of (interlaced) values by entity
318  *
319  * \return  required size, in bytes
320  */
321 /*----------------------------------------------------------------------------*/
322 
323 static inline size_t
cs_halo_pack_size(const cs_halo_t * halo,cs_datatype_t data_type,int stride)324 cs_halo_pack_size(const cs_halo_t  *halo,
325                   cs_datatype_t     data_type,
326                   int               stride)
327 {
328   size_t elt_size = cs_datatype_size[data_type]*stride;
329   size_t pack_size = halo->n_send_elts[CS_HALO_EXTENDED] * elt_size;
330 
331   return pack_size;
332 }
333 
334 /*----------------------------------------------------------------------------
335  * Apply local cells renumbering to a halo
336  *
337  * parameters:
 *   halo        <-> pointer to halo structure
339  *   new_cell_id <-- array indicating old -> new cell id (0 to n-1)
340  *---------------------------------------------------------------------------*/
341 
342 void
343 cs_halo_renumber_cells(cs_halo_t        *halo,
344                        const cs_lnum_t   new_cell_id[]);
345 
346 /*----------------------------------------------------------------------------
347  * Apply ghost cells renumbering to a halo
348  *
349  * parameters:
 *   halo        <-> pointer to halo structure
351  *   old_cell_id <-- array indicating new -> old cell id (0 to n-1)
352  *---------------------------------------------------------------------------*/
353 
354 void
355 cs_halo_renumber_ghost_cells(cs_halo_t        *halo,
356                              const cs_lnum_t   old_cell_id[]);
357 
358 /*----------------------------------------------------------------------------*/
359 /*!
360  * \brief Initialize halo state prior to packing halo data to send.
361  *
362  * A local state handler may be provided, or the default state handler will
363  * be used.
364  *
365  * This function is included in \ref cs_halo_sync_pack, but may be called
366  * separately for specific implementations, such as for accelerator devices.
367  *
368  * A local state and/or buffer may be provided, or the default (global) state
 * and buffer will be used. If provided explicitly,
370  * the buffer must be of sufficient size.
371  *
372  * \param[in]       halo        pointer to halo structure
373  * \param[in]       sync_mode   synchronization mode (standard or extended)
374  * \param[in]       data_type   data type
375  * \param[in]       stride      number of (interlaced) values by entity
376  * \param[out]      send_buf    pointer to send buffer, NULL for global buffer
377  * \param[in, out]  hs          pointer to halo state, NULL for global state
378  *
379  * \return  pointer to halo send buffer
380  */
381 /*----------------------------------------------------------------------------*/
382 
383 void *
384 cs_halo_sync_pack_init_state(const cs_halo_t  *halo,
385                              cs_halo_type_t    sync_mode,
386                              cs_datatype_t     data_type,
387                              int               stride,
388                              void             *send_buf,
389                              cs_halo_state_t  *hs);
390 
391 /*----------------------------------------------------------------------------*/
392 /*!
393  * \brief Pack halo data to send into dense buffer.
394  *
395  * A local state handler may be provided, or the default state handler will
396  * be used.
397  *
398  * A local state and/or buffer may be provided, or the default (global) state
 * and buffer will be used. If provided explicitly,
400  * the buffer must be of sufficient size.
401  *
402  * \param[in]       halo        pointer to halo structure
403  * \param[in]       sync_mode   synchronization mode (standard or extended)
404  * \param[in]       data_type   data type
405  * \param[in]       stride      number of (interlaced) values by entity
406  * \param[in]       val         pointer to variable value array
407  * \param[out]      send_buf    pointer to send buffer, NULL for global buffer
408  * \param[in, out]  hs          pointer to halo state, NULL for global state
409  */
410 /*----------------------------------------------------------------------------*/
411 
412 void
413 cs_halo_sync_pack(const cs_halo_t  *halo,
414                   cs_halo_type_t    sync_mode,
415                   cs_datatype_t     data_type,
416                   int               stride,
417                   void             *val,
418                   void             *send_buf,
419                   cs_halo_state_t  *hs);
420 
421 #if defined(HAVE_ACCEL)
422 
423 /*----------------------------------------------------------------------------*/
424 /*!
425  * \brief Pack halo data to send into dense buffer on accelerator device.
426  *
427  * A local state handler may be provided, or the default state handler will
428  * be used.
429  *
430  * A local state and/or buffer may be provided, or the default (global) state
 * and buffer will be used. If provided explicitly,
432  * the buffer must be of sufficient size.
433  *
434  * \param[in]       halo        pointer to halo structure
435  * \param[in]       sync_mode   synchronization mode (standard or extended)
436  * \param[in]       data_type   data type
437  * \param[in]       stride      number of (interlaced) values by entity
438  * \param[in]       val         pointer to variable value array (on device)
439  * \param[out]      send_buf    pointer to send buffer (on device),
440  *                              NULL for global buffer
441  * \param[in, out]  hs          pointer to halo state, NULL for global state
442  */
443 /*----------------------------------------------------------------------------*/
444 
445 void
446 cs_halo_sync_pack_d(const cs_halo_t  *halo,
447                     cs_halo_type_t    sync_mode,
448                     cs_datatype_t     data_type,
449                     int               stride,
450                     void             *val,
451                     void             *send_buf,
452                     cs_halo_state_t  *hs);
453 
454 #endif /* defined(HAVE_ACCEL) */
455 
456 /*----------------------------------------------------------------------------*/
457 /*!
 * \brief Launch update of an array of values in case of parallelism
 *  or periodicity.
459  *
460  * This function aims at copying main values from local elements
461  * (id between 1 and n_local_elements) to ghost elements on distant ranks
462  * (id between n_local_elements + 1 to n_local_elements_with_halo).
463  *
464  * The cs_halo_sync_pack function should have been called before this function,
465  * using the same hs argument.
466  *
467  * \param[in]       halo        pointer to halo structure
468  * \param[in]       val         pointer to variable value array
469  * \param[in, out]  hs          pointer to halo state, NULL for global state
470  */
471 /*----------------------------------------------------------------------------*/
472 
473 void
474 cs_halo_sync_start(const cs_halo_t  *halo,
475                    void             *val,
476                    cs_halo_state_t  *hs);
477 
478 /*----------------------------------------------------------------------------*/
479 /*!
 * \brief Wait for completion of the update of an array of values in case of
 *  parallelism or periodicity.
482  *
483  * This function aims at copying main values from local elements
484  * (id between 1 and n_local_elements) to ghost elements on distant ranks
485  * (id between n_local_elements + 1 to n_local_elements_with_halo).
486  *
487  * The cs_halo_sync_start function should have been called before this function,
488  * using the same hs argument.
489  *
490  * \param[in]       halo        pointer to halo structure
491  * \param[in]       val         pointer to variable value array
492  * \param[in, out]  hs          pointer to halo state, NULL for global state
493  */
494 /*----------------------------------------------------------------------------*/
495 
496 void
497 cs_halo_sync_wait(const cs_halo_t  *halo,
498                   void             *val,
499                   cs_halo_state_t  *hs);
500 
501 /*----------------------------------------------------------------------------*/
502 /*!
503  * \brief Update array of values in case of parallelism or periodicity.
504  *
505  * This function aims at copying main values from local elements
506  * (id between 1 and n_local_elements) to ghost elements on distant ranks
507  * (id between n_local_elements + 1 to n_local_elements_with_halo).
508  *
509  * \param[in]   halo        pointer to halo structure
510  * \param[in]   sync_mode   synchronization mode (standard or extended)
511  * \param[in]   data_type   data type
512  * \param[in]   stride      number of (interlaced) values by entity
513  * \param[in]   val         pointer to variable value array
514  */
515 /*----------------------------------------------------------------------------*/
516 
517 void
518 cs_halo_sync(const cs_halo_t  *halo,
519              cs_halo_type_t    sync_mode,
520              cs_datatype_t     data_type,
521              int               stride,
522              void             *val);
523 
524 /*----------------------------------------------------------------------------
525  * Update array of any type of halo values in case of parallelism or
526  * periodicity.
527  *
528  * Data is untyped; only its size is given, so this function may also
 * be used to synchronize interleaved multidimensional data, using
530  * size = element_size*dim (assuming a homogeneous environment, at least
531  * as far as data encoding goes).
532  *
533  * This function aims at copying main values from local elements
534  * (id between 1 and n_local_elements) to ghost elements on distant ranks
535  * (id between n_local_elements + 1 to n_local_elements_with_halo).
536  *
537  * parameters:
538  *   halo      <-- pointer to halo structure
539  *   sync_mode <-- synchronization mode (standard or extended)
540  *   size      <-- size of each element
 *   val       <-> pointer to variable value array
542  *----------------------------------------------------------------------------*/
543 
544 void
545 cs_halo_sync_untyped(const cs_halo_t  *halo,
546                      cs_halo_type_t    sync_mode,
547                      size_t            size,
548                      void             *val);
549 
550 /*----------------------------------------------------------------------------
551  * Update array of integer halo values in case of parallelism or periodicity.
552  *
553  * This function aims at copying main values from local elements
554  * (id between 1 and n_local_elements) to ghost elements on distant ranks
555  * (id between n_local_elements + 1 to n_local_elements_with_halo).
556  *
557  * parameters:
558  *   halo      <-- pointer to halo structure
559  *   sync_mode <-- synchronization mode (standard or extended)
560  *   num       <-> pointer to local number value array
561  *----------------------------------------------------------------------------*/
562 
563 void
564 cs_halo_sync_num(const cs_halo_t  *halo,
565                  cs_halo_type_t    sync_mode,
566                  cs_lnum_t         num[]);
567 
568 /*----------------------------------------------------------------------------
569  * Update array of variable (floating-point) halo values in case of
570  * parallelism or periodicity.
571  *
572  * This function aims at copying main values from local elements
573  * (id between 1 and n_local_elements) to ghost elements on distant ranks
574  * (id between n_local_elements + 1 to n_local_elements_with_halo).
575  *
576  * parameters:
577  *   halo      <-- pointer to halo structure
578  *   sync_mode <-- synchronization mode (standard or extended)
579  *   var       <-> pointer to variable value array
580  *----------------------------------------------------------------------------*/
581 
582 void
583 cs_halo_sync_var(const cs_halo_t  *halo,
584                  cs_halo_type_t    sync_mode,
585                  cs_real_t         var[]);
586 
587 /*----------------------------------------------------------------------------
588  * Update array of strided variable (floating-point) halo values in case
589  * of parallelism or periodicity.
590  *
591  * This function aims at copying main values from local elements
592  * (id between 1 and n_local_elements) to ghost elements on distant ranks
593  * (id between n_local_elements + 1 to n_local_elements_with_halo).
594  *
595  * parameters:
596  *   halo      <-- pointer to halo structure
597  *   sync_mode <-- synchronization mode (standard or extended)
598  *   var       <-> pointer to variable value array
599  *   stride    <-- number of (interlaced) values by entity
600  *----------------------------------------------------------------------------*/
601 
602 void
603 cs_halo_sync_var_strided(const cs_halo_t  *halo,
604                          cs_halo_type_t    sync_mode,
605                          cs_real_t         var[],
606                          int               stride);
607 
608 /*----------------------------------------------------------------------------
609  * Return MPI_Barrier usage flag.
610  *
611  * returns:
612  *   true if MPI barriers are used after posting receives and before posting
613  *   sends, false otherwise
614  *---------------------------------------------------------------------------*/
615 
616 bool
617 cs_halo_get_use_barrier(void);
618 
619 /*----------------------------------------------------------------------------
620  * Set MPI_Barrier usage flag.
621  *
622  * parameters:
623  *   use_barrier <-- true if MPI barriers should be used after posting
624  *                   receives and before posting sends, false otherwise.
625  *---------------------------------------------------------------------------*/
626 
627 void
628 cs_halo_set_use_barrier(bool use_barrier);
629 
630 /*----------------------------------------------------------------------------*/
631 /*!
632  * \brief Get default communication mode for halo exchange.
633  *
 * \return  communication mode
635  */
636 /*----------------------------------------------------------------------------*/
637 
638 cs_halo_comm_mode_t
639 cs_halo_get_comm_mode(void);
640 
641 /*----------------------------------------------------------------------------*/
642 /*!
643  * \brief Set default communication mode for halo exchange.
644  *
 * \param[in]  mode  communication mode to set
646  */
647 /*----------------------------------------------------------------------------*/
648 
649 void
650 cs_halo_set_comm_mode(cs_halo_comm_mode_t  mode);
651 
652 /*----------------------------------------------------------------------------*/
653 /*!
654  * \brief Get default host/device allocation mode for message packing arrays.
655  *
656  * \return  allocation mode
657  */
658 /*----------------------------------------------------------------------------*/
659 
660 cs_alloc_mode_t
661 cs_halo_get_buffer_alloc_mode(void);
662 
663 /*----------------------------------------------------------------------------*/
664 /*!
665  * \brief Set default host/device allocation mode for message packing arrays.
666  *
667  * \param[in]  mode  allocation mode to set
668  */
669 /*----------------------------------------------------------------------------*/
670 
671 void
672 cs_halo_set_buffer_alloc_mode(cs_alloc_mode_t  mode);
673 
674 /*----------------------------------------------------------------------------
675  * Dump a cs_halo_t structure.
676  *
677  * parameters:
 *   halo           <--  pointer to cs_halo_t structure
679  *   print_level    <--  0 only dimensions and indexes are printed, else (1)
680  *                       everything is printed
681  *---------------------------------------------------------------------------*/
682 
683 void
684 cs_halo_dump(const cs_halo_t  *halo,
685              int               print_level);
686 
687 /*----------------------------------------------------------------------------*/
688 
689 END_C_DECLS
690 
691 #endif /* __CS_HALO_H__ */
692