1 /*============================================================================
2  * \file Handling of interfaces associating mesh elements (such as
3  * inter-processor or periodic connectivity between cells, faces,
4  * or vertices).
5  *============================================================================*/
6 
7 /*
8   This file is part of Code_Saturne, a general-purpose CFD tool.
9 
10   Copyright (C) 1998-2021 EDF S.A.
11 
12   This program is free software; you can redistribute it and/or modify it under
13   the terms of the GNU General Public License as published by the Free Software
14   Foundation; either version 2 of the License, or (at your option) any later
15   version.
16 
17   This program is distributed in the hope that it will be useful, but WITHOUT
18   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
19   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
20   details.
21 
22   You should have received a copy of the GNU General Public License along with
23   this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
24   Street, Fifth Floor, Boston, MA 02110-1301, USA.
25 */
26 
27 /*----------------------------------------------------------------------------*/
28 
29 #include "cs_defs.h"
30 
31 /*----------------------------------------------------------------------------
32  * Standard C library headers
33  *----------------------------------------------------------------------------*/
34 
35 #include <assert.h>
36 #include <stdio.h>
37 #include <string.h>
38 
39 /*----------------------------------------------------------------------------
40  *  Local headers
41  *----------------------------------------------------------------------------*/
42 
43 #include "bft_error.h"
44 #include "bft_mem.h"
45 #include "bft_printf.h"
46 
47 #include "cs_all_to_all.h"
48 #include "cs_base.h"
49 #include "cs_block_dist.h"
50 #include "cs_order.h"
51 
52 #include "fvm_periodicity.h"
53 
54 /*----------------------------------------------------------------------------
55  *  Header for the current file
56  *----------------------------------------------------------------------------*/
57 
58 #include "cs_interface.h"
59 
60 /*----------------------------------------------------------------------------*/
61 
62 BEGIN_C_DECLS
63 
64 /*! \cond DOXYGEN_SHOULD_SKIP_THIS */
65 
66 /*============================================================================
67  * Local structure definitions
68  *============================================================================*/
69 
70 /*----------------------------------------------------------------------------
71  * Structure defining an interface
72  *----------------------------------------------------------------------------*/
73 
74 struct _cs_interface_t {
75 
76   int          rank;           /* Associated rank */
77 
78   cs_lnum_t    size;           /* Number of equivalent elements */
79 
80   int          tr_index_size;  /* Size of perio_index */
81   cs_lnum_t   *tr_index;       /* Index of sub-sections in elt_id, distant_id,
82                                   and match_id for different transformations;
83                                   purely parallel equivalences appear at
84                                   position 0, equivalences through periodic
85                                   transform i appear at position i+1;
86                                   note that send_order crosses subsection
87                                   boundaries, and is not indexed by this array.
88                                   NULL in absence of transformations */
89 
90   cs_lnum_t   *elt_id;         /* Local element ids (ordered, always present) */
91   cs_lnum_t   *match_id;       /* Matching element ids for same-rank interface,
92                                   distant element ids such that match_id[i]
93                                   on distant rank matches local_id[i]
94                                   (temporary life cycle even in parallel) */
95   cs_lnum_t   *send_order;     /* Local element ids ordered so that
96                                   receive matches elt_id for other-rank
97                                   interfaces, and match_id[send_order[i]]
98                                   equals elt_id[i] on same-rank interface */
99 
100 };
101 
102 /*----------------------------------------------------------------------------
103  * Structure defining a set of interfaces
104  *----------------------------------------------------------------------------*/
105 
106 struct _cs_interface_set_t {
107 
108   int                       size;          /* Number of interfaces */
109 
110   cs_interface_t          **interfaces;    /* Interface structures array */
111 
112   const fvm_periodicity_t  *periodicity;   /* Optional periodicity structure */
113 
114   int                       match_id_rc;   /* Match_id reference count */
115 
116 #if defined(HAVE_MPI)
117   MPI_Comm                  comm;          /* Associated communicator */
118 #endif
119 };
120 
121 /*----------------------------------------------------------------------------
122  * Local structure defining a temporary list of interfaces
123  *----------------------------------------------------------------------------*/
124 
125 typedef struct {
126 
127   int          count;    /* Number of equivalences */
128   cs_lnum_t   *shift;    /* Index of per-equivalence data in rank[] and num[] */
129   int         *rank;     /* Rank associated with each element */
130   int         *tr_id;    /* Transformation id associated with each element,
131                             + 1, with 0 indicating no transformation
132                             (NULL in absence of periodicity) */
133   cs_lnum_t   *num;      /* Local number associated with each element */
134 
135 } _per_block_equiv_t;
136 
137 /*----------------------------------------------------------------------------
138  * Local structure defining a temporary list of periodic interfaces
139  *----------------------------------------------------------------------------*/
140 
141 typedef struct {
142 
143   int          count;     /* Number of periodic couples */
144   cs_lnum_t   *block_id;  /* local id in block */
145   int         *tr_id;     /* Transform id associated with each couple */
146   int         *shift;     /* Index of per-couple data in rank[] and num[] */
147   int         *rank;      /* Ranks associated with periodic elements */
148   cs_lnum_t   *num;       /* Local numbers associated with periodic elements */
149 
150 } _per_block_period_t;
151 
152 /*=============================================================================
153  * Private function definitions
154  *============================================================================*/
155 
156 /*----------------------------------------------------------------------------
157  * Creation of an empty interface between elements of a same type.
158  *
159  * This interface may be used to identify equivalent vertices or faces using
160  * domain splitting, as well as periodic elements (on the same or on
161  * distant ranks).
162  *
163  * returns:
164  *  pointer to allocated interface structure
165  *----------------------------------------------------------------------------*/
166 
167 static cs_interface_t *
_cs_interface_create(void)168 _cs_interface_create(void)
169 {
170   cs_interface_t  *_interface;
171 
172   BFT_MALLOC(_interface, 1, cs_interface_t);
173 
174   _interface->rank = -1;
175   _interface->size = 0;
176 
177   _interface->tr_index_size = 0;
178   _interface->tr_index = NULL;
179 
180   _interface->elt_id = NULL;
181   _interface->match_id = NULL;
182   _interface->send_order = NULL;
183 
184   return _interface;
185 }
186 
187 /*----------------------------------------------------------------------------
188  * Destruction of an interface.
189  *
190  * parameters:
191  *   itf <-> pointer to pointer to structure to destroy
192  *
193  * returns:
194  *   NULL pointer
195  *----------------------------------------------------------------------------*/
196 
197 static void
_cs_interface_destroy(cs_interface_t ** itf)198 _cs_interface_destroy(cs_interface_t  **itf)
199 {
200   cs_interface_t  *_itf = *itf;
201 
202   if (_itf != NULL) {
203     BFT_FREE(_itf->tr_index);
204     BFT_FREE(_itf->elt_id);
205     BFT_FREE(_itf->match_id);
206     BFT_FREE(_itf->send_order);
207     BFT_FREE(_itf);
208   }
209 
210   *itf = _itf;
211 }
212 
213 /*----------------------------------------------------------------------------
214  * Dump printout of an interface.
215  *
216  * parameters:
217  *   itf <-- pointer to structure that should be dumped
218  *----------------------------------------------------------------------------*/
219 
220 static void
_cs_interface_dump(const cs_interface_t * itf)221 _cs_interface_dump(const cs_interface_t  *itf)
222 {
223   int i, section_id;
224   cs_lnum_t start_id, end_id;
225 
226   int  tr_index_size = 2;
227   cs_lnum_t   _tr_index[2] = {0, 0};
228   const cs_lnum_t   *tr_index = _tr_index;
229 
230   if (itf == NULL) {
231     bft_printf("  interface: nil\n");
232     return;
233   }
234 
235   bft_printf("  interface:             %p\n"
236              "  associated rank:       %d\n"
237              "  size:                  %llu\n"
238              "  transform index size:  %d\n",
239              (const void *)itf,
240              itf->rank,
241              (unsigned long long)(itf->size),
242              itf->tr_index_size);
243 
244   if (itf->tr_index_size > 0) {
245     bft_printf("  transform index:\n");
246     for (i = 0; i < itf->tr_index_size; i++)
247       bft_printf("    %5d %lu\n", i, (unsigned long)itf->tr_index[i]);
248   }
249 
250 
251   _tr_index[1] = itf->size;
252 
253   if (itf->tr_index_size > 0) {
254     tr_index_size = itf->tr_index_size;
255     tr_index = itf->tr_index;
256   }
257 
258   /* Case for other-rank interface */
259 
260   if (itf->match_id != NULL) {
261 
262     for (section_id = 0; section_id < tr_index_size - 1; section_id++) {
263 
264       if (section_id == 0)
265         bft_printf("\n"
266                    "            id      elt_id   match_id (parallel)\n");
267       else
268         bft_printf("\n"
269                    "            id      elt_id   match_id (transform %d)\n",
270                    section_id - 1);
271 
272       start_id = tr_index[section_id];
273       end_id = tr_index[section_id + 1];
274 
275       for (i = start_id; i < end_id; i++)
276         bft_printf("    %10ld %10ld %10ld\n",
277                    (long)i, (long)itf->elt_id[i], (long)itf->match_id[i]);
278 
279     }
280 
281   }
282 
283   else {
284 
285     for (section_id = 0; section_id < tr_index_size - 1; section_id++) {
286 
287       if (section_id == 0)
288         bft_printf("\n"
289                    "            id      elt_id (parallel)\n");
290       else
291         bft_printf("\n"
292                    "            id      elt_id (transform %d)\n",
293                    section_id - 1);
294 
295       start_id = tr_index[section_id];
296       end_id = tr_index[section_id + 1];
297 
298       for (i = start_id; i < end_id; i++)
299         bft_printf("    %10ld %10ld\n", (long)i, (long)itf->elt_id[i]);
300 
301     }
302 
303   }
304 
305   /* Print send order separately, as it is section-independant */
306 
307   if (itf->send_order != NULL) {
308 
309     bft_printf("\n"
310                "            id      send_order\n");
311 
312     for (i = 0; i < itf->size; i++)
313       bft_printf("    %10ld %10ld\n", (long)i, (long)itf->send_order[i]);
314 
315   }
316 
317   bft_printf("\n");
318 }
319 
320 /*----------------------------------------------------------------------------
321  * Sort and remove duplicates from periodic tuple information.
322  *
323  * parameters:
324  *   n_block_tuples  <-> number of tuples in current block
325  *   block_tuples    <-> tuple information for this rank: for each tuple,
326  *                       {global number of local element,
327  *                        global number of periodic element,
328  *                        transform id}
329  *----------------------------------------------------------------------------*/
330 
331 static void
_sort_periodic_tuples(cs_lnum_t * n_block_tuples,cs_gnum_t ** block_tuples)332 _sort_periodic_tuples(cs_lnum_t    *n_block_tuples,
333                       cs_gnum_t   **block_tuples)
334 {
335   cs_lnum_t   i, j, k;
336 
337   cs_lnum_t    n_tuples = *n_block_tuples;
338   cs_lnum_t   *order = NULL;
339   cs_gnum_t   *tuples = *block_tuples;
340   cs_gnum_t   *tuples_tmp = NULL;
341 
342   if (n_tuples < 1)
343     return;
344 
345   /* Sort periodic tuples by local correspondant */
346 
347   BFT_MALLOC(order, n_tuples, cs_lnum_t);
348   BFT_MALLOC(tuples_tmp, n_tuples*3, cs_gnum_t);
349 
350   cs_order_gnum_allocated_s(NULL,
351                             tuples,
352                             3,
353                             order,
354                             n_tuples);
355 
356   /* Copy to temporary array, ignoring duplicates */
357 
358   k = order[0]*3;
359   tuples_tmp[0] = tuples[k];
360   tuples_tmp[1] = tuples[k + 1];
361   tuples_tmp[2] = tuples[k + 2];
362   j = 3;
363 
364   for (i = 1; i < n_tuples; i++) {
365     k = order[i] * 3;
366     if (   (tuples[k]   != tuples_tmp[j-3])
367         || (tuples[k+1] != tuples_tmp[j-2])
368         || (tuples[k+2] != tuples_tmp[j-1])) {
369       tuples_tmp[j]     = tuples[k];
370       tuples_tmp[j + 1] = tuples[k + 1];
371       tuples_tmp[j + 2] = tuples[k + 2];
372       j += 3;
373     }
374   }
375   n_tuples = j / 3;
376 
377   BFT_FREE(order);
378 
379   /* Resize input/outpout array if duplicates were removed */
380 
381   if (n_tuples <= *n_block_tuples) {
382     BFT_REALLOC(tuples, n_tuples*3, cs_gnum_t);
383     *n_block_tuples = n_tuples;
384     *block_tuples = tuples;
385   }
386 
387   /* Copy sorted data to input/output array and free temporary storage */
388 
389   memcpy(tuples, tuples_tmp, sizeof(cs_gnum_t)*n_tuples*3);
390 
391   BFT_FREE(tuples_tmp);
392 }
393 
394 /*----------------------------------------------------------------------------
395  * Extract periodicity transform data necessary for periodic combinations.
396  *
397  * Builds a square transformation combination matrix associating a combination
398  * transform id with transform ids of levels lower than that given by
399  * the level argument; the number of rows and columns is thus equal to the
400  * number of transformations of level lower than the argument.
401  * Entries corresponding to impossible combinations are set to -1;
402  *
403  * The caller is responsible for freeing the tr_combine array returned by
404  * this function.
405  *
406  * parameters:
407  *   periodicity  <-- periodicity information
408  *   level        --> transform combination level (1 or 2)
409  *   n_rows       --> number of rows of combination matrix
410  *                    (equals transform_level_index[level])
411  *   tr_combine   --> combination id matrix (size n_rows * n_rows)
412  *----------------------------------------------------------------------------*/
413 
414 static void
_transform_combine_info(const fvm_periodicity_t * periodicity,int level,int * n_rows,int ** tr_combine)415 _transform_combine_info(const fvm_periodicity_t   *periodicity,
416                         int                        level,
417                         int                       *n_rows,
418                         int                      **tr_combine)
419 {
420   int  i;
421   int  tr_level_idx[4], parent_id[2];
422 
423   int n_vals_1 = 0, n_vals_2 = 0;
424   int n_rows_1 = 0, n_rows_2 = 0;
425   int *tr_combine_1, *tr_combine_2 = NULL;
426 
427   assert(periodicity != NULL);
428   assert(level == 1 || level == 2);
429 
430   /* Extract base periodicity dimension info */
431 
432   fvm_periodicity_get_tr_level_idx(periodicity, tr_level_idx);
433 
434   /* We always need the level0 x level0 -> level1 array */
435 
436   n_rows_1 = tr_level_idx[1];
437   n_vals_1 = n_rows_1*n_rows_1;
438 
439   BFT_MALLOC(tr_combine_1, n_vals_1, int);
440 
441   for (i = 0; i < n_vals_1; i++)
442     tr_combine_1[i] = -1;
443 
444   for (i = tr_level_idx[1]; i < tr_level_idx[2]; i++) {
445 
446     fvm_periodicity_get_parent_ids(periodicity, i, parent_id);
447 
448     assert(parent_id[0] > -1 && parent_id[1] > -1);
449     assert(parent_id[0] < n_rows_1 && parent_id[1] < n_rows_1);
450 
451     tr_combine_1[parent_id[0]*n_rows_1 + parent_id[1]] = i;
452     tr_combine_1[parent_id[1]*n_rows_1 + parent_id[0]] = i;
453 
454   }
455 
456   /* For level 1 transforms, we are done once return values are set */
457 
458   if (level == 1) {
459 
460     *n_rows = n_rows_1;
461     *tr_combine = tr_combine_1;
462 
463   }
464 
465   /* Handle level 2 transforms */
466 
467   else { /* if (level == 2) */
468 
469     int  tr_01, tr_02, tr_12;
470     int  comp_id[3];
471 
472     n_rows_2 = tr_level_idx[2];
473     n_vals_2 = n_rows_2*n_rows_2;
474 
475     /* Allocate and populate array */
476 
477     BFT_MALLOC(tr_combine_2, n_vals_2, int);
478 
479     for (i = 0; i < n_vals_2; i++)
480       tr_combine_2[i] = -1;
481 
482     for (i = tr_level_idx[2]; i < tr_level_idx[3]; i++) {
483 
484       fvm_periodicity_get_components(periodicity, i, comp_id);
485 
486       assert(comp_id[0] > -1 && comp_id[1] > -1 && comp_id[2] > -1);
487 
488       tr_01 = tr_combine_1[comp_id[0]*n_rows_1 + comp_id[1]];
489       tr_02 = tr_combine_1[comp_id[0]*n_rows_1 + comp_id[2]];
490       tr_12 = tr_combine_1[comp_id[1]*n_rows_1 + comp_id[2]];
491 
492       assert(tr_01 > -1 && tr_02 > -1 && tr_12 > -1);
493 
494       tr_combine_2[tr_01*n_rows_2 + comp_id[2]] = i;
495       tr_combine_2[comp_id[2]*n_rows_2 + tr_01] = i;
496 
497       tr_combine_2[tr_02*n_rows_2 + comp_id[1]] = i;
498       tr_combine_2[comp_id[1]*n_rows_2 + tr_02] = i;
499 
500       tr_combine_2[tr_12*n_rows_2 + comp_id[0]] = i;
501       tr_combine_2[comp_id[0]*n_rows_2 + tr_12] = i;
502 
503     }
504 
505     BFT_FREE(tr_combine_1);
506 
507     /* Set return values */
508 
509     *n_rows = n_rows_2;
510     *tr_combine = tr_combine_2;
511   }
512 
513 }
514 
515 #if defined(HAVE_MPI)
516 
517 /*----------------------------------------------------------------------------
518  * Maximum global number associated with an I/O numbering structure
519  *
520  * parameters:
521  *   n_elts     <-- local number of elements
522  *   global_num <-- global number (id) associated with each element
523  *   comm       <-- associated MPI communicator
524  *
525  * returns:
526  *   maximum global number associated with the I/O numbering
527  *----------------------------------------------------------------------------*/
528 
529 static cs_gnum_t
_global_num_max(cs_lnum_t n_elts,const cs_gnum_t global_num[],MPI_Comm comm)530 _global_num_max(cs_lnum_t         n_elts,
531                 const cs_gnum_t   global_num[],
532                 MPI_Comm          comm)
533 {
534   cs_gnum_t  global_max;
535   cs_gnum_t  local_max = 0;
536 
537   /* Get maximum global number value */
538 
539   for (cs_lnum_t i = 0; i < n_elts; i++) {
540     if (global_num[i] > local_max)
541       local_max = global_num[i];
542   }
543 
544   MPI_Allreduce(&local_max, &global_max, 1, CS_MPI_GNUM, MPI_MAX, comm);
545 
546   return global_max;
547 }
548 
549 /*----------------------------------------------------------------------------
550  * Build temporary equivalence structure for data in a given block,
551  * and associate an equivalence id to received elements (-1 for elements
552  * with no correponding elements)
553  *
554  * parameters:
555  *   n_elts_recv     <-- number of elements received
556  *   recv_rank       <-- source ranks received
557  *   recv_global_num <-- global numbering received
558  *   recv_num        <-- local numbering received
559  *   equiv_id        --> equivalence id for each element (-1 if none)
560  *
561  * returns:
562  *   temporary equivalence structure for block
563  *----------------------------------------------------------------------------*/
564 
565 #if defined(__INTEL_COMPILER)
566 #pragma optimization_level 2 /* Crash with O3 on IA64 with icc 9.1 20070320 */
567 #endif
568 
569 static _per_block_equiv_t
_block_global_num_to_equiv(cs_lnum_t n_elts_recv,const int recv_rank[],const cs_gnum_t recv_global_num[],const cs_lnum_t recv_num[],cs_lnum_t equiv_id[])570 _block_global_num_to_equiv(cs_lnum_t          n_elts_recv,
571                            const int          recv_rank[],
572                            const cs_gnum_t    recv_global_num[],
573                            const cs_lnum_t    recv_num[],
574                            cs_lnum_t          equiv_id[])
575 {
576   /* Initialize return structure */
577 
578   _per_block_equiv_t  e = {.count = 0,
579                            .shift = NULL,
580                            .rank  = NULL,
581                            .tr_id = NULL,
582                            .num   = NULL};
583 
584   if (n_elts_recv == 0)
585     return e;
586 
587   /* Determine equivalent elements; requires ordering to loop through buffer
588      by increasing number. */
589 
590   cs_lnum_t   *recv_order = NULL;
591   BFT_MALLOC(recv_order, n_elts_recv, cs_lnum_t);
592 
593   cs_order_gnum_allocated(NULL,
594                           recv_global_num,
595                           recv_order,
596                           n_elts_recv);
597 
598   /* Loop by increasing number: if two elements have the same global
599      number, they are equivalent. We do not increment equivalence counts
600      as soon as two elements are equivalent, as three equivalent elements
601      should have the same equivalence id. Rather, we increment the
602      equivalence counter when the previous element was part of an
603      equivalence and the current element is not part of this same
604      equivalence. */
605 
606   equiv_id[recv_order[0]] = -1;
607   cs_gnum_t prev_num = recv_global_num[recv_order[0]];
608 
609   for (cs_lnum_t i = 1; i < n_elts_recv; i++) {
610     cs_gnum_t cur_num = recv_global_num[recv_order[i]];
611     if (cur_num == prev_num) {
612       equiv_id[recv_order[i-1]] = e.count;
613       equiv_id[recv_order[i]]   = e.count;
614     }
615     else {
616       if (equiv_id[recv_order[i-1]] > -1)
617         e.count++;
618       equiv_id[recv_order[i]] = -1;
619     }
620     prev_num = cur_num;
621   }
622   if (equiv_id[recv_order[n_elts_recv-1]] > -1)
623     e.count++;
624 
625   BFT_FREE(recv_order);
626 
627   /* Count number of elements associated with each equivalence */
628 
629   int  *multiple;
630   BFT_MALLOC(multiple, e.count, int);
631   for (cs_lnum_t i = 0; i < e.count; multiple[i++] = 0);
632   for (cs_lnum_t i = 0; i < n_elts_recv; i++) {
633     if (equiv_id[i] > -1)
634       multiple[equiv_id[i]] += 1;
635   }
636 
637   BFT_MALLOC(e.shift, e.count+1, cs_lnum_t);
638   e.shift[0] = 0;
639   for (cs_lnum_t i = 0; i < e.count; i++) {
640     e.shift[i+1] = e.shift[i] + multiple[i];
641     multiple[i] = 0;
642   }
643 
644   /* Build equivalence data */
645 
646   BFT_MALLOC(e.rank, e.shift[e.count], int);
647   BFT_MALLOC(e.num, e.shift[e.count], cs_lnum_t);
648 
649   for (cs_lnum_t i = 0; i < n_elts_recv; i++) {
650     if (equiv_id[i] > -1) {
651       cs_lnum_t j = e.shift[equiv_id[i]] + multiple[equiv_id[i]];
652       e.rank[j] = recv_rank[i];
653       e.num[j] = recv_num[i];
654       multiple[equiv_id[i]] += 1;
655     }
656   }
657 
658   BFT_FREE(multiple);
659 
660   return e;
661 }
662 
663 /*----------------------------------------------------------------------------
664  * Build global interface data from flat equivalence data
665  * (usually prepared and received from distant ranks).
666  *
667  * parameters:
668  *   ifs  <-> pointer to structure that should be updated
669  *   tr_index_size       <-- size of transform index (number of transforms
670  *                           + 1 for idelement + 1 for past-the-end);
671  *                           0 or 1 if no transforms are present
672  *   n_elts_recv         <-- size of received data
673  *   equiv_recv          <-- flat (received) equivalence data; for each
674  *                           equivalence, we have:
675  *                           {local_number, n_equivalents,
676  *                            {distant_number, distant_rank}*n_equivalents}
677  *----------------------------------------------------------------------------*/
678 
679 static void
_interfaces_from_flat_equiv(cs_interface_set_t * ifs,int tr_index_size,cs_lnum_t n_elts_recv,const cs_lnum_t equiv_recv[])680 _interfaces_from_flat_equiv(cs_interface_set_t  *ifs,
681                             int                  tr_index_size,
682                             cs_lnum_t            n_elts_recv,
683                             const cs_lnum_t      equiv_recv[])
684 {
685   cs_lnum_t i, j, k, l;
686   cs_lnum_t local_num, distant_num, n_sub, n_elts_rank_tr_size;
687   int rank, tr_id;
688 
689   int _tr_index_size = tr_index_size;
690   int _tr_stride = tr_index_size > 1 ? tr_index_size - 1 : 1;
691   int max_rank = 0, n_ranks = 0, start_id = 0;
692   int recv_step = 1;
693 
694   cs_lnum_t   *n_elts_rank = NULL;
695   cs_lnum_t   *n_elts_rank_tr = NULL;
696   int *interface_id = NULL;
697 
698   cs_interface_t *_interface = NULL;
699 
700   if (_tr_index_size == 0) {
701     _tr_index_size = 1;
702     _tr_stride = 1;
703   }
704   else if (_tr_index_size > 1)
705     recv_step = 2;
706 
707   /* Compute size of subsections for each rank */
708 
709   i = 0;
710   while (i < n_elts_recv) {
711     i++;
712     n_sub = equiv_recv[i++];
713     for (j = 0; j < n_sub; j++) {
714       i += recv_step;
715       rank = equiv_recv[i++];
716       if (rank > max_rank)
717         max_rank = rank;
718     }
719   }
720 
721   BFT_MALLOC(n_elts_rank, max_rank + 1, cs_lnum_t);
722 
723   for (i = 0; i < max_rank + 1; n_elts_rank[i++] = 0);
724 
725   i = 0;
726   while (i < n_elts_recv) {
727     i++;
728     n_sub = equiv_recv[i++];
729     for (j = 0; j < n_sub; j++) {
730       i += recv_step;
731       rank = equiv_recv[i++];
732       n_elts_rank[rank] += 1;
733     }
734   }
735 
736   /* Build final data structures */
737 
738   n_ranks = 0;
739   for (i = 0; i < max_rank + 1; i++) {
740     if (n_elts_rank[i] > 0)
741       n_ranks++;
742   }
743 
744   /* (Re-)Allocate structures */
745 
746   start_id = ifs->size;
747 
748   ifs->size += n_ranks;
749 
750   BFT_REALLOC(ifs->interfaces,
751               ifs->size,
752               cs_interface_t *);
753 
754   for (i = start_id; i < ifs->size; i++)
755     ifs->interfaces[i] = _cs_interface_create();
756 
757   /* Initialize rank info and interface id */
758 
759   n_ranks = 0;
760   BFT_MALLOC(interface_id, max_rank + 1, int);
761   for (i = 0; i < max_rank + 1; i++) {
762     if (n_elts_rank[i] > 0) {
763       interface_id[i] = start_id + n_ranks++;
764       (ifs->interfaces[interface_id[i]])->rank = i;
765       (ifs->interfaces[interface_id[i]])->size = n_elts_rank[i];
766     }
767     else
768       interface_id[i] = -1;
769   }
770 
771   BFT_FREE(n_elts_rank);
772 
773   /* n_elts_rank_tr will be used as a position counter for new interfaces */
774 
775   n_elts_rank_tr_size = (ifs->size - start_id)*_tr_stride;
776   BFT_MALLOC(n_elts_rank_tr, n_elts_rank_tr_size, cs_lnum_t);
777   for (i = 0; i < n_elts_rank_tr_size; i++)
778     n_elts_rank_tr[i] = 0;
779 
780   for (i = start_id; i < ifs->size; i++) {
781 
782     _interface = ifs->interfaces[i];
783 
784     BFT_MALLOC(_interface->elt_id, _interface->size, cs_lnum_t);
785     BFT_MALLOC(_interface->match_id, _interface->size, cs_lnum_t);
786 
787     if (_tr_index_size > 1) {
788       _interface->tr_index_size = _tr_index_size;
789       BFT_MALLOC(_interface->tr_index, _interface->tr_index_size, cs_lnum_t);
790       for (k = 0; k < _interface->tr_index_size; k++)
791         _interface->tr_index[k] = 0;
792     }
793     else {
794       _interface->tr_index_size = 0;
795       _interface->tr_index = NULL;
796     }
797 
798   }
799 
800   /* In absence of transforms, we may build the interface in one pass */
801 
802   if (_tr_index_size < 2)  {
803 
804     i = 0;
805     while (i < n_elts_recv) {
806 
807       local_num = equiv_recv[i++];
808       n_sub = equiv_recv[i++];
809 
810       for (j = 0; j < n_sub; j++) {
811 
812         distant_num = equiv_recv[i++];
813         rank = equiv_recv[i++];
814 
815         _interface = ifs->interfaces[interface_id[rank]];
816         k = interface_id[rank] - start_id;
817 
818         _interface->elt_id[n_elts_rank_tr[k]] = local_num - 1;
819         _interface->match_id[n_elts_rank_tr[k]] = distant_num - 1;
820         n_elts_rank_tr[k] += 1;
821 
822       }
823     }
824 
825   }
826 
827   /* If we have transforms, build the transform index first */
828 
829   else { /* if (_tr_index_size > 1) */
830 
831     /* Initial count */
832 
833     i = 0;
834     while (i < n_elts_recv) {
835 
836       i++;
837       n_sub = equiv_recv[i++];
838 
839       for (j = 0; j < n_sub; j++) {
840 
841         i++;
842         tr_id = equiv_recv[i++];
843         rank = equiv_recv[i++];
844 
845         _interface = ifs->interfaces[interface_id[rank]];
846 
847         _interface->tr_index[tr_id + 1] += 1;
848 
849       }
850     }
851 
852     /* Build index from initial count */
853 
854     for (i = start_id; i < ifs->size; i++) {
855       _interface = ifs->interfaces[i];
856       _interface->tr_index[0] = 0;
857       for (j = 1; j < _tr_index_size; j++)
858         _interface->tr_index[j] += _interface->tr_index[j-1];
859     }
860 
861     /* Now populate the arrays */
862 
863     i = 0;
864     while (i < n_elts_recv) {
865 
866       local_num = equiv_recv[i++];
867       n_sub = equiv_recv[i++];
868 
869       for (j = 0; j < n_sub; j++) {
870 
871         distant_num = equiv_recv[i++];
872         tr_id = equiv_recv[i++];
873         rank = equiv_recv[i++];
874 
875         _interface = ifs->interfaces[interface_id[rank]];
876         k = (interface_id[rank] - start_id)*_tr_stride + tr_id;
877 
878         l = _interface->tr_index[tr_id] + n_elts_rank_tr[k];
879 
880         _interface->elt_id[l] = local_num - 1;
881         _interface->match_id[l] = distant_num - 1;
882 
883         n_elts_rank_tr[k] += 1;
884 
885       }
886     }
887 
888   }
889 
890   /* n_elts_rank will now be used as a position counter for new interfaces */
891 
892   BFT_FREE(n_elts_rank_tr);
893   BFT_FREE(interface_id);
894 }
895 
896 /*----------------------------------------------------------------------------
897  * Creation of a list of interfaces between elements of a same type.
898  *
899  * The global_num values need not be ordered or contiguous.
900  *
901  * parameters:
902  *   ifs  <-> pointer to structure that should be updated
903  *   n_elts              <-- local number of elements
904  *   global_num          <-- global number (id) associated with each element
905  *   comm                <-- associated MPI communicator
906  *----------------------------------------------------------------------------*/
907 
908 static void
_add_global_equiv(cs_interface_set_t * ifs,cs_lnum_t n_elts,const cs_gnum_t global_num[],MPI_Comm comm)909 _add_global_equiv(cs_interface_set_t  *ifs,
910                   cs_lnum_t            n_elts,
911                   const cs_gnum_t      global_num[],
912                   MPI_Comm             comm)
913 {
914   /* Initialization */
915 
916   int  size, local_rank;
917   MPI_Comm_size(comm, &size);
918   MPI_Comm_rank(comm, &local_rank);
919 
920   /* Get temporary maximum global number value */
921 
922   cs_gnum_t global_max = _global_num_max(n_elts,
923                                          global_num,
924                                          comm);
925 
926   cs_block_dist_info_t
927     bi = cs_block_dist_compute_sizes(local_rank,
928                                      size,
929                                      1,
930                                      0,
931                                      global_max);
932 
933   int flags = CS_ALL_TO_ALL_ORDER_BY_SRC_RANK;
934 
935   cs_all_to_all_t
936     *d = cs_all_to_all_create_from_block(n_elts,
937                                          flags,
938                                          global_num,
939                                          bi,
940                                          comm);
941 
942   cs_gnum_t *recv_global_num = cs_all_to_all_copy_array(d,
943                                                         CS_GNUM_TYPE,
944                                                         1,
945                                                         false, /* reverse */
946                                                         global_num,
947                                                         NULL);
948 
949   cs_lnum_t  *send_num = NULL;
950   BFT_MALLOC(send_num, n_elts, cs_lnum_t);
951   for (cs_lnum_t i = 0; i < n_elts; i++)
952     send_num[i] = i+1;
953 
954   cs_lnum_t *recv_num = cs_all_to_all_copy_array(d,
955                                                  CS_LNUM_TYPE,
956                                                  1,
957                                                  false, /* reverse */
958                                                  send_num,
959                                                  NULL);
960 
961   BFT_FREE(send_num);
962 
963   cs_lnum_t n_elts_recv = cs_all_to_all_n_elts_dest(d);
964 
965   int *src_rank = cs_all_to_all_get_src_rank(d);
966 
967   /* Build equivalence data */
968 
969   cs_lnum_t  *equiv_id = NULL;
970   BFT_MALLOC(equiv_id, n_elts_recv, cs_lnum_t);
971 
972   _per_block_equiv_t  e = _block_global_num_to_equiv(n_elts_recv,
973                                                      src_rank,
974                                                      recv_global_num,
975                                                      recv_num,
976                                                      equiv_id);
977 
978   /* Now free Memory */
979 
980   BFT_FREE(recv_num);
981   BFT_FREE(recv_global_num);
982 
983   /* Now that equivalences are marked, count for each rank; for each
984      equivalence, we will need to send the corresponding element
985      numbers and ranks, for a total of 2*(e.multiple[...] - 1) values. */
986 
987   cs_lnum_t  *block_index;
988   BFT_MALLOC(block_index, n_elts_recv+1, cs_lnum_t);
989 
990   block_index[0] = 0;
991   for (cs_lnum_t i = 0; i < n_elts_recv; i++) {
992     cs_lnum_t n_eq = 0;
993     if (equiv_id[i] > -1) {
994       size_t e_id = equiv_id[i];
995       n_eq = 2*(e.shift[e_id+1] - e.shift[e_id]);
996     }
997     block_index[i+1] = block_index[i] + n_eq;
998   }
999 
1000   cs_lnum_t *part_index = cs_all_to_all_copy_index(d,
1001                                                    true, /* reverse */
1002                                                    block_index,
1003                                                    NULL);
1004 
1005   /* Now prepare new send buffer */
1006 
1007   cs_lnum_t  *block_equiv;
1008   BFT_MALLOC(block_equiv, block_index[n_elts_recv], cs_lnum_t);
1009 
1010   for (cs_lnum_t i = 0; i < n_elts_recv; i++) {
1011     if (equiv_id[i] > -1) {
1012 
1013       cs_lnum_t *block_equiv_p = block_equiv + block_index[i];
1014 
1015       cs_lnum_t        e_id = equiv_id[i];
1016       cs_lnum_t        k = 2;
1017       const int        multiple = e.shift[e_id+1] - e.shift[e_id];
1018       const int       *rank_p = e.rank + e.shift[e_id];
1019       const cs_lnum_t *num_p  = e.num  + e.shift[e_id];
1020 
1021       for (cs_lnum_t j = 0; j < multiple; j++) {
1022         if (rank_p[j] == src_rank[i]) {
1023           block_equiv_p[0] = num_p[j];
1024           block_equiv_p[1] = multiple - 1;
1025         }
1026         else {
1027           block_equiv_p[k++] = num_p[j];
1028           block_equiv_p[k++] = rank_p[j];
1029         }
1030       }
1031 
1032     }
1033   }
1034 
1035   /* Free temporary (block) equivalence info */
1036 
1037   e.count = 0;
1038   BFT_FREE(e.shift);
1039   BFT_FREE(e.rank);
1040   if (e.tr_id != NULL)
1041     BFT_FREE(e.tr_id);
1042   BFT_FREE(e.num);
1043 
1044   BFT_FREE(src_rank);
1045   BFT_FREE(equiv_id);
1046 
1047   /* Send prepared block data to destination rank */
1048 
1049   cs_lnum_t *part_equiv
1050     = cs_all_to_all_copy_indexed(d,
1051                                  CS_LNUM_TYPE,
1052                                  true, /* reverse */
1053                                  block_index,
1054                                  block_equiv,
1055                                  part_index,
1056                                  NULL);
1057 
1058   cs_lnum_t n_vals_part = part_index[n_elts];
1059 
1060   /* At this stage, MPI operations are finished; we may release
1061      the corresponding counts and indexes */
1062 
1063   BFT_FREE(block_equiv);
1064   BFT_FREE(part_index);
1065   BFT_FREE(block_index);
1066 
1067   cs_all_to_all_destroy(&d);
1068 
1069   /* Add interface */
1070 
1071   _interfaces_from_flat_equiv(ifs,
1072                               1,
1073                               n_vals_part,
1074                               part_equiv);
1075 
1076   BFT_FREE(part_equiv);
1077 }
1078 
1079 /*----------------------------------------------------------------------------
1080  * Build eventual tuples belonging to combined periodicities.
1081  *
1082  * parameters:
1083  *   block_size       <-- size of the block handled by each processor
1084  *   periodicit       <-- periodicity information (NULL if none)
1085  *   n_block_tuples   <-> number of tuples in current block
1086  *   block_tuples     <-> tuple information for this rank: for each tuple,
1087  *                        {global number of local element,
1088  *                         global number of periodic element,
1089  *                         transform id}
1090  *   comm             <-- associated MPI communicator
1091  *----------------------------------------------------------------------------*/
1092 
1093 static void
_combine_periodic_tuples(size_t block_size,const fvm_periodicity_t * periodicity,cs_lnum_t * n_block_tuples,cs_gnum_t ** block_tuples,MPI_Comm comm)1094 _combine_periodic_tuples(size_t                     block_size,
1095                          const fvm_periodicity_t   *periodicity,
1096                          cs_lnum_t                 *n_block_tuples,
1097                          cs_gnum_t                **block_tuples,
1098                          MPI_Comm                   comm)
1099 {
1100   int  *tr_reverse_id = NULL;
1101 
1102   int         *send_rank = NULL;
1103   cs_gnum_t   *send_tuples = NULL;
1104 
1105   cs_lnum_t   _n_block_tuples = *n_block_tuples;
1106   cs_gnum_t   *_block_tuples = *block_tuples;
1107 
1108   assert (periodicity != NULL);
1109 
1110   /* Initialization */
1111 
1112   /* Build periodicity related arrays for quick access */
1113 
1114   int n_tr = fvm_periodicity_get_n_transforms(periodicity);
1115 
1116   BFT_MALLOC(tr_reverse_id, n_tr, int);
1117 
1118   for (int i = 0; i < n_tr; i++)
1119     tr_reverse_id[i] = fvm_periodicity_get_reverse_id(periodicity, i);
1120 
1121   /* Loop on combination levels */
1122 
1123   for (int level = 1;
1124        level < fvm_periodicity_get_n_levels(periodicity);
1125        level++) {
1126 
1127     int  n_rows = 0;
1128     int  *tr_combine = NULL;
1129 
1130     _transform_combine_info(periodicity,
1131                             level,
1132                             &n_rows,
1133                             &tr_combine);
1134 
1135     /* Count values to exchange */
1136 
1137     size_t n_send = 0;
1138 
1139     cs_lnum_t start_id = 0;
1140     cs_lnum_t end_id = 1;
1141 
1142     while (end_id < _n_block_tuples) {
1143 
1144       if (_block_tuples[start_id*3] == _block_tuples[end_id*3]) {
1145 
1146         end_id++;
1147         while (end_id < _n_block_tuples) {
1148           if (_block_tuples[end_id*3] != _block_tuples[start_id*3])
1149             break;
1150           end_id++;
1151         }
1152 
1153         for (cs_lnum_t j = start_id; j < end_id; j++) {
1154           for (cs_lnum_t k = j+1; k < end_id; k++) {
1155 
1156             int tr_1 = tr_reverse_id[_block_tuples[j*3 + 2]];
1157             int tr_2 = _block_tuples[k*3 + 2];
1158 
1159             if (tr_combine[tr_1 *n_rows + tr_2] > -1)
1160               n_send += 2;
1161 
1162           }
1163         }
1164 
1165       }
1166 
1167       start_id = end_id;
1168       end_id += 1;
1169     }
1170 
1171     BFT_MALLOC(send_rank, n_send, int);
1172     BFT_MALLOC(send_tuples, n_send*3, cs_gnum_t);
1173 
1174     /* Now exchange combined tuples */
1175 
1176     start_id = 0;
1177     end_id = 1;
1178 
1179     cs_lnum_t l = 0;
1180 
1181     while (end_id < _n_block_tuples) {
1182 
1183       if (_block_tuples[start_id*3] == _block_tuples[end_id*3]) {
1184 
1185         end_id++;
1186         while (end_id < _n_block_tuples) {
1187           if (_block_tuples[end_id*3] != _block_tuples[start_id*3])
1188             break;
1189           end_id++;
1190         }
1191 
1192         for (cs_lnum_t j = start_id; j < end_id; j++) {
1193           for (cs_lnum_t k = j+1; k < end_id; k++) {
1194 
1195             int tr_1 = tr_reverse_id[_block_tuples[j*3 + 2]];
1196             int tr_2 = _block_tuples[k*3 + 2];
1197             int tr_c = tr_combine[tr_1 *n_rows + tr_2];
1198 
1199             if (tr_c > -1) {
1200 
1201               cs_gnum_t num_1 = _block_tuples[j*3 + 1];
1202               cs_gnum_t num_2 = _block_tuples[k*3 + 1];
1203 
1204               send_rank[l*2]   = (num_1 - 1) / block_size;
1205               send_rank[l*2+1] = (num_2 - 1) / block_size;
1206 
1207               send_tuples[l*6]   = num_1;
1208               send_tuples[l*6+1] = num_2;
1209               send_tuples[l*6+2] = tr_c;
1210 
1211               send_tuples[l*6+3] = num_2;
1212               send_tuples[l*6+4] = num_1;
1213               send_tuples[l*6+5] = tr_reverse_id[tr_c];
1214 
1215               l += 1;
1216 
1217             }
1218 
1219           }
1220         }
1221 
1222       }
1223 
1224       start_id = end_id;
1225       end_id += 1;
1226     }
1227 
1228     assert((cs_gnum_t)(l*2) == n_send);
1229 
1230     BFT_FREE(tr_combine);
1231 
1232     cs_all_to_all_t  *d = cs_all_to_all_create(n_send,
1233                                                0,
1234                                                NULL,
1235                                                send_rank,
1236                                                comm);
1237 
1238     cs_all_to_all_transfer_dest_rank(d, &send_rank);
1239 
1240     cs_gnum_t *recv_tuples = cs_all_to_all_copy_array(d,
1241                                                       CS_GNUM_TYPE,
1242                                                       3,
1243                                                       false, /* reverse */
1244                                                       send_tuples,
1245                                                       NULL);
1246 
1247     BFT_FREE(send_tuples);
1248 
1249     cs_lnum_t n_recv = cs_all_to_all_n_elts_dest(d);
1250 
1251     cs_all_to_all_destroy(&d);
1252 
1253     if (n_recv > 0) {
1254       BFT_REALLOC(_block_tuples, (_n_block_tuples + n_recv)*3, cs_gnum_t);
1255       memcpy(_block_tuples + _n_block_tuples*3,
1256              recv_tuples,
1257              n_recv*3*sizeof(cs_gnum_t));
1258     }
1259 
1260     BFT_FREE(recv_tuples);
1261 
1262     /* Finally, merge additional tuples received for block with
1263        existing periodicity information */
1264 
1265     if (n_recv > 0) {
1266 
1267       _n_block_tuples += n_recv;
1268 
1269       /* Sort and remove duplicates to update block periodicity info */
1270 
1271       _sort_periodic_tuples(&_n_block_tuples, &_block_tuples);
1272 
1273       *n_block_tuples = _n_block_tuples;
1274       *block_tuples = _block_tuples;
1275 
1276     }
1277 
1278   }
1279 
1280   BFT_FREE(tr_reverse_id);
1281 }
1282 
1283 /*----------------------------------------------------------------------------
1284  * Exchange periodic couple info between processors providing the data
1285  * and processors handling the related global numbering interval blocks.
1286  *
1287  * Note that the array pointed to by block_couples is allocated here,
1288  * and must be freed by the calling code.
1289  *
1290  * parameters:
1291  *   block_size          <-- size of the block handled by each processor
1292  *   periodicity         <-- periodicity information (NULL if none)
1293  *   n_periodic_lists    <-- number of periodic lists (may be local)
1294  *   periodicity_num     <-- periodicity number (1 to n) associated with
1295  *                           each periodic list (primary periodicities only)
1296  *   n_periodic_couples  <-- number of periodic couples associated with
1297  *                           each periodic list
1298  *   periodic_couples    <-- array indicating periodic couples (using
1299  *                           global numberings) for each list
1300  *   n_block_tuples      --> number of tuples in current block
1301  *   block_tuples       --> tuple information for block: for each tuple,
1302  *                           {global number of local element,
1303  *                            global number of periodic element,
1304  *                            transform id}
1305  *   comm                <-- associated MPI communicator
1306  *----------------------------------------------------------------------------*/
1307 
1308 static void
_exchange_periodic_tuples(size_t block_size,const fvm_periodicity_t * periodicity,int n_periodic_lists,const int periodicity_num[],const cs_lnum_t n_periodic_couples[],const cs_gnum_t * const periodic_couples[],cs_lnum_t * n_block_tuples,cs_gnum_t ** block_tuples,MPI_Comm comm)1309 _exchange_periodic_tuples(size_t                    block_size,
1310                           const fvm_periodicity_t  *periodicity,
1311                           int                       n_periodic_lists,
1312                           const int                 periodicity_num[],
1313                           const cs_lnum_t           n_periodic_couples[],
1314                           const cs_gnum_t    *const periodic_couples[],
1315                           cs_lnum_t                *n_block_tuples,
1316                           cs_gnum_t               **block_tuples,
1317                           MPI_Comm                  comm)
1318 {
1319   int        *periodic_block_rank = NULL;
1320   cs_gnum_t  *send_tuples = NULL;
1321 
1322   cs_gnum_t n_g_periodic_tuples = 0;
1323 
1324   /* Initialization */
1325 
1326   *n_block_tuples = 0;
1327   *block_tuples = NULL;
1328 
1329   for (int list_id = 0; list_id < n_periodic_lists; list_id++)
1330     n_g_periodic_tuples += 2 * n_periodic_couples[list_id];
1331 
1332   BFT_MALLOC(periodic_block_rank, n_g_periodic_tuples, int);
1333   BFT_MALLOC(send_tuples, n_g_periodic_tuples*3, cs_gnum_t);
1334 
1335   /* Prepare lists to send to distant processors */
1336 
1337   cs_lnum_t k = 0;
1338 
1339   for (int list_id = 0; list_id < n_periodic_lists; list_id++) {
1340 
1341     const int external_num = periodicity_num[list_id];
1342     const int direct_id = fvm_periodicity_get_transform_id(periodicity,
1343                                                            external_num,
1344                                                            1);
1345     const int reverse_id = fvm_periodicity_get_transform_id(periodicity,
1346                                                             external_num,
1347                                                             -1);
1348 
1349     const cs_lnum_t   _n_periodic_couples = n_periodic_couples[list_id];
1350     const cs_gnum_t   *_periodic_couples = periodic_couples[list_id];
1351 
1352     assert(direct_id >= 0 && reverse_id >= 0);
1353 
1354     for (cs_lnum_t couple_id = 0; couple_id < _n_periodic_couples; couple_id++) {
1355 
1356       cs_gnum_t num_1 = _periodic_couples[couple_id*2];
1357       cs_gnum_t num_2 = _periodic_couples[couple_id*2 + 1];
1358 
1359       periodic_block_rank[k*2]   = (num_1 - 1) / block_size;
1360       periodic_block_rank[k*2+1] = (num_2 - 1) / block_size;
1361 
1362       send_tuples[k*6]   = num_1;
1363       send_tuples[k*6+1] = num_2;
1364       send_tuples[k*6+2] = direct_id;
1365 
1366       send_tuples[k*6+3] = num_2;
1367       send_tuples[k*6+4] = num_1;
1368       send_tuples[k*6+5] = reverse_id;
1369 
1370       k += 1;
1371 
1372     }
1373 
1374   }
1375 
1376   assert((cs_gnum_t)(k*2) == n_g_periodic_tuples);
1377 
1378   /* Exchange data */
1379 
1380   cs_all_to_all_t
1381     *d_periodic = cs_all_to_all_create(n_g_periodic_tuples,
1382                                        0,
1383                                        NULL,
1384                                        periodic_block_rank,
1385                                        comm);
1386 
1387   cs_gnum_t *recv_tuples = cs_all_to_all_copy_array(d_periodic,
1388                                                     CS_GNUM_TYPE,
1389                                                     3,
1390                                                     false, /* reverse */
1391                                                     send_tuples,
1392                                                     NULL);
1393 
1394   cs_lnum_t _n_block_tuples = cs_all_to_all_n_elts_dest(d_periodic);
1395 
1396   BFT_FREE(send_tuples);
1397 
1398   cs_all_to_all_destroy(&d_periodic);
1399 
1400   BFT_FREE(periodic_block_rank);
1401 
1402   /* Sort periodic couples by local correspondant, remove duplicates */
1403 
1404   _sort_periodic_tuples(&_n_block_tuples,
1405                         &recv_tuples);
1406 
1407   /* Set return values */
1408 
1409   *n_block_tuples = _n_block_tuples;
1410   *block_tuples = recv_tuples;
1411 }
1412 
1413 /*----------------------------------------------------------------------------
1414  * Associate block ids for periodic couples.
1415  *
1416  * If a global number appears multiple times in a block, the lowest
1417  * occurence id is returned.
1418  *
1419  * parameters:
1420  *   n_block_elements <-- number of elements in block
1421  *   order            <-- block ordering by global number received
1422  *   block_global_num <-- global numbering received
1423  *   n_block_couples  <-- number of couples in current block
1424  *   stride           <-- stride for global number of local element
1425  *                        in block_couples[]
1426  *   block_couples    <-- couple information for block: for each couple,
1427  *                        {global number of local element,
1428  *                         global number of periodic element,
1429  *                         transform id} if stride = 3,
1430  *                        {global number of local element} if stride = 1
1431  *   couple_block_id  --> id in block of local couple element
1432  *   comm             <-- associated MPI communicator
1433  *----------------------------------------------------------------------------*/
1434 
1435 static void
_periodic_couples_block_id(cs_lnum_t n_block_elements,const cs_lnum_t order[],const cs_gnum_t block_global_num[],cs_lnum_t n_block_couples,int stride,const cs_gnum_t block_couples[],cs_lnum_t couple_block_id[])1436 _periodic_couples_block_id(cs_lnum_t          n_block_elements,
1437                            const cs_lnum_t    order[],
1438                            const cs_gnum_t    block_global_num[],
1439                            cs_lnum_t          n_block_couples,
1440                            int                stride,
1441                            const cs_gnum_t    block_couples[],
1442                            cs_lnum_t          couple_block_id[])
1443 {
1444   cs_lnum_t    couple_id;
1445 
1446   /* Initialization */
1447 
1448   assert(stride == 3 || stride == 1);
1449 
1450   if (n_block_couples == 0)
1451     return;
1452 
1453   /* Use binary search */
1454 
1455   for (couple_id = 0; couple_id < n_block_couples; couple_id++) {
1456 
1457     cs_gnum_t num_cmp;
1458     cs_lnum_t start_id = 0;
1459     cs_lnum_t end_id = n_block_elements - 1;
1460     cs_lnum_t mid_id = (end_id -start_id) / 2;
1461 
1462     const cs_gnum_t num_1 = block_couples[couple_id*stride];
1463 
1464     /* use binary search */
1465 
1466     while (start_id <= end_id) {
1467       num_cmp = block_global_num[order[mid_id]];
1468       if (num_cmp < num_1)
1469         start_id = mid_id + 1;
1470       else if (num_cmp > num_1)
1471         end_id = mid_id - 1;
1472       else
1473         break;
1474       mid_id = start_id + ((end_id -start_id) / 2);
1475     }
1476 
1477     /* In case of multiple occurences, find lowest one */
1478 
1479     while (mid_id > 0 &&  block_global_num[order[mid_id-1]] == num_1)
1480       mid_id--;
1481 
1482     assert(block_global_num[order[mid_id]] == num_1);
1483 
1484     couple_block_id[couple_id] = order[mid_id];
1485   }
1486 
1487 }
1488 
1489 /*----------------------------------------------------------------------------
1490  * Exchange periodic couple info between processors providing the data
1491  * and processors handling the related global numbering interval blocks.
1492  *
1493  * _periodic_couples_block_id() should have been called first.
1494  *
1495  * parameters:
1496  *   block_size       <-- size of the block handled by each processor
1497  *   equiv_id         <-- equivalence id for each block element (-1 if none)
1498  *   equiv            <-- temporary equivalence structure for block
1499  *   n_block_couples  <-- number of couples in current block
1500  *   block_couples    <-- couple information for block: for each couple,
1501  *                        {global number of local element,
1502  *                         global number of periodic element,
1503  *                         transform id}
1504  *   couple_block_id  <-- local id in block
1505  *   dest_rank        --> local number of values to send to each rank
1506  *   src index        --> local number of values to receive from each rank
1507  *----------------------------------------------------------------------------*/
1508 
1509 static void
_count_periodic_equiv_exchange(size_t block_size,const cs_lnum_t equiv_id[],const _per_block_equiv_t * equiv,cs_lnum_t n_block_couples,const cs_gnum_t block_couples[],const cs_lnum_t couple_block_id[],int dest_rank[],cs_lnum_t src_index[])1510 _count_periodic_equiv_exchange(size_t                     block_size,
1511                                const cs_lnum_t            equiv_id[],
1512                                const _per_block_equiv_t  *equiv,
1513                                cs_lnum_t                  n_block_couples,
1514                                const cs_gnum_t            block_couples[],
1515                                const cs_lnum_t            couple_block_id[],
1516                                int                        dest_rank[],
1517                                cs_lnum_t                  src_index[])
1518 {
1519   src_index[0] = 0;
1520 
1521   if (equiv != NULL && equiv_id != NULL) {
1522 
1523     /* Compute list sizes to send to distant processors */
1524 
1525     for (cs_lnum_t couple_id = 0; couple_id < n_block_couples; couple_id++) {
1526 
1527       int e_mult;
1528       cs_gnum_t num_2 = block_couples[couple_id*3 + 1];
1529       cs_lnum_t e_id = equiv_id[couple_block_id[couple_id]];
1530 
1531       if (e_id > -1)
1532         e_mult = equiv->shift[e_id +1] - equiv->shift[e_id];
1533       else
1534         e_mult = 1;
1535 
1536       dest_rank[couple_id] = (num_2 - 1) / block_size;
1537       src_index[couple_id+1] = src_index[couple_id] + 3 + 2*e_mult;
1538 
1539     }
1540 
1541   }
1542   else { /* if (equiv == NULL || equiv_id == NULL) */
1543 
1544     for (cs_lnum_t couple_id = 0; couple_id < n_block_couples; couple_id++) {
1545 
1546       cs_gnum_t num_2 = block_couples[couple_id*3 + 1];
1547 
1548       dest_rank[couple_id] = (num_2 - 1) / block_size;
1549       src_index[couple_id+1] = src_index[couple_id] + 5;
1550 
1551     }
1552 
1553   }
1554 }
1555 
1556 /*----------------------------------------------------------------------------
1557  * Exchange periodic couple info between processors providing the data
1558  * and processors handling the related global numbering interval blocks.
1559  *
1560  * parameters:
1561  *   block_size       <-- size of the block handled by each processor
1562  *   n_block_elements <-- number of elements in local block
1563  *   src_rank         <-- source rank (size: block_size)
1564  *   block_global_num <-- global numbering received
1565  *   block_num        <-- local numbering received
1566  *   equiv_id         <-- equivalence id for each block element (-1 if none)
1567  *   equiv            <-- temporary equivalence structure for block
1568  *   periodicity      <-- periodicity information (NULL if none)
1569  *   n_block_couples  <-- number of couples in current block
1570  *   block_couples    <-- couple information for block: for each couple,
1571  *                        {global number of local element,
1572  *                         global number of periodic element,
1573  *                         transform id}
1574  *   comm             <-- associated MPI communicator
1575  *
1576  * returns:
1577  *   structure defining a temporary list of periodic interfaces
1578  *----------------------------------------------------------------------------*/
1579 
1580 static _per_block_period_t
_exchange_periodic_equiv(size_t block_size,cs_lnum_t n_block_elements,const int src_rank[],const cs_gnum_t block_global_num[],const cs_lnum_t block_num[],const cs_lnum_t equiv_id[],const _per_block_equiv_t * equiv,const fvm_periodicity_t * periodicity,cs_lnum_t n_block_couples,const cs_gnum_t block_couples[],MPI_Comm comm)1581 _exchange_periodic_equiv(size_t                     block_size,
1582                          cs_lnum_t                  n_block_elements,
1583                          const int                  src_rank[],
1584                          const cs_gnum_t            block_global_num[],
1585                          const cs_lnum_t            block_num[],
1586                          const cs_lnum_t            equiv_id[],
1587                          const _per_block_equiv_t  *equiv,
1588                          const fvm_periodicity_t   *periodicity,
1589                          cs_lnum_t                  n_block_couples,
1590                          const cs_gnum_t            block_couples[],
1591                          MPI_Comm                   comm)
1592 {
1593   cs_lnum_t *couple_block_id = NULL;
1594   int *reverse_tr_id = NULL;
1595   cs_lnum_t *order = NULL;
1596   cs_gnum_t *block_recv_num = NULL;
1597 
1598   _per_block_period_t pe;
1599 
1600   /* Initialize return structure */
1601 
1602   pe.count = 0;
1603   pe.block_id = NULL;
1604   pe.tr_id = NULL;
1605   pe.shift = NULL;
1606   pe.rank  = NULL;
1607   pe.num   = NULL;
1608 
1609   if (periodicity == NULL)
1610     return pe;
1611 
1612   /* Build ordering array for binary search */
1613 
1614   order = cs_order_gnum(NULL, block_global_num, n_block_elements);
1615 
1616   /* Associate id in block for periodic couples prior to sending */
1617 
1618   BFT_MALLOC(couple_block_id, n_block_couples, cs_lnum_t);
1619 
1620   _periodic_couples_block_id(n_block_elements,
1621                              order,
1622                              block_global_num,
1623                              n_block_couples,
1624                              3,
1625                              block_couples,
1626                              couple_block_id);
1627 
1628   /* build count and shift arrays for parallel exchange */
1629 
1630   int  *send_rank;
1631   cs_lnum_t  *src_index;
1632   BFT_MALLOC(send_rank, n_block_couples, int);
1633   BFT_MALLOC(src_index, n_block_couples+1, cs_lnum_t);
1634 
1635   _count_periodic_equiv_exchange(block_size,
1636                                  equiv_id,
1637                                  equiv,
1638                                  n_block_couples,
1639                                  block_couples,
1640                                  couple_block_id,
1641                                  send_rank,
1642                                  src_index);
1643 
1644   cs_all_to_all_t  *d = cs_all_to_all_create(n_block_couples,
1645                                              0,
1646                                              NULL,
1647                                              send_rank,
1648                                              comm);
1649 
1650   cs_all_to_all_transfer_dest_rank(d, &send_rank);
1651 
1652   cs_lnum_t *dest_index = cs_all_to_all_copy_index(d,
1653                                                    false, /* reverse */
1654                                                    src_index,
1655                                                    NULL);
1656 
1657   /* arrays to exchange; most of the exchanged data is of type int or
1658      cs_lnum_t, using only positive values. For each periodic couple,
1659      one value of type cs_gnum_t is exchanged, so to group all communication
1660      in one MPI call, all data is exchanged as cs_gnum_t, even if this
1661      means larger messages if cs_gnum_t is larger than cs_lnum_t.
1662      As the number of elements per couple is variable (depending on prior
1663      equivalence info), using an MPI datatype to mix int and cs_gnum_t
1664      types rather than casting all to cs_gnum_t is not feasible */
1665 
1666   cs_gnum_t *equiv_send;
1667   BFT_MALLOC(equiv_send, src_index[n_block_couples], cs_gnum_t);
1668 
1669   /* temporary array to find reverse transforms */
1670 
1671   int n_tr = fvm_periodicity_get_n_transforms(periodicity);
1672 
1673   BFT_MALLOC(reverse_tr_id, n_tr, int);
1674 
1675   for (int tr_id = 0; tr_id < n_tr; tr_id++)
1676     reverse_tr_id[tr_id] = fvm_periodicity_get_reverse_id(periodicity, tr_id);
1677 
1678   if (equiv != NULL && equiv_id != NULL) {
1679 
1680     /* Compute list sizes to send to distant processors */
1681 
1682     for (cs_lnum_t couple_id = 0; couple_id < n_block_couples; couple_id++) {
1683 
1684       const cs_gnum_t num_2 = block_couples[couple_id*3 + 1];
1685       const cs_lnum_t local_id = couple_block_id[couple_id];
1686       const cs_lnum_t e_id = equiv_id[local_id];
1687 
1688       cs_lnum_t i = src_index[couple_id];
1689 
1690       if (e_id > -1) {
1691 
1692         int j_start = equiv->shift[e_id];
1693         int j_end = equiv->shift[e_id + 1];
1694 
1695         equiv_send[i++] = j_end - j_start;
1696         equiv_send[i++] = num_2;
1697         equiv_send[i++] = reverse_tr_id[block_couples[couple_id*3 + 2]];
1698 
1699         for (int j = j_start; j < j_end; j++) {
1700           equiv_send[i++] = equiv->rank[j];
1701           equiv_send[i++] = equiv->num[j];
1702         }
1703 
1704       }
1705       else {
1706 
1707         equiv_send[i++] = 1;
1708         equiv_send[i++] = num_2;
1709         equiv_send[i++] = reverse_tr_id[block_couples[couple_id*3 + 2]];
1710         equiv_send[i++] = src_rank[local_id];
1711         equiv_send[i++] = block_num[local_id];
1712 
1713       }
1714 
1715     }
1716 
1717   }
1718   else { /* if (equiv == NULL || equiv_id == NULL) */
1719 
1720     for (cs_lnum_t couple_id = 0; couple_id < n_block_couples; couple_id++) {
1721 
1722       const cs_gnum_t num_2 = block_couples[couple_id*3 + 1];
1723       const cs_lnum_t local_id = couple_block_id[couple_id];
1724 
1725       cs_lnum_t i = src_index[couple_id];
1726 
1727       equiv_send[i++] = 1;
1728       equiv_send[i++] = num_2;
1729       equiv_send[i++] = reverse_tr_id[block_couples[couple_id*3 + 2]];
1730       equiv_send[i++] = src_rank[local_id];
1731       equiv_send[i++] = block_num[local_id];
1732 
1733     }
1734 
1735   }
1736 
1737   BFT_FREE(couple_block_id);
1738   BFT_FREE(reverse_tr_id);
1739 
1740   /* Parallel exchange */
1741 
1742   cs_gnum_t *equiv_recv
1743     = cs_all_to_all_copy_indexed(d,
1744                                  CS_GNUM_TYPE,
1745                                  false, /* reverse */
1746                                  src_index,
1747                                  equiv_send,
1748                                  dest_index,
1749                                  NULL);
1750 
1751   cs_lnum_t  n_elts_recv = cs_all_to_all_n_elts_dest(d);
1752   size_t  recv_size = dest_index[n_elts_recv];
1753 
1754   /* Free memory */
1755 
1756   BFT_FREE(src_index);
1757   BFT_FREE(dest_index);
1758   BFT_FREE(equiv_send);
1759 
1760   cs_all_to_all_destroy(&d);
1761 
1762   /* Build return structure */
1763 
1764   {
1765     size_t k, l, e_mult;
1766 
1767     pe.count = 0;
1768     size_t i = 0;
1769     size_t j = 0;
1770 
1771     while (i < recv_size) {
1772       pe.count += 1;
1773       j += equiv_recv[i];
1774       i += 3 + 2*equiv_recv[i];
1775     }
1776 
1777     BFT_MALLOC(block_recv_num, pe.count, cs_gnum_t);
1778 
1779     BFT_MALLOC(pe.tr_id, pe.count, int);
1780 
1781     BFT_MALLOC(pe.shift, pe.count + 1, int);
1782 
1783     BFT_MALLOC(pe.rank, j, int);
1784     BFT_MALLOC(pe.num, j, cs_lnum_t);
1785 
1786     pe.shift[0] = 0;
1787 
1788     for (i = 0, j = 0, k = 0, l = 0; i < (size_t)(pe.count); i++) {
1789 
1790       e_mult = equiv_recv[k++];
1791       block_recv_num[i] = equiv_recv[k++];
1792       pe.tr_id[i] = equiv_recv[k++];
1793 
1794       for (l = 0; l < e_mult; l++) {
1795         pe.rank[j] = equiv_recv[k++];
1796         pe.num[j] = equiv_recv[k++];
1797         pe.shift[i+1] = ++j;
1798       }
1799 
1800     }
1801 
1802   }
1803 
1804   BFT_FREE(equiv_recv);
1805 
1806   /* Associate id in block for received periodic equivalences */
1807 
1808   BFT_MALLOC(pe.block_id, pe.count, cs_lnum_t);
1809 
1810   _periodic_couples_block_id(n_block_elements,
1811                              order,
1812                              block_global_num,
1813                              pe.count,
1814                              1,
1815                              block_recv_num,
1816                              pe.block_id);
1817 
1818   /* Free remaining working arrays */
1819 
1820   BFT_FREE(block_recv_num);
1821   BFT_FREE(couple_block_id);
1822   BFT_FREE(order);
1823 
1824   return pe;
1825 }
1826 
1827 /*----------------------------------------------------------------------------
1828  * Merge periodic equivalent interface info with block equivalence info.
1829  *
1830  * Expands block equivalence info, and frees temporary list of periodic
1831  * interfaces.
1832  *
1833  * parameters:
1834  *   n_block_elts <-- number of block elements
1835  *   src_rank     <-- source rank
1836  *   block_num    <-- local numbering received
1837  *   equiv_id     <-> equivalence id for each block element (-1 if none)
1838  *   equiv        <-> temporary equivalence structure for block
1839  *   perio_equiv  <-> temporary list of periodic interfaces
1840  *
1841  * returns:
1842  *   structure defining a temporary equivalence structure
1843  *----------------------------------------------------------------------------*/
1844 
1845 #if defined(__INTEL_COMPILER)
1846 #pragma optimization_level 2 /* Crash with O3 on IA64 with icc 9.1 20070320 */
1847 #endif
1848 
1849 static void
_merge_periodic_equiv(cs_lnum_t n_block_elts,const int src_rank[],const cs_lnum_t block_num[],cs_lnum_t equiv_id[],_per_block_equiv_t * equiv,_per_block_period_t * perio_equiv)1850 _merge_periodic_equiv(cs_lnum_t             n_block_elts,
1851                       const int             src_rank[],
1852                       const cs_lnum_t       block_num[],
1853                       cs_lnum_t             equiv_id[],
1854                       _per_block_equiv_t   *equiv,
1855                       _per_block_period_t  *perio_equiv)
1856 {
1857   cs_lnum_t i;
1858   size_t j;
1859 
1860   cs_lnum_t  old_count, new_count;
1861   size_t new_size;
1862 
1863   cs_lnum_t  *eq_mult = NULL;
1864   cs_lnum_t  *new_shift = NULL;
1865 
1866   _per_block_period_t *pe = perio_equiv;
1867 
1868   assert(equiv != NULL);
1869 
1870   if (perio_equiv == NULL)
1871     return;
1872 
1873   old_count = equiv->count;
1874 
1875   /* Note that by construction, the global numbers of elements appearing
1876      in the original (parallel) equivalence must appear multiple times
1877      in the block (the original equivalences are built using this), and
1878      the global numbers of elements appearing only in the periodic
1879      equivalence must appear only once; thus only one equiv_id[] value
1880      needs to be updated when appending purely periodic equivalences */
1881 
1882   for (i = 0, new_count = old_count; i < pe->count; i++) {
1883     if (equiv_id[pe->block_id[i]] == -1)
1884       equiv_id[pe->block_id[i]] = new_count++;
1885   }
1886 
1887   BFT_MALLOC(eq_mult, new_count, cs_lnum_t);
1888 
1889   for (i = 0; i < old_count; i++)
1890     eq_mult[i] = equiv->shift[i+1] - equiv->shift[i];
1891 
1892   for (i = old_count; i < new_count; i++)
1893     eq_mult[i] = 0;
1894 
1895   for (i = 0; i < pe->count; i++) {
1896     if (eq_mult[equiv_id[pe->block_id[i]]] == 0)
1897       eq_mult[equiv_id[pe->block_id[i]]] += pe->shift[i+1] - pe->shift[i] + 1;
1898     else
1899       eq_mult[equiv_id[pe->block_id[i]]] += pe->shift[i+1] - pe->shift[i];
1900   }
1901 
1902   /* Build new (merged) index, resetting eq_mult to use as a counter */
1903 
1904   BFT_MALLOC(new_shift, new_count+1, cs_lnum_t);
1905 
1906   new_shift[0] = 0;
1907 
1908   for (i = 0; i < new_count; i++) {
1909     assert(eq_mult[i] > 0);
1910     new_shift[i+1] = new_shift[i] + eq_mult[i];
1911     eq_mult[i] = 0;
1912   }
1913 
1914   new_size = new_shift[new_count];
1915 
1916   /* Expand previous periodicity info */
1917   /*----------------------------------*/
1918 
1919   equiv->count = new_count;
1920 
1921   if (old_count > 0) {
1922 
1923     int k;
1924     int *new_rank = NULL;
1925     cs_lnum_t *new_num = NULL;
1926 
1927     BFT_MALLOC(new_rank, new_size, int);
1928 
1929     for (i = 0; i < old_count; i++) {
1930       eq_mult[i] = equiv->shift[i+1] - equiv->shift[i];
1931       for (k = 0; k < eq_mult[i]; k++)
1932         new_rank[new_shift[i] + k] = equiv->rank[equiv->shift[i] + k];
1933     }
1934 
1935     BFT_FREE(equiv->rank);
1936     equiv->rank = new_rank;
1937 
1938     BFT_MALLOC(new_num, new_size, cs_lnum_t);
1939 
1940     for (i = 0; i < old_count; i++) {
1941       for (k = 0; k < eq_mult[i]; k++)
1942         new_num[new_shift[i] + k] = equiv->num[equiv->shift[i] + k];
1943     }
1944     BFT_FREE(equiv->num);
1945     equiv->num = new_num;
1946 
1947     if (equiv->tr_id != NULL) {
1948 
1949       int *new_tr_id = NULL;
1950       BFT_MALLOC(new_tr_id, new_size, int);
1951       for (i = 0; i < old_count; i++) {
1952         for (k = 0; k < eq_mult[i]; k++)
1953           new_tr_id[new_shift[i] + k] = equiv->tr_id[equiv->shift[i] + k];
1954       }
1955       BFT_FREE(equiv->tr_id);
1956       equiv->tr_id = new_tr_id;
1957 
1958     }
1959 
1960     /* All is expanded at this stage, so old index may be replaced */
1961 
1962     BFT_FREE(equiv->shift);
1963     equiv->shift = new_shift;
1964 
1965   }
1966   else {
1967 
1968     BFT_FREE(equiv->shift);
1969     equiv->shift = new_shift;
1970     BFT_MALLOC(equiv->rank, new_size, int);
1971     BFT_MALLOC(equiv->num, new_size, cs_lnum_t);
1972 
1973   }
1974 
1975   if (equiv->tr_id == NULL) {
1976     BFT_MALLOC(equiv->tr_id, new_size, int);
1977     for (j = 0; j < new_size; j++)
1978       equiv->tr_id[j] = 0;
1979   }
1980 
1981   /* Now insert periodic equivalence info */
1982 
1983   for (cs_lnum_t k = 0; k < n_block_elts; k++) {
1984 
1985     if (equiv_id[k] >= old_count) {
1986 
1987       const cs_lnum_t eq_id = equiv_id[k];
1988       const cs_lnum_t l = equiv->shift[eq_id];
1989 
1990       assert(eq_mult[eq_id] == 0);
1991       equiv->rank[l] = src_rank[k];
1992       equiv->num[l] = block_num[k];
1993       equiv->tr_id[l] = 0;
1994       eq_mult[eq_id] = 1;
1995 
1996     }
1997   }
1998 
1999   for (i = 0; i < pe->count; i++) {
2000 
2001     int k, l;
2002     const cs_lnum_t _block_id = pe->block_id[i];
2003     const int eq_id = equiv_id[_block_id];
2004 
2005     for (k = pe->shift[i]; k < pe->shift[i+1]; k++) {
2006 
2007       l = equiv->shift[eq_id] + eq_mult[eq_id];
2008 
2009       equiv->rank[l] = pe->rank[k];
2010       equiv->num[l] = pe->num[k];
2011 
2012       equiv->tr_id[l] = pe->tr_id[i] + 1;
2013 
2014       eq_mult[eq_id] += 1;
2015 
2016     }
2017 
2018   }
2019 
2020   BFT_FREE(eq_mult);
2021 
2022   /* Free temporary periodic equivalence structure elements */
2023 
2024   BFT_FREE(pe->block_id);
2025   BFT_FREE(pe->tr_id);
2026   BFT_FREE(pe->shift);
2027   BFT_FREE(pe->rank);
2028   BFT_FREE(pe->num);
2029 }
2030 
2031 /*----------------------------------------------------------------------------
2032  * Creation of a list of interfaces between elements of a same type.
2033  *
2034  * parameters:
2035  *   ifs                 <-> pointer to structure that should be updated
2036  *   n_elts              <-- local number of elements
2037  *   global_num          <-- global number (id) associated with each element
2038  *   periodicity         <-- periodicity information (NULL if none)
2039  *   n_periodic_lists    <-- number of periodic lists (may be local)
2040  *   periodicity_num     <-- periodicity number (1 to n) associated with
2041  *                           each periodic list (primary periodicities only)
2042  *   n_periodic_couples  <-- number of periodic couples associated with
2043  *                           each periodic list
2044  *   periodic_couples    <-- array indicating periodic couples (using
2045  *                           global numberings) for each list
2046  *   comm                <-- associated MPI communicator
2047  *----------------------------------------------------------------------------*/
2048 
2049 static void
_add_global_equiv_periodic(cs_interface_set_t * ifs,cs_lnum_t n_elts,const cs_gnum_t global_num[],const fvm_periodicity_t * periodicity,int n_periodic_lists,const int periodicity_num[],const cs_lnum_t n_periodic_couples[],const cs_gnum_t * const periodic_couples[],MPI_Comm comm)2050 _add_global_equiv_periodic(cs_interface_set_t       *ifs,
2051                            cs_lnum_t                 n_elts,
2052                            const cs_gnum_t           global_num[],
2053                            const fvm_periodicity_t  *periodicity,
2054                            int                       n_periodic_lists,
2055                            const int                 periodicity_num[],
2056                            const cs_lnum_t           n_periodic_couples[],
2057                            const cs_gnum_t    *const periodic_couples[],
2058                            MPI_Comm                  comm)
2059 {
2060   cs_lnum_t   n_block_couples = 0;
2061   cs_lnum_t   *equiv_id = NULL, *couple_equiv_id = NULL;
2062   cs_gnum_t   *block_couples = NULL;
2063 
2064   /* Initialization */
2065 
2066   int  size, local_rank;
2067   MPI_Comm_size(comm, &size);
2068   MPI_Comm_rank(comm, &local_rank);
2069 
2070   /* Get temporary maximum global number value */
2071 
2072   cs_gnum_t global_max = _global_num_max(n_elts,
2073                                          global_num,
2074                                          comm);
2075 
2076   cs_block_dist_info_t
2077     bi = cs_block_dist_compute_sizes(local_rank,
2078                                      size,
2079                                      1,
2080                                      0,
2081                                      global_max);
2082 
2083   int flags = CS_ALL_TO_ALL_NEED_SRC_RANK;
2084 
2085   cs_all_to_all_t
2086     *d = cs_all_to_all_create_from_block(n_elts,
2087                                          flags,
2088                                          global_num,
2089                                          bi,
2090                                          comm);
2091 
2092   assert(sizeof(cs_gnum_t) >= sizeof(cs_lnum_t));
2093 
2094   cs_gnum_t *recv_global_num = cs_all_to_all_copy_array(d,
2095                                                         CS_GNUM_TYPE,
2096                                                         1,
2097                                                         false, /* reverse */
2098                                                         global_num,
2099                                                         NULL);
2100 
2101   cs_lnum_t  *send_num = NULL;
2102   BFT_MALLOC(send_num, n_elts, cs_lnum_t);
2103   for (cs_lnum_t i = 0; i < n_elts; i++)
2104     send_num[i] = i+1;
2105 
2106   cs_lnum_t *recv_num = cs_all_to_all_copy_array(d,
2107                                                  CS_LNUM_TYPE,
2108                                                  1,
2109                                                  false, /* reverse */
2110                                                  send_num,
2111                                                  NULL);
2112 
2113   BFT_FREE(send_num);
2114 
2115   cs_lnum_t n_elts_recv = cs_all_to_all_n_elts_dest(d);
2116 
2117   int *src_rank = cs_all_to_all_get_src_rank(d);
2118 
2119   /* Exchange periodicity information */
2120 
2121   _exchange_periodic_tuples(bi.block_size,
2122                             periodicity,
2123                             n_periodic_lists,
2124                             periodicity_num,
2125                             n_periodic_couples,
2126                             periodic_couples,
2127                             &n_block_couples,
2128                             &block_couples,
2129                             comm);
2130 
2131   /* Combine periodic couples if necessary */
2132 
2133   if (fvm_periodicity_get_n_levels(periodicity) > 1)
2134     _combine_periodic_tuples(bi.block_size,
2135                              periodicity,
2136                              &n_block_couples,
2137                              &block_couples,
2138                              comm);
2139 
2140   /* Build purely parallel equivalence data first */
2141 
2142   if (n_elts_recv > 0)
2143     BFT_MALLOC(equiv_id, n_elts_recv, cs_lnum_t);
2144 
2145   _per_block_equiv_t
2146     e = _block_global_num_to_equiv(n_elts_recv,
2147                                    src_rank,
2148                                    recv_global_num,
2149                                    recv_num,
2150                                    equiv_id);
2151 
2152   /* Now combine periodic and parallel equivalences */
2153 
2154   _per_block_period_t
2155     pe = _exchange_periodic_equiv(bi.block_size,
2156                                   n_elts_recv,
2157                                   src_rank,
2158                                   recv_global_num,
2159                                   recv_num,
2160                                   equiv_id,
2161                                   &e,
2162                                   periodicity,
2163                                   n_block_couples,
2164                                   block_couples,
2165                                   comm);
2166 
2167   BFT_FREE(recv_global_num);
2168 
2169   _merge_periodic_equiv(n_elts_recv,
2170                         src_rank,
2171                         recv_num,
2172                         equiv_id,
2173                         &e,
2174                         &pe);
2175 
2176   /* Free all arrays not needed anymore */
2177 
2178   BFT_FREE(recv_num);
2179 
2180   BFT_FREE(couple_equiv_id);
2181   BFT_FREE(block_couples);
2182   n_block_couples = 0;
2183 
2184   /* Now that equivalences are marked, count for each rank; for each
2185      equivalence, we will need to send the corresponding element
2186      numbers, ranks, and transform ids,
2187      for a total of 2 + 3*(e.multiple[...] - 1)
2188      = (3*e.multiple[...]) - 1 values. */
2189 
2190   cs_lnum_t  *block_index;
2191   BFT_MALLOC(block_index, n_elts_recv+1, cs_lnum_t);
2192 
2193   block_index[0] = 0;
2194   for (cs_lnum_t i = 0; i < n_elts_recv; i++) {
2195     cs_lnum_t n_eq = 0;
2196     if (equiv_id[i] > -1) {
2197       size_t e_id = equiv_id[i];
2198       n_eq = 3*(e.shift[e_id+1] - e.shift[e_id]) - 1;
2199     }
2200     block_index[i+1] = block_index[i] + n_eq;
2201   }
2202 
2203   cs_lnum_t *part_index = cs_all_to_all_copy_index(d,
2204                                                    true, /* reverse */
2205                                                    block_index,
2206                                                    NULL);
2207 
2208   /* Now prepare new send buffer */
2209 
2210   cs_lnum_t  *block_equiv;
2211   BFT_MALLOC(block_equiv, block_index[n_elts_recv], cs_lnum_t);
2212 
2213   for (cs_lnum_t i = 0; i < n_elts_recv; i++) {
2214     if (equiv_id[i] > -1) {
2215 
2216       cs_lnum_t *block_equiv_p = block_equiv + block_index[i];
2217 
2218       cs_lnum_t        e_id = equiv_id[i];
2219       cs_lnum_t        k = 2;
2220       const int        multiple = e.shift[e_id+1] - e.shift[e_id];
2221       const int       *rank_p = e.rank + e.shift[e_id];
2222       const int       *tr_id_p = e.tr_id + e.shift[e_id];
2223       const cs_lnum_t *num_p  = e.num  + e.shift[e_id];
2224 
2225       for (cs_lnum_t j = 0; j < multiple; j++) {
2226         if (rank_p[j] == src_rank[i] && tr_id_p[j] == 0) {
2227           block_equiv_p[0] = num_p[j];
2228           block_equiv_p[1] = multiple - 1;
2229         }
2230         else {
2231           block_equiv_p[k++] = num_p[j];
2232           block_equiv_p[k++] = tr_id_p[j];
2233           block_equiv_p[k++] = rank_p[j];
2234         }
2235       }
2236 
2237     }
2238   }
2239 
2240   /* Free temporary (block) equivalence info */
2241 
2242   e.count = 0;
2243   BFT_FREE(e.shift);
2244   BFT_FREE(e.rank);
2245   BFT_FREE(e.tr_id);
2246   BFT_FREE(e.num);
2247 
2248   BFT_FREE(src_rank);
2249   BFT_FREE(equiv_id);
2250 
2251   cs_lnum_t *part_equiv
2252     = cs_all_to_all_copy_indexed(d,
2253                                  CS_LNUM_TYPE,
2254                                  true, /* reverse */
2255                                  block_index,
2256                                  block_equiv,
2257                                  part_index,
2258                                  NULL);
2259 
2260   cs_lnum_t n_vals_part = part_index[n_elts];
2261 
2262   /* At this stage, MPI operations are finished; we may release
2263      the corresponding counts and indexes */
2264 
2265   BFT_FREE(block_equiv);
2266   BFT_FREE(part_index);
2267   BFT_FREE(block_index);
2268 
2269   cs_all_to_all_destroy(&d);
2270 
2271   /* Add interface */
2272 
2273   int tr_index_size = fvm_periodicity_get_n_transforms(periodicity) + 2;
2274 
2275   _interfaces_from_flat_equiv(ifs,
2276                               tr_index_size,
2277                               n_vals_part,
2278                               part_equiv);
2279 
2280   BFT_FREE(part_equiv);
2281 }
2282 
2283 #endif /* defined(HAVE_MPI) */
2284 
2285 /*----------------------------------------------------------------------------
2286  * Prepare periodic couple info in single process mode.
2287  *
2288  * Note that the array pointed to by block_couples is allocated here,
2289  * and must be freed by the calling code.
2290  *
2291  * parameters:
2292  *   periodicity        <-- periodicity information (NULL if none)
2293  *   n_periodic_lists   <-- number of periodic lists (may be local)
2294  *   periodicity_num    <-- periodicity number (1 to n) associated with
2295  *                          each periodic list (primary periodicities only)
2296  *   n_periodic_couples <-- number of periodic couples associated with
2297  *                          each periodic list
2298  *   periodic_couples   <-- array indicating periodic couples (using
2299  *                          global numberings) for each list
2300  *   n_couples          --> number of couples
2301  *   couples            --> couple information: for each couple,
2302  *                          {global number of local element,
2303  *                           global number of periodic element,
2304  *                           transform id}
2305  *----------------------------------------------------------------------------*/
2306 
2307 static void
_define_periodic_couples_sp(const fvm_periodicity_t * periodicity,int n_periodic_lists,const int periodicity_num[],const cs_lnum_t n_periodic_couples[],const cs_gnum_t * const periodic_couples[],cs_lnum_t * n_couples,cs_gnum_t ** couples)2308 _define_periodic_couples_sp(const fvm_periodicity_t  *periodicity,
2309                             int                       n_periodic_lists,
2310                             const int                 periodicity_num[],
2311                             const cs_lnum_t           n_periodic_couples[],
2312                             const cs_gnum_t    *const periodic_couples[],
2313                             cs_lnum_t                *n_couples,
2314                             cs_gnum_t               **couples)
2315 {
2316   int         list_id;
2317 
2318   cs_lnum_t   count = 0;
2319   cs_lnum_t    _n_couples = 0;
2320   cs_gnum_t   *_couples = NULL;
2321 
2322   /* Initialization */
2323 
2324   *n_couples = 0;
2325   *couples = NULL;
2326 
2327   for (list_id = 0, _n_couples = 0; list_id < n_periodic_lists; list_id++)
2328     _n_couples += n_periodic_couples[list_id] * 2;
2329 
2330   BFT_MALLOC(_couples, _n_couples*3, cs_gnum_t);
2331 
2332   /* Prepare lists */
2333 
2334   for (list_id = 0; list_id < n_periodic_lists; list_id++) {
2335 
2336     cs_lnum_t couple_id;
2337     cs_gnum_t num_1, num_2;
2338 
2339     const int external_num = periodicity_num[list_id];
2340     const int direct_id = fvm_periodicity_get_transform_id(periodicity,
2341                                                            external_num,
2342                                                            1);
2343     const int reverse_id = fvm_periodicity_get_transform_id(periodicity,
2344                                                             external_num,
2345                                                             -1);
2346 
2347     const cs_lnum_t   _n_periodic_couples = n_periodic_couples[list_id];
2348     const cs_gnum_t   *_periodic_couples = periodic_couples[list_id];
2349 
2350     assert(direct_id >= 0 && reverse_id >= 0);
2351 
2352     for (couple_id = 0; couple_id < _n_periodic_couples; couple_id++) {
2353 
2354       num_1 = _periodic_couples[couple_id*2];
2355       num_2 = _periodic_couples[couple_id*2 + 1];
2356 
2357       _couples[count] = num_1;
2358       _couples[count+1] = num_2;
2359       _couples[count+2] = direct_id;
2360 
2361       _couples[count+3] = num_2;
2362       _couples[count+4] = num_1;
2363       _couples[count+5] = reverse_id;
2364 
2365       count += 6;
2366 
2367     }
2368 
2369   }
2370 
2371   /* Sort periodic couples by local match, remove duplicates */
2372 
2373   _sort_periodic_tuples(&_n_couples, &_couples);
2374 
2375   /* Set return values */
2376 
2377   *n_couples = _n_couples;
2378   *couples = _couples;
2379 }
2380 
2381 /*----------------------------------------------------------------------------
2382  * Build eventual couples belonging to combined periodicities in single
2383  * process mode.
2384  *
2385  * parameters:
2386  *   periodicity <-- periodicity information (NULL if none)
2387  *   n_couples   <-> number of couples in current block
2388  *   couples     <-> couple information for this rank: for each couple,
2389  *                   {global number of local element,
2390  *                    global number of periodic element,
2391  *                    transform id}
2392  *----------------------------------------------------------------------------*/
2393 
2394 static void
_combine_periodic_couples_sp(const fvm_periodicity_t * periodicity,cs_lnum_t * n_couples,cs_gnum_t ** couples)2395 _combine_periodic_couples_sp(const fvm_periodicity_t   *periodicity,
2396                              cs_lnum_t                 *n_couples,
2397                              cs_gnum_t                **couples)
2398 {
2399   int  i, n_tr, level;
2400 
2401   cs_lnum_t   start_id, end_id, j, k;
2402 
2403   int  add_count = 0;
2404   int  *tr_reverse_id = NULL;
2405   cs_gnum_t *_add_couples = NULL;
2406 
2407   cs_lnum_t   _n_couples = *n_couples;
2408   cs_gnum_t   *_couples = *couples;
2409 
2410   assert (periodicity != NULL);
2411 
2412   /* Build periodicity related arrays for quick access */
2413 
2414   n_tr = fvm_periodicity_get_n_transforms(periodicity);
2415 
2416   BFT_MALLOC(tr_reverse_id, n_tr, int);
2417 
2418   for (i = 0; i < n_tr; i++)
2419     tr_reverse_id[i] = fvm_periodicity_get_reverse_id(periodicity, i);
2420 
2421   /* Loop on combination levels */
2422 
2423   for (level = 1;
2424        level < fvm_periodicity_get_n_levels(periodicity);
2425        level++) {
2426 
2427     int  n_rows = 0;
2428     int  *tr_combine = NULL;
2429 
2430     _transform_combine_info(periodicity,
2431                             level,
2432                             &n_rows,
2433                             &tr_combine);
2434 
2435     /* Count values to add */
2436 
2437     add_count = 0;
2438 
2439     start_id = 0;
2440     end_id = 1;
2441 
2442     while (end_id < _n_couples) {
2443 
2444       if (_couples[start_id*3] == _couples[end_id*3]) {
2445 
2446         end_id++;
2447         while (end_id < _n_couples) {
2448           if (_couples[end_id*3] != _couples[start_id*3])
2449             break;
2450           end_id++;
2451         }
2452 
2453         for (j = start_id; j < end_id; j++) {
2454           for (k = j+1; k < end_id; k++) {
2455 
2456             int tr_1 = tr_reverse_id[_couples[j*3 + 2]];
2457             int tr_2 = _couples[k*3 + 2];
2458 
2459             if (tr_combine[tr_1 *n_rows + tr_2] > -1)
2460               add_count += 6;
2461 
2462           }
2463         }
2464 
2465       }
2466 
2467       start_id = end_id;
2468       end_id += 1;
2469     }
2470 
2471     /* Nothing to do for this combination level if add_count = 0 */
2472 
2473     if (add_count == 0) {
2474       BFT_FREE(tr_combine);
2475       continue;
2476     }
2477 
2478     BFT_REALLOC(_couples, _n_couples*3 + add_count, cs_gnum_t);
2479 
2480     _add_couples = _couples + _n_couples*3;
2481 
2482     /* Now add combined couples */
2483 
2484     start_id = 0;
2485     end_id = 1;
2486 
2487     while (end_id < _n_couples) {
2488 
2489       if (_couples[start_id*3] == _couples[end_id*3]) {
2490 
2491         end_id++;
2492         while (end_id < _n_couples) {
2493           if (_couples[end_id*3] != _couples[start_id*3])
2494             break;
2495           end_id++;
2496         }
2497 
2498         /* Loop on couple combinations */
2499 
2500         for (j = start_id; j < end_id; j++) {
2501           for (k = j+1; k < end_id; k++) {
2502 
2503             cs_gnum_t num_1 = _couples[j*3 + 1];
2504             cs_gnum_t num_2 = _couples[k*3 + 1];
2505             int tr_1 = tr_reverse_id[_couples[j*3 + 2]];
2506             int tr_2 = _couples[k*3 + 2];
2507             int tr_c = tr_combine[tr_1 *n_rows + tr_2];
2508 
2509             if (tr_c > -1) {
2510 
2511               _add_couples[0] = num_1;
2512               _add_couples[1] = num_2;
2513               _add_couples[2] = tr_c;
2514 
2515               _add_couples[3] = num_2;
2516               _add_couples[4] = num_1;
2517               _add_couples[5] = tr_reverse_id[tr_c];
2518 
2519               _add_couples += 6;
2520 
2521             }
2522 
2523           }
2524         } /* End of loop on couple combinations */
2525 
2526       }
2527 
2528       start_id = end_id;
2529       end_id += 1;
2530     }
2531 
2532     BFT_FREE(tr_combine);
2533 
2534     /* Finally, merge additional couples received for block
2535        existing periodicity information */
2536 
2537     assert(add_count % 3 == 0);
2538 
2539     _n_couples += add_count / 3;
2540 
2541     /* Sort and remove duplicates to update periodicity info */
2542 
2543     _sort_periodic_tuples(&_n_couples, &_couples);
2544 
2545     *n_couples = _n_couples;
2546     *couples = _couples;
2547 
2548   }
2549 
2550   BFT_FREE(tr_reverse_id);
2551 }
2552 
2553 /*----------------------------------------------------------------------------
2554  * Creation of a list of interfaces between elements of a same type.
2555  *
2556  * This simplified algorithm is intended for single-process mode.
2557  *
2558  * parameters:
2559  *   ifs                <-> pointer to structure that should be updated
2560  *   n_elts              <-- local number of elements
2561  *   global_num          <-- global number (id) associated with each element
2562  *   periodicity        <-- periodicity information (NULL if none)
2563  *   n_periodic_lists   <-- number of periodic lists (may be local)
2564  *   periodicity_num    <-- periodicity number (1 to n) associated with
2565  *                          each periodic list (primary periodicities only)
2566  *   n_periodic_couples <-- number of periodic couples associated with
2567  *                          each periodic list
2568  *   periodic_couples   <-- array indicating periodic couples (using
2569  *                          global numberings) for each list
2570  *----------------------------------------------------------------------------*/
2571 
2572 static void
_add_global_equiv_periodic_sp(cs_interface_set_t * ifs,cs_lnum_t n_elts,const cs_gnum_t global_num[],const fvm_periodicity_t * periodicity,int n_periodic_lists,const int periodicity_num[],const cs_lnum_t n_periodic_couples[],const cs_gnum_t * const periodic_couples[])2573 _add_global_equiv_periodic_sp(cs_interface_set_t       *ifs,
2574                               cs_lnum_t                 n_elts,
2575                               const cs_gnum_t           global_num[],
2576                               const fvm_periodicity_t  *periodicity,
2577                               int                       n_periodic_lists,
2578                               const int                 periodicity_num[],
2579                               const cs_lnum_t           n_periodic_couples[],
2580                               const cs_gnum_t    *const periodic_couples[])
2581 {
2582   cs_lnum_t   i, couple_id;
2583   cs_lnum_t   n_couples = 0;
2584   cs_lnum_t   *n_elts_tr = NULL;
2585   cs_gnum_t   *couples = NULL;
2586 
2587   cs_interface_t  *_interface = NULL;
2588 
2589   assert(sizeof(cs_gnum_t) >= sizeof(cs_lnum_t));
2590 
2591   /* Organize periodicity information */
2592 
2593   _define_periodic_couples_sp(periodicity,
2594                               n_periodic_lists,
2595                               periodicity_num,
2596                               n_periodic_couples,
2597                               periodic_couples,
2598                               &n_couples,
2599                               &couples);
2600 
2601   /* Combine periodic couples if necessary */
2602 
2603   if (fvm_periodicity_get_n_levels(periodicity) > 1)
2604     _combine_periodic_couples_sp(periodicity,
2605                                  &n_couples,
2606                                  &couples);
2607 
2608   /* Add interface to set */
2609 
2610   ifs->size += 1;
2611 
2612   BFT_REALLOC(ifs->interfaces,
2613               ifs->size,
2614               cs_interface_t *);
2615 
2616   _interface = _cs_interface_create();
2617 
2618   ifs->interfaces[ifs->size - 1] = _interface;
2619 
2620   /* Build interface */
2621 
2622   _interface->rank = 0;
2623   _interface->size = n_couples;
2624 
2625   _interface->tr_index_size
2626     = fvm_periodicity_get_n_transforms(periodicity) + 2;
2627 
2628   BFT_MALLOC(_interface->tr_index, _interface->tr_index_size, cs_lnum_t);
2629   BFT_MALLOC(_interface->elt_id, _interface->size, cs_lnum_t);
2630   BFT_MALLOC(_interface->match_id, _interface->size, cs_lnum_t);
2631 
2632   /* Count couples for each transform */
2633 
2634   BFT_MALLOC(n_elts_tr, _interface->tr_index_size - 2, cs_lnum_t);
2635 
2636   for (i = 0; i < _interface->tr_index_size - 2; i++)
2637     n_elts_tr[i] = 0;
2638 
2639   for (couple_id = 0; couple_id < n_couples; couple_id++)
2640     n_elts_tr[couples[couple_id*3 + 2]] += 1;
2641 
2642   /* Build index */
2643 
2644   _interface->tr_index[0] = 0;
2645   _interface->tr_index[1] = 0;
2646 
2647   for (i = 2; i < _interface->tr_index_size; i++) {
2648     _interface->tr_index[i] = _interface->tr_index[i-1] + n_elts_tr[i-2];
2649     n_elts_tr[i-2] = 0;
2650   }
2651 
2652   /* Build local and distant correspondants */
2653 
2654   if (global_num == NULL) {
2655 
2656     for (couple_id = 0; couple_id < n_couples; couple_id++) {
2657 
2658       const int tr_id = couples[couple_id*3 + 2];
2659       const cs_lnum_t j = _interface->tr_index[tr_id + 1] + n_elts_tr[tr_id];
2660 
2661       _interface->elt_id[j] = couples[couple_id*3] - 1;
2662       _interface->match_id[j] = couples[couple_id*3 + 1] - 1;
2663 
2664       n_elts_tr[tr_id] += 1;
2665 
2666     }
2667 
2668   }
2669   else {
2670 
2671     cs_lnum_t *renum;
2672     BFT_MALLOC(renum, n_elts, cs_lnum_t);
2673     for (i = 0; i < n_elts; i++) {
2674       cs_lnum_t j = global_num[i] - 1;
2675       assert(j >= 0 && j < n_elts);
2676       renum[j] = i;
2677     }
2678 
2679     for (couple_id = 0; couple_id < n_couples; couple_id++) {
2680 
2681       const int tr_id = couples[couple_id*3 + 2];
2682       const cs_lnum_t j = _interface->tr_index[tr_id + 1] + n_elts_tr[tr_id];
2683 
2684       _interface->elt_id[j] = renum[couples[couple_id*3] - 1];
2685       _interface->match_id[j] = renum[couples[couple_id*3 + 1] - 1];
2686 
2687       n_elts_tr[tr_id] += 1;
2688 
2689     }
2690 
2691     BFT_FREE(renum);
2692   }
2693 
2694   /* Free temporary arrays */
2695 
2696   BFT_FREE(n_elts_tr);
2697   BFT_FREE(couples);
2698 }
2699 
2700 /*----------------------------------------------------------------------------
2701  * Order element id lists and update matching id lists for each
2702  * section of each interface of a set.
2703  *
2704  * This must be called as part of a renumbering algorithm, before
2705  * matching element ids may be replaced by send orderings.
2706  *
2707  * parameters:
2708  *   ifs <-> pointer to interface set
2709  *----------------------------------------------------------------------------*/
2710 
2711 static void
_order_elt_id(cs_interface_set_t * ifs)2712 _order_elt_id(cs_interface_set_t  *ifs)
2713 {
2714   int i;
2715 
2716   for (i = 0; i < ifs->size; i++) {
2717 
2718     int section_id;
2719     cs_lnum_t j;
2720 
2721     int  tr_index_size = 2;
2722     cs_lnum_t  _tr_index[2] = {0, 0};
2723     cs_lnum_t  *order = NULL;
2724     cs_lnum_t  *tmp = NULL;
2725     const cs_lnum_t  *tr_index = _tr_index;
2726 
2727     cs_interface_t *itf = ifs->interfaces[i];
2728 
2729     if (itf == NULL)
2730       return;
2731 
2732     /* When this function is called, a distant-rank interface should have
2733        a match_id array, but not a send_order array */
2734 
2735     assert(itf->send_order == NULL);
2736 
2737     _tr_index[1] = itf->size;
2738 
2739     if (itf->tr_index_size > 0) {
2740       tr_index_size = itf->tr_index_size;
2741       tr_index = itf->tr_index;
2742     }
2743 
2744     BFT_MALLOC(order, tr_index[tr_index_size - 1], cs_lnum_t);
2745     BFT_MALLOC(tmp, tr_index[tr_index_size - 1], cs_lnum_t);
2746 
2747     for (section_id = 0; section_id < tr_index_size - 1; section_id++) {
2748 
2749       cs_lnum_t start_id = tr_index[section_id];
2750       cs_lnum_t l = tr_index[section_id + 1] - start_id;
2751 
2752       /* Now compute distant ordering to build send_order */
2753 
2754       cs_order_lnum_allocated(NULL,
2755                               itf->elt_id + start_id,
2756                               order,
2757                               l);
2758 
2759       /* Update elt_id */
2760 
2761       for (j = 0; j < l; j++)
2762         tmp[j] = itf->elt_id[order[j] + start_id];
2763 
2764       memcpy(itf->elt_id + start_id, tmp, l*sizeof(cs_lnum_t));
2765 
2766       /* Update match_id */
2767 
2768       for (j = 0; j < l; j++)
2769         tmp[j] = itf->match_id[order[j] + start_id];
2770 
2771       memcpy(itf->match_id + start_id, tmp, l*sizeof(cs_lnum_t));
2772 
2773     }
2774 
2775     BFT_FREE(tmp);
2776     BFT_FREE(order);
2777 
2778   }
2779 }
2780 
2781 /*----------------------------------------------------------------------------
2782  * Order interfaces by increasing element id.
2783  *
2784  * parameters:
2785  *   ifs <-> pointer to interface set
2786  *----------------------------------------------------------------------------*/
2787 
2788 static void
_order_by_elt_id(cs_interface_set_t * ifs)2789 _order_by_elt_id(cs_interface_set_t  *ifs)
2790 {
2791   int i;
2792 
2793   for (i = 0; i < ifs->size; i++) {
2794 
2795     int section_id;
2796 
2797     int  tr_index_size = 2;
2798     cs_lnum_t  _tr_index[2] = {0, 0};
2799     cs_lnum_t  *order = NULL, *buffer = NULL;
2800     const cs_lnum_t  *tr_index = _tr_index;
2801 
2802     cs_interface_t *itf = ifs->interfaces[i];
2803 
2804     if (itf == NULL)
2805       return;
2806 
2807     /* When this function is called, a distant-rank interface should have
2808        a match_id array, but not a send_order array */
2809 
2810     assert(itf->send_order == NULL);
2811 
2812     _tr_index[1] = itf->size;
2813 
2814     if (itf->tr_index_size > 0) {
2815       tr_index_size = itf->tr_index_size;
2816       tr_index = itf->tr_index;
2817     }
2818 
2819     BFT_MALLOC(order, tr_index[tr_index_size - 1], cs_lnum_t);
2820     BFT_MALLOC(buffer, tr_index[tr_index_size - 1]*2, cs_lnum_t);
2821 
2822     for (section_id = 0; section_id < tr_index_size - 1; section_id++) {
2823 
2824       cs_lnum_t start_id = tr_index[section_id];
2825       cs_lnum_t end_id = tr_index[section_id + 1];
2826 
2827       /* Now compute distant ordering to build send_order */
2828 
2829       cs_order_lnum_allocated(NULL,
2830                               itf->elt_id + start_id,
2831                               order + start_id,
2832                               end_id - start_id);
2833 
2834       for (cs_lnum_t j = start_id; j < end_id; j++) {
2835         buffer[j*2] = itf->elt_id[j];
2836         buffer[j*2+1] = itf->match_id[j];
2837       }
2838 
2839       for (cs_lnum_t j = start_id; j < end_id; j++) {
2840         cs_lnum_t k = order[j] + start_id;
2841         itf->elt_id[j] = buffer[k*2];
2842         itf->match_id[j] = buffer[k*2+1];
2843       }
2844 
2845     }
2846 
2847     BFT_FREE(buffer);
2848     BFT_FREE(order);
2849 
2850   }
2851 }
2852 
2853 /*----------------------------------------------------------------------------
2854  * Replace array of distant element ids with ordering of list of ids to send,
2855  * so that sends will match receives.
2856  *
2857  * parameters:
2858  *   ifs <-> pointer to interface set
2859  *----------------------------------------------------------------------------*/
2860 
2861 static void
_match_id_to_send_order(cs_interface_set_t * ifs)2862 _match_id_to_send_order(cs_interface_set_t  *ifs)
2863 {
2864   int i;
2865 
2866   for (i = 0; i < ifs->size; i++) {
2867 
2868     int section_id;
2869     cs_lnum_t j, s_id, e_id;
2870 
2871     int  tr_index_size = 2;
2872     cs_lnum_t  _tr_index[2] = {0, 0};
2873     cs_lnum_t  *order = NULL;
2874     const cs_lnum_t  *tr_index = _tr_index;
2875 
2876     cs_interface_t *itf = ifs->interfaces[i];
2877 
2878     if (itf == NULL)
2879       return;
2880 
2881     /* When this function is called, a distant-rank interface should have
2882        a match_id array, but not a send_order array */
2883 
2884     assert(itf->send_order == NULL);
2885 
2886     _tr_index[1] = itf->size;
2887 
2888     if (itf->tr_index_size > 0) {
2889       tr_index_size = itf->tr_index_size;
2890       tr_index = itf->tr_index;
2891     }
2892 
2893     BFT_MALLOC(order, tr_index[tr_index_size - 1], cs_lnum_t);
2894 
2895     for (section_id = 0; section_id < tr_index_size - 1; section_id++) {
2896 
2897       cs_lnum_t start_id = tr_index[section_id];
2898       cs_lnum_t l = tr_index[section_id + 1] - start_id;
2899 
2900       /* Now compute distant ordering to build send_order */
2901 
2902       cs_order_lnum_allocated(NULL,
2903                               itf->match_id + start_id,
2904                               order + start_id,
2905                               l);
2906 
2907     }
2908 
2909     /* Swap match_id and send_order arrays */
2910 
2911     itf->send_order = itf->match_id;
2912     itf->match_id = NULL;
2913 
2914     /* Parallel-only elements */
2915 
2916     s_id = tr_index[0]; e_id = tr_index[1];
2917     for (j = s_id; j < e_id; j++)
2918       itf->send_order[j] = order[j] + s_id;
2919 
2920     /* Periodic elements */
2921 
2922     if (itf->tr_index_size > 0) {
2923 
2924       int tr_id;
2925       cs_lnum_t k = tr_index[1];
2926       const int n_tr = tr_index_size - 2;
2927 
2928       for (tr_id = 0; tr_id < n_tr; tr_id++) {
2929         int r_tr_id = fvm_periodicity_get_reverse_id(ifs->periodicity,
2930                                                      tr_id);
2931         s_id = tr_index[r_tr_id+1];
2932         e_id = tr_index[r_tr_id+2];
2933         for (j = s_id; j < e_id; j++)
2934           itf->send_order[k++] = order[j] + s_id;
2935       }
2936 
2937       assert(k == itf->size);
2938     }
2939 
2940     BFT_FREE(order);
2941 
2942   }
2943 }
2944 
2945 /*----------------------------------------------------------------------------
2946  * Prepare renumbering of elements referenced by an interface set.
2947  *
2948  * This requires replacing the send ordering of interfaces from a set
2949  * with the matching (distant or periodic) element id, to which renumbering
2950  * is applied. The send ordering will be rebuilt later.
2951  *
2952  * For any given element i, a negative old_to_new[i] value means that that
2953  * element does not appear anymore in the new numbering, but the filtering
2954  * is not applied at this stage.
2955  *
2956  * parameters:
2957  *   ifs        <-> pointer to interface set structure
2958  *   old_to_new <-- renumbering array (0 to n-1 numbering)
2959  *----------------------------------------------------------------------------*/
2960 
2961 static void
_set_renumber_update_ids(cs_interface_set_t * ifs,const cs_lnum_t old_to_new[])2962 _set_renumber_update_ids(cs_interface_set_t  *ifs,
2963                          const cs_lnum_t      old_to_new[])
2964 {
2965   int i;
2966   cs_lnum_t j;
2967   int local_rank = 0;
2968   cs_lnum_t *send_buf = NULL;
2969 
2970 #if defined(HAVE_MPI)
2971 
2972   int n_ranks = 1;
2973   int request_count = 0;
2974   MPI_Request *request = NULL;
2975   MPI_Status *status  = NULL;
2976 
2977   if (ifs->comm != MPI_COMM_NULL) {
2978     MPI_Comm_rank(ifs->comm, &local_rank);
2979     MPI_Comm_size(ifs->comm, &n_ranks);
2980   }
2981 
2982 #endif
2983 
2984   assert(ifs != NULL);
2985 
2986 #if defined(HAVE_MPI)
2987 
2988   if (n_ranks > 1)
2989     BFT_MALLOC(send_buf, cs_interface_set_n_elts(ifs), cs_lnum_t);
2990 
2991 #endif /* HAVE_MPI */
2992 
2993   /* Prepare send buffer first (for same rank, send_order is swapped
2994      with match_id directly) */
2995 
2996   for (i = 0, j = 0; i < ifs->size; i++) {
2997 
2998     cs_lnum_t k;
2999     cs_interface_t *itf = ifs->interfaces[i];
3000 
3001     /* When this function is called, a distant-rank interface should have
3002        a send_order array, but not a match_id array */
3003 
3004     assert(itf->match_id == NULL);
3005 
3006     for (k = 0; k < itf->size; k++)
3007       itf->elt_id[k] = old_to_new[itf->elt_id[k]];
3008 
3009     itf->match_id = itf->send_order; /* Pre swap of send_order with match_id */
3010 
3011     if (itf->rank != local_rank)
3012       for (k = 0; k < itf->size; k++) {
3013         send_buf[j + k] = itf->elt_id[itf->send_order[k]];
3014     }
3015     else {
3016       for (k = 0; k < itf->size; k++)
3017         itf->match_id[k] = itf->elt_id[itf->send_order[k]];
3018     }
3019 
3020     itf->send_order = NULL; /* Finish swap of send_order with match_id */
3021 
3022     j += itf->size;
3023 
3024   }
3025 
3026 #if defined(HAVE_MPI)
3027 
3028   /* Now exchange data using MPI */
3029 
3030   if (n_ranks > 1) {
3031 
3032     BFT_MALLOC(request, ifs->size*2, MPI_Request);
3033     BFT_MALLOC(status, ifs->size*2, MPI_Status);
3034 
3035     for (i = 0, j = 0; i < ifs->size; i++) {
3036       cs_interface_t *itf = ifs->interfaces[i];
3037       if (itf->rank != local_rank)
3038         MPI_Irecv(itf->match_id,
3039                   itf->size,
3040                   CS_MPI_LNUM,
3041                   itf->rank,
3042                   itf->rank,
3043                   ifs->comm,
3044                   &(request[request_count++]));
3045       j += itf->size;
3046     }
3047 
3048     for (i = 0, j = 0; i < ifs->size; i++) {
3049       cs_interface_t *itf = ifs->interfaces[i];
3050       if (itf->rank != local_rank)
3051         MPI_Isend(send_buf + j,
3052                   itf->size,
3053                   CS_MPI_LNUM,
3054                   itf->rank,
3055                   local_rank,
3056                   ifs->comm,
3057                   &(request[request_count++]));
3058       j += itf->size;
3059     }
3060 
3061     MPI_Waitall(request_count, request, status);
3062 
3063     BFT_FREE(request);
3064     BFT_FREE(status);
3065     BFT_FREE(send_buf);
3066 
3067   }
3068 
3069 #endif /* defined(HAVE_MPI) */
3070 }
3071 
3072 /*----------------------------------------------------------------------------
3073  * Copy array from distant or matching interface elements to local elements,
3074  * for strided and non-interleaved elements.
3075  *
3076  * The destination arrays defines values for all elements in the
3077  * interface set (i.e. all elements listed by cs_interface_get_elt_ids()
3078  * when looping over interfaces of a set, while the source array
3079  * defines values for all elements of the type associated with the interfaces.
3080  *
3081  * parameters:
3082  *   ifs           <-> pointer to interface set structure
3083  *   datatype      <-- type of data considered
3084  *   n_elts        <-- number of elements associated with interface
3085  *   stride        <-- number of values per entity (interlaced)
3086  *   src           <-- source array (size: cs_interface_set_n_elts(ifs)*stride
3087  *                     or parent array size * stride)
3088  *   dest          <-- destination array
3089  *                     (size: cs_interface_set_n_elts(ifs)*stride)
3090  *----------------------------------------------------------------------------*/
3091 
3092 static void
_interface_set_copy_array_ni(const cs_interface_set_t * ifs,cs_datatype_t datatype,cs_lnum_t n_elts,int stride,const void * src,void * dest)3093 _interface_set_copy_array_ni(const cs_interface_set_t  *ifs,
3094                              cs_datatype_t              datatype,
3095                              cs_lnum_t                  n_elts,
3096                              int                        stride,
3097                              const void                *src,
3098                              void                      *dest)
3099 {
3100   int i;
3101   cs_lnum_t j;
3102   int local_rank = 0;
3103   int type_size = cs_datatype_size[datatype];
3104   int stride_size = cs_datatype_size[datatype]*stride;
3105   cs_lnum_t shift_size = cs_datatype_size[datatype]*n_elts;
3106   unsigned char *send_buf = NULL;
3107   unsigned char *_dest = dest;
3108   const unsigned char *_src = src;
3109 
3110 #if defined(HAVE_MPI)
3111 
3112   int n_ranks = 1;
3113   int request_count = 0;
3114   MPI_Datatype mpi_type = cs_datatype_to_mpi[datatype];
3115   MPI_Request  *request = NULL;
3116   MPI_Status  *status  = NULL;
3117 
3118   if (ifs->comm != MPI_COMM_NULL) {
3119     MPI_Comm_rank(ifs->comm, &local_rank);
3120     MPI_Comm_size(ifs->comm, &n_ranks);
3121   }
3122 
3123 #endif
3124 
3125   assert(ifs != NULL);
3126 
3127   BFT_MALLOC(send_buf,
3128              cs_interface_set_n_elts(ifs)*stride_size,
3129              unsigned char);
3130 
3131   /* Prepare send buffer first (so that src is not used
3132      anymore after this, and may overlap with dest if desired);
3133      for same-rank interface, assign values directly to dest instead */
3134 
3135   for (i = 0, j = 0; i < ifs->size; i++) {
3136 
3137     cs_lnum_t k, l, m;
3138     cs_interface_t *itf = ifs->interfaces[i];
3139     unsigned char *p = send_buf;
3140 
3141     p += j*stride_size;
3142 
3143     for (k = 0; k < itf->size; k++) {
3144       cs_lnum_t send_id = itf->elt_id[itf->send_order[k]];
3145       for (m = 0; m < stride; m++) {
3146         for (l = 0; l < type_size; l++)
3147           p[(k*stride + m) * type_size + l]
3148             = _src[send_id*type_size + m*shift_size + l];
3149       }
3150     }
3151 
3152     j += itf->size;
3153 
3154   }
3155 
3156   /* Now exchange data */
3157 
3158 #if defined(HAVE_MPI)
3159 
3160   if (n_ranks > 1) {
3161     BFT_MALLOC(request, ifs->size*2, MPI_Request);
3162     BFT_MALLOC(status, ifs->size*2, MPI_Status);
3163   }
3164 
3165 #endif
3166 
3167   for (i = 0, j = 0; i < ifs->size; i++) {
3168 
3169     cs_interface_t *itf = ifs->interfaces[i];
3170 
3171     if (itf->rank == local_rank)
3172       memcpy(_dest + j*stride_size,
3173              send_buf + j*stride_size,
3174              itf->size*stride_size);
3175 
3176 #if defined(HAVE_MPI)
3177 
3178     else /* if (itf->rank != local_rank) */
3179       MPI_Irecv(_dest + j*stride_size,
3180                 itf->size*stride,
3181                 mpi_type,
3182                 itf->rank,
3183                 itf->rank,
3184                 ifs->comm,
3185                 &(request[request_count++]));
3186 
3187 #endif
3188 
3189     j += itf->size;
3190 
3191   }
3192 
3193 #if defined(HAVE_MPI)
3194 
3195   if (n_ranks > 1) {
3196 
3197     for (i = 0, j = 0; i < ifs->size; i++) {
3198       cs_interface_t *itf = ifs->interfaces[i];
3199       if (itf->rank != local_rank)
3200         MPI_Isend(send_buf + j*stride_size,
3201                   itf->size*stride,
3202                   mpi_type,
3203                   itf->rank,
3204                   local_rank,
3205                   ifs->comm,
3206                   &(request[request_count++]));
3207       j += itf->size;
3208     }
3209 
3210     MPI_Waitall(request_count, request, status);
3211 
3212     BFT_FREE(request);
3213     BFT_FREE(status);
3214 
3215   }
3216 
3217 #endif /* defined(HAVE_MPI) */
3218 
3219   BFT_FREE(send_buf);
3220 }
3221 
3222 /*----------------------------------------------------------------------------*/
3223 /*!
3224  * \brief Apply strided subdivision of elements to id array;
3225  *
3226  * \param[in]  size     original array size
3227  * \param[in]  stride   number of sub-elements per element
3228  * \param[in]  array_o  original array
3229  *
3230  * \return  subdivided array
3231  */
3232 /*----------------------------------------------------------------------------*/
3233 
3234 static cs_lnum_t *
_copy_sub_strided(cs_lnum_t size_old,cs_lnum_t stride,cs_lnum_t * array_o)3235 _copy_sub_strided(cs_lnum_t   size_old,
3236                   cs_lnum_t   stride,
3237                   cs_lnum_t  *array_o)
3238 {
3239   cs_lnum_t  size_new = size_old*stride;
3240 
3241   cs_lnum_t *array_n = NULL;
3242 
3243   if (array_o != NULL) {
3244     BFT_MALLOC(array_n, size_new, cs_lnum_t);
3245     for (cs_lnum_t i = 0; i < size_new; i++)
3246       array_n[i] = array_o[i/stride]*stride + i%stride;
3247   }
3248 
3249   return array_n;
3250 }
3251 
3252 /*----------------------------------------------------------------------------*/
3253 /*!
3254  * \brief Apply bloc subdivision of elements to interface;
3255  *
3256  * The match ids of interfaces should be available for this operation.
3257  *
3258  * \param[in]  o             reference interface
3259  * \param[in]  l_block_size  local block size
3260  * \param[in]  d_block_size  distant block size
3261  * \param[in]  n_blocks      number of blocks in new interface
3262  *
3263  * \return  blocked interface
3264  */
3265 /*----------------------------------------------------------------------------*/
3266 
3267 static cs_interface_t  *
_copy_sub_blocked(cs_interface_t * o,cs_lnum_t l_block_size,cs_lnum_t d_block_size,cs_lnum_t n_blocks)3268 _copy_sub_blocked(cs_interface_t  *o,
3269                   cs_lnum_t        l_block_size,
3270                   cs_lnum_t        d_block_size,
3271                   cs_lnum_t        n_blocks)
3272 {
3273   cs_interface_t *n = _cs_interface_create();
3274 
3275   n->rank = o->rank;
3276   n->size = o->size * n_blocks;
3277 
3278   n->tr_index_size = o->tr_index_size;
3279   if (o->tr_index != NULL) {
3280     BFT_MALLOC(n->tr_index, n->tr_index_size, cs_lnum_t);
3281     for (int j = 0; j < n->tr_index_size; j++)
3282       n->tr_index[j] = o->tr_index[j] * n_blocks;
3283   }
3284 
3285   const int _tr_index[2] = {0, o->size};
3286   const int *tr_index = (o->tr_index != NULL) ? o->tr_index : _tr_index;
3287   const int n_tr = (o->tr_index != NULL) ? o->tr_index_size - 1: 1;
3288 
3289   cs_lnum_t  size_new = o->size * n_blocks;
3290 
3291   if (o->elt_id != NULL) {
3292 
3293     BFT_MALLOC(n->elt_id, size_new, cs_lnum_t);
3294 
3295     cs_lnum_t j = 0;
3296     for (int tr_id = 0; tr_id < n_tr; tr_id++) {
3297       cs_lnum_t s_id = tr_index[tr_id];
3298       cs_lnum_t e_id = tr_index[tr_id+1];
3299       for (cs_lnum_t b_id = 0; b_id < n_blocks; b_id++) {
3300         for (cs_lnum_t i = s_id; i < e_id; i++) {
3301           n->elt_id[j++] = o->elt_id[i] + l_block_size*b_id;
3302         }
3303       }
3304     }
3305 
3306 
3307     BFT_MALLOC(n->match_id, size_new, cs_lnum_t);
3308 
3309     j = 0;
3310     for (int tr_id = 0; tr_id < n_tr; tr_id++) {
3311       cs_lnum_t s_id = tr_index[tr_id];
3312       cs_lnum_t e_id = tr_index[tr_id+1];
3313       for (cs_lnum_t b_id = 0; b_id < n_blocks; b_id++) {
3314         for (cs_lnum_t i = s_id; i < e_id; i++) {
3315           n->match_id[j++] = o->match_id[i] + d_block_size*b_id;
3316         }
3317       }
3318     }
3319   }
3320 
3321   return n;
3322 }
3323 
3324 /*! (DOXYGEN_SHOULD_SKIP_THIS) \endcond */
3325 
3326 /*=============================================================================
3327  * Public function definitions
3328  *============================================================================*/
3329 
3330 /*----------------------------------------------------------------------------*/
3331 /*!
3332  * \brief Return process rank associated with an interface's distant elements.
3333  *
3334  * \param[in]  itf  pointer to interface structure
3335  *
3336  * \return  process rank associated with the interface's distant elements
3337  */
3338 /*----------------------------------------------------------------------------*/
3339 
3340 int
cs_interface_rank(const cs_interface_t * itf)3341 cs_interface_rank(const cs_interface_t  *itf)
3342 {
3343   int retval = -1;
3344 
3345   if (itf != NULL)
3346     retval = itf->rank;
3347 
3348   return retval;
3349 }
3350 
3351 /*----------------------------------------------------------------------------*/
3352 /*!
3353  * \brief Return number of local and distant elements defining an interface.
3354  *
3355  * \param[in]  itf  pointer to interface structure
3356  *
3357  * \return  number of local and distant elements defining the interface
3358  */
3359 /*----------------------------------------------------------------------------*/
3360 
3361 cs_lnum_t
cs_interface_size(const cs_interface_t * itf)3362 cs_interface_size(const cs_interface_t  *itf)
3363 {
3364   cs_lnum_t retval = 0;
3365 
3366   if (itf != NULL)
3367     retval = itf->size;
3368 
3369   return retval;
3370 }
3371 
3372 /*----------------------------------------------------------------------------*/
3373 /*!
3374  * \brief Return pointer to array of local element ids defining an interface.
3375  *
3376  * The size of the array may be obtained by cs_interface_size().
3377  * The array is owned by the interface structure, and is not copied
3378  * (hence the constant qualifier for the return value).
3379  *
3380  * \param[in]  itf  pointer to interface structure
3381  *
3382  * \return  pointer to array of local element ids (0 to n-1) defining
3383  * the interface
3384  */
3385 /*----------------------------------------------------------------------------*/
3386 
3387 const cs_lnum_t *
cs_interface_get_elt_ids(const cs_interface_t * itf)3388 cs_interface_get_elt_ids(const cs_interface_t  *itf)
3389 {
3390   const cs_lnum_t *retval = NULL;
3391 
3392   if (itf != NULL)
3393     retval = itf->elt_id;
3394 
3395   return retval;
3396 }
3397 
3398 /*----------------------------------------------------------------------------*/
3399 /*!
3400  * \brief Return pointer to array of matching element ids defining an interface.
3401  *
3402  * This array is only available if cs_interface_set_add_match_ids() has
3403  * been called for the containing interface set.
3404  *
3405  * The size of the array may be obtained by cs_interface_size().
3406  * The array is owned by the interface structure, and is not copied
3407  * (hence the constant qualifier for the return value).
3408  *
3409  * \param[in]  itf  pointer to interface structure
3410  *
3411  * \return  pointer to array of local element ids (0 to n-1) defining
3412  * the interface
3413  */
3414 /*----------------------------------------------------------------------------*/
3415 
3416 const cs_lnum_t *
cs_interface_get_match_ids(const cs_interface_t * itf)3417 cs_interface_get_match_ids(const cs_interface_t  *itf)
3418 {
3419   const cs_lnum_t *retval = NULL;
3420 
3421   if (itf != NULL)
3422     retval = itf->match_id;
3423 
3424   return retval;
3425 }
3426 
3427 /*----------------------------------------------------------------------------*/
3428 /*!
3429  * \brief Return size of index of sub-sections for different transformations.
3430  *
3431  * The index is applicable to both local_num and distant_num arrays,
3432  * with purely parallel equivalences appearing at position 0, and
3433  * equivalences through periodic transform i at position i+1;
3434  * Its size should thus be equal to 1 + number of periodic transforms + 1,
3435  * In absence of periodicity, it may be 0, as the index is not needed.
3436  *
3437  * \param[in]  itf  pointer to interface structure
3438  *
3439  * \return  transform index size for the interface
3440  */
3441 /*----------------------------------------------------------------------------*/
3442 
3443 cs_lnum_t
cs_interface_get_tr_index_size(const cs_interface_t * itf)3444 cs_interface_get_tr_index_size(const cs_interface_t  *itf)
3445 {
3446   cs_lnum_t retval = 0;
3447 
3448   if (itf != NULL)
3449     retval = itf->tr_index_size;
3450 
3451   return retval;
3452 }
3453 
3454 /*----------------------------------------------------------------------------*/
3455 /*!
3456  * \brief Return pointer to index of sub-sections for different transformations.
3457  *
3458  * The index is applicable to both local_num and distant_num arrays,
3459  * with purely parallel equivalences appearing at position 0, and
3460  * equivalences through periodic transform i at position i+1;
3461  * In absence of periodicity, it may be NULL, as it is not needed.
3462  *
3463  * \param[in]  itf  pointer to interface structure
3464  *
3465  * \return  pointer to transform index for the interface
3466  */
3467 /*----------------------------------------------------------------------------*/
3468 
3469 const cs_lnum_t *
cs_interface_get_tr_index(const cs_interface_t * itf)3470 cs_interface_get_tr_index(const cs_interface_t  *itf)
3471 {
3472   const cs_lnum_t *tr_index = 0;
3473 
3474   if (itf != NULL)
3475     tr_index = itf->tr_index;
3476 
3477   return tr_index;
3478 }
3479 
3480 /*----------------------------------------------------------------------------*/
3481 /*!
3482  * \brief Creation of a list of interfaces between elements of a same type.
3483  *
3484  * These interfaces may be used to identify equivalent vertices or faces using
3485  * domain splitting, as well as periodic elements (on the same or on
3486  * distant ranks).
3487  *
3488  * Note that periodicity information will be completed and made consistent
3489  * based on the input, so that if a periodic couple is defined on a given rank,
3490  * the reverse couple wil be defined, whether it is also defined on the same
3491  * or a different rank.
3492  *
3493  * In addition, multiple periodicity interfaces will be built automatically
3494  * if the periodicity structure provides for composed periodicities, so they
3495  * need not be defined prior to this function being called.
3496  *
3497  * \param[in]  n_elts                 number of local elements considered
3498  *                                    (size of parent_element_id[])
3499  * \param[in]  parent_element_id      pointer to list of selected elements
3500  *                                    local ids (0 to n-1), or NULL if all
3501  *                                    first n_elts elements are used
3502  * \param[in]  global_number          pointer to list of global (i.e. domain
3503  *                                    splitting independent) element numbers
3504  * \param[in]  periodicity            periodicity information (NULL if none)
3505  * \param[in]  n_periodic_lists       number of periodic lists (may be local)
3506  * \param[in]  periodicity_num        periodicity number (1 to n) associated
3507  *                                    with each periodic list (primary
3508  *                                    periodicities only)
3509  * \param[in]  n_periodic_couples     number of periodic couples associated
3510  *                                    with each periodic list
3511  * \param[in]  periodic_couples       array indicating periodic couples
3512  *                                    (interlaced, using global numberings)
3513  *                                    for each list
3514  *
3515  * \return  pointer to list of interfaces (possibly NULL in serial mode)
3516  */
3517 /*----------------------------------------------------------------------------*/
3518 
3519 cs_interface_set_t *
cs_interface_set_create(cs_lnum_t n_elts,const cs_lnum_t parent_element_id[],const cs_gnum_t global_number[],const fvm_periodicity_t * periodicity,int n_periodic_lists,const int periodicity_num[],const cs_lnum_t n_periodic_couples[],const cs_gnum_t * const periodic_couples[])3520 cs_interface_set_create(cs_lnum_t                 n_elts,
3521                         const cs_lnum_t           parent_element_id[],
3522                         const cs_gnum_t           global_number[],
3523                         const fvm_periodicity_t  *periodicity,
3524                         int                       n_periodic_lists,
3525                         const int                 periodicity_num[],
3526                         const cs_lnum_t           n_periodic_couples[],
3527                         const cs_gnum_t    *const periodic_couples[])
3528 {
3529   cs_interface_set_t  *ifs;
3530 
3531   /* Initial checks */
3532 
3533   if (   (cs_glob_n_ranks < 2)
3534       && (periodicity == NULL || n_periodic_lists == 0))
3535     return NULL;
3536 
3537   /* Create structure */
3538 
3539   BFT_MALLOC(ifs, 1, cs_interface_set_t);
3540   ifs->size = 0;
3541   ifs->interfaces = NULL;
3542   ifs->periodicity = periodicity;
3543   ifs->match_id_rc = 0;
3544 
3545   const cs_gnum_t  *global_num = global_number;
3546 
3547   cs_gnum_t  *_global_num = NULL;
3548 
3549   if (global_number != NULL && parent_element_id != NULL) {
3550 
3551     BFT_MALLOC(_global_num, n_elts, cs_gnum_t);
3552 
3553     for (size_t i = 0 ; i < (size_t)n_elts ; i++)
3554       _global_num[i] = global_number[parent_element_id[i]];
3555 
3556     global_num = _global_num;
3557 
3558   }
3559 
3560   /* Build interfaces */
3561 
3562 #if defined(HAVE_MPI)
3563 
3564   ifs->comm = cs_glob_mpi_comm;
3565 
3566   if (cs_glob_n_ranks > 1) {
3567 
3568     /* Build interfaces */
3569 
3570     if (periodicity == NULL)
3571       _add_global_equiv(ifs,
3572                         n_elts,
3573                         global_num,
3574                         cs_glob_mpi_comm);
3575 
3576     else
3577       _add_global_equiv_periodic(ifs,
3578                                  n_elts,
3579                                  global_num,
3580                                  periodicity,
3581                                  n_periodic_lists,
3582                                  periodicity_num,
3583                                  n_periodic_couples,
3584                                  periodic_couples,
3585                                  cs_glob_mpi_comm);
3586 
3587   }
3588 
3589 #endif /* defined(HAVE_MPI) */
3590 
3591   if (cs_glob_n_ranks == 1 && (periodicity != NULL && n_periodic_lists > 0)) {
3592 
3593     _add_global_equiv_periodic_sp(ifs,
3594                                   n_elts,
3595                                   global_num,
3596                                   periodicity,
3597                                   n_periodic_lists,
3598                                   periodicity_num,
3599                                   n_periodic_couples,
3600                                   periodic_couples);
3601 
3602 
3603   }
3604 
3605   BFT_FREE(_global_num);
3606 
3607   /* Finish preparation of interface set and return */
3608 
3609   _order_by_elt_id(ifs);
3610   _match_id_to_send_order(ifs);
3611 
3612   return ifs;
3613 }
3614 
3615 /*----------------------------------------------------------------------------*/
3616 /*!
3617  * \brief Destruction of an interface set.
3618  *
3619  * \param[in, out]  ifs  pointer to pointer to structure to destroy
3620  */
3621 /*----------------------------------------------------------------------------*/
3622 
3623 void
cs_interface_set_destroy(cs_interface_set_t ** ifs)3624 cs_interface_set_destroy(cs_interface_set_t  **ifs)
3625 {
3626   int i;
3627   cs_interface_set_t  *itfs = *ifs;
3628 
3629   if (itfs != NULL) {
3630     for (i = 0; i < itfs->size; i++) {
3631       _cs_interface_destroy(&(itfs->interfaces[i]));
3632     }
3633     BFT_FREE(itfs->interfaces);
3634     BFT_FREE(itfs);
3635     *ifs = itfs;
3636   }
3637 }
3638 
3639 /*----------------------------------------------------------------------------*/
3640 /*!
3641  * \brief Duplicate an interface set, applying an optional constant stride.
3642  *
3643  * \param[in, out]  ifs     pointer to interface set structure
3644  * \param[in]       stride  if > 1, each element subdivided in stride elements
3645  *
3646  * \return  pointer to new interface set
3647  */
3648 /*----------------------------------------------------------------------------*/
3649 
3650 cs_interface_set_t  *
cs_interface_set_dup(const cs_interface_set_t * ifs,cs_lnum_t stride)3651 cs_interface_set_dup(const cs_interface_set_t  *ifs,
3652                      cs_lnum_t                  stride)
3653 {
3654   cs_interface_set_t  *ifs_new = NULL;
3655 
3656   if (ifs == NULL)
3657     return ifs_new;
3658 
3659   if (stride < 1)
3660     stride = 1;
3661 
3662   BFT_MALLOC(ifs_new, 1, cs_interface_set_t);
3663 
3664   ifs_new->size = ifs->size;
3665   ifs_new->periodicity = ifs->periodicity;
3666   ifs_new->match_id_rc = 0;
3667 
3668 #if defined(HAVE_MPI)
3669   ifs_new->comm = ifs->comm;
3670 #endif
3671 
3672   BFT_MALLOC(ifs_new->interfaces,
3673              ifs->size,
3674              cs_interface_t *);
3675 
3676   /* Loop on interfaces */
3677 
3678   for (int i = 0; i < ifs->size; i++) {
3679 
3680     cs_interface_t *o = ifs->interfaces[i];
3681     cs_interface_t *n = _cs_interface_create();
3682 
3683     n->rank = o->rank;
3684     n->size = o->size * stride;
3685 
3686     n->tr_index_size = o->tr_index_size;
3687     if (o->tr_index != NULL) {
3688       BFT_MALLOC(n->tr_index, n->tr_index_size, cs_lnum_t);
3689       for (int j = 0; j < n->tr_index_size; j++)
3690         n->tr_index[j] = o->tr_index[j] * stride;
3691     }
3692 
3693     n->elt_id = _copy_sub_strided(o->size, stride, o->elt_id);
3694     n->send_order = _copy_sub_strided(o->size, stride, o->send_order);
3695 
3696     n->match_id = NULL;
3697 
3698     ifs_new->interfaces[i] = n;
3699   }
3700 
3701   return ifs_new;
3702 }
3703 
3704 /*----------------------------------------------------------------------------*/
3705 /*!
3706  * \brief Duplicate an interface set for coupled variable blocks.
3707  *
3708  * \param[in, out]  ifs         pointer to interface set structure
3709  * \param[in]       block_size  local block size (number of elements)
3710  * \param[in]       n_blocks    number of associated blocks
3711  *
3712  * \return  pointer to new interface set
3713  */
3714 /*----------------------------------------------------------------------------*/
3715 
3716 cs_interface_set_t  *
cs_interface_set_dup_blocks(cs_interface_set_t * ifs,cs_lnum_t block_size,cs_lnum_t n_blocks)3717 cs_interface_set_dup_blocks(cs_interface_set_t  *ifs,
3718                             cs_lnum_t            block_size,
3719                             cs_lnum_t            n_blocks)
3720 {
3721   cs_interface_set_t  *ifs_new = NULL;
3722 
3723   if (ifs == NULL)
3724     return ifs_new;
3725 
3726   if (n_blocks < 1)
3727     n_blocks = 1;
3728 
3729   BFT_MALLOC(ifs_new, 1, cs_interface_set_t);
3730   ifs->match_id_rc = 0;
3731 
3732   ifs_new->size = ifs->size;
3733   ifs_new->periodicity = ifs->periodicity;
3734 
3735   cs_lnum_t *d_block_size;
3736   BFT_MALLOC(d_block_size, ifs->size, cs_lnum_t);
3737 
3738   int n_ranks = 1;
3739 
3740 #if defined(HAVE_MPI)
3741 
3742   ifs_new->comm = ifs->comm;
3743 
3744   /* Exchange block sizes */
3745 
3746   int request_count = 0, local_rank = -1;
3747   MPI_Request *request = NULL;
3748   MPI_Status *status  = NULL;
3749 
3750   if (ifs->comm != MPI_COMM_NULL) {
3751     MPI_Comm_rank(ifs->comm, &local_rank);
3752     MPI_Comm_size(ifs->comm, &n_ranks);
3753   }
3754 
3755   if (n_ranks > 1) {
3756 
3757     cs_lnum_t send_buf[1] = {block_size};
3758 
3759     BFT_MALLOC(request, ifs->size*2, MPI_Request);
3760     BFT_MALLOC(status, ifs->size*2, MPI_Status);
3761 
3762     for (int i = 0; i < ifs->size; i++) {
3763       cs_interface_t *itf = ifs->interfaces[i];
3764       if (itf->rank != local_rank)
3765         MPI_Irecv(d_block_size + i,
3766                   1,
3767                   CS_MPI_LNUM,
3768                   itf->rank,
3769                   itf->rank,
3770                   ifs->comm,
3771                   &(request[request_count++]));
3772       else
3773         d_block_size[i] = block_size;
3774     }
3775 
3776     for (int i = 0; i < ifs->size; i++) {
3777       cs_interface_t *itf = ifs->interfaces[i];
3778       if (itf->rank != local_rank)
3779         MPI_Isend(send_buf,
3780                   1,
3781                   CS_MPI_LNUM,
3782                   itf->rank,
3783                   local_rank,
3784                   ifs->comm,
3785                   &(request[request_count++]));
3786     }
3787 
3788     MPI_Waitall(request_count, request, status);
3789 
3790     BFT_FREE(request);
3791     BFT_FREE(status);
3792 
3793   }
3794 
3795 #endif /* defined(HAVE_MPI) */
3796 
3797   if (n_ranks <= 1 && ifs->size > 0) {
3798     cs_interface_t *itf = ifs->interfaces[0];
3799 
3800     assert(ifs->size <= 1);
3801     assert(itf->rank == 0);
3802 
3803     d_block_size[0] = block_size;
3804   }
3805 
3806   /* Ensure match ids are available on reference interface */
3807 
3808   cs_interface_set_add_match_ids(ifs);
3809 
3810   /* Build new interface
3811      ------------------- */
3812 
3813   BFT_MALLOC(ifs_new->interfaces,
3814              ifs->size,
3815              cs_interface_t *);
3816 
3817   /* Loop on interfaces */
3818 
3819   for (int i = 0; i < ifs->size; i++) {
3820     cs_interface_t *o = ifs->interfaces[i];
3821     ifs_new->interfaces[i]
3822       = _copy_sub_blocked(o, block_size, d_block_size[i], n_blocks);
3823   }
3824 
3825   /* Free memory */
3826 
3827   cs_interface_set_free_match_ids(ifs);
3828 
3829   BFT_FREE(d_block_size);
3830 
3831   /* Finish preparation of interface set and return */
3832 
3833   _match_id_to_send_order(ifs_new);
3834 
3835   return ifs_new;
3836 }
3837 
3838 /*----------------------------------------------------------------------------*/
3839 /*!
3840  * \brief Return number of interfaces associated with an interface set.
3841  *
3842  * \param[in]  ifs  pointer to interface set structure
3843  *
3844  * \return  number of interfaces in set
3845  */
3846 /*----------------------------------------------------------------------------*/
3847 
3848 int
cs_interface_set_size(const cs_interface_set_t * ifs)3849 cs_interface_set_size(const cs_interface_set_t  *ifs)
3850 {
3851   int retval = 0;
3852 
3853   if (ifs != NULL)
3854     retval = ifs->size;
3855 
3856   return retval;
3857 }
3858 
3859 /*----------------------------------------------------------------------------*/
3860 /*!
3861  * \brief Return total number of elements in interface set.
3862  *
3863  * This is equal to the sum of cs_interface_size() on the cs_interface_size()
3864  * interfaces of a set.
3865  *
3866  * \param[in]  ifs  pointer to interface set structure
3867  *
3868  * \return  number of interfaces in set
3869  */
3870 /*----------------------------------------------------------------------------*/
3871 
3872 cs_lnum_t
cs_interface_set_n_elts(const cs_interface_set_t * ifs)3873 cs_interface_set_n_elts(const cs_interface_set_t  *ifs)
3874 {
3875   int i;
3876 
3877   cs_lnum_t retval = 0;
3878 
3879   for (i = 0; i < ifs->size; i++)
3880     retval += (ifs->interfaces[i])->size;
3881 
3882   return retval;
3883 }
3884 
3885 /*----------------------------------------------------------------------------*/
3886 /*!
3887  * \brief Return pointer to a given interface in an interface set.
3888  *
3889  * \param[in]  ifs          <-- pointer to interface set structure
3890  * \param[in]  interface_id <-- index of interface in set (0 to n-1)
3891  *
3892  * \return  pointer to interface structure
3893  */
3894 /*----------------------------------------------------------------------------*/
3895 
3896 const cs_interface_t *
cs_interface_set_get(const cs_interface_set_t * ifs,int interface_id)3897 cs_interface_set_get(const cs_interface_set_t  *ifs,
3898                      int                        interface_id)
3899 {
3900   const cs_interface_t  *retval = NULL;
3901 
3902   if (ifs != NULL) {
3903     if (interface_id > -1 && interface_id < ifs->size)
3904       retval = ifs->interfaces[interface_id];
3905   }
3906 
3907   return retval;
3908 }
3909 
3910 /*----------------------------------------------------------------------------*/
3911 /*!
3912  * \brief Return pointer to the periocicity structure associated of an
3913  * interface set.
3914  *
3915  * \param[in]  ifs  pointer to interface set structure
3916  *
3917  * \return  pointer to periodicity structure, or NULL
3918  */
3919 /*----------------------------------------------------------------------------*/
3920 
3921 const fvm_periodicity_t *
cs_interface_set_periodicity(const cs_interface_set_t * ifs)3922 cs_interface_set_periodicity(const cs_interface_set_t  *ifs)
3923 {
3924   const fvm_periodicity_t  *retval = NULL;
3925 
3926   if (ifs != NULL)
3927     retval = ifs->periodicity;
3928 
3929   return retval;
3930 }
3931 
3932 /*----------------------------------------------------------------------------*/
3933 /*!
3934  * \brief Apply renumbering of elements referenced by an interface set.
3935  *
3936  * For any given element i, a negative old_to_new[i] value means that that
3937  * element does not appear anymore in the new numbering.
3938  *
3939  * \param[in, out]  ifs         pointer to interface set structure
3940  * \param[in]       old_to_new  renumbering array (0 to n-1 numbering)
3941  */
3942 /*----------------------------------------------------------------------------*/
3943 
3944 void
cs_interface_set_renumber(cs_interface_set_t * ifs,const cs_lnum_t old_to_new[])3945 cs_interface_set_renumber(cs_interface_set_t  *ifs,
3946                           const cs_lnum_t      old_to_new[])
3947 {
3948   int i;
3949   int n_interfaces = 0;
3950 
3951   assert(ifs != NULL);
3952   assert(old_to_new != NULL);
3953 
3954   /* Compute new element and match ids */
3955 
3956   _set_renumber_update_ids(ifs, old_to_new);
3957 
3958   _order_elt_id(ifs);
3959 
3960   /* Remove references to elements not appearing anymore */
3961 
3962   for (i = 0; i < ifs->size; i++) {
3963 
3964     cs_lnum_t j, k;
3965     cs_interface_t *itf = ifs->interfaces[i];
3966 
3967     cs_lnum_t *loc_id = itf->elt_id;
3968     cs_lnum_t *dist_id = itf->match_id;
3969 
3970     if (itf->tr_index_size == 0) {
3971       for (j = 0, k = 0; j < itf->size; j++) {
3972         if (loc_id[j] > -1 && dist_id[j] > -1) {
3973           loc_id[k] = loc_id[j];
3974           dist_id[k] = dist_id[j];
3975           k += 1;
3976         }
3977       }
3978     }
3979     else {
3980       int tr_id;
3981       cs_lnum_t end_id = itf->tr_index[0];
3982       k = 0;
3983       for (tr_id = 0; tr_id < itf->tr_index_size - 1; tr_id++) {
3984         cs_lnum_t start_id = end_id;
3985         end_id = itf->tr_index[tr_id + 1];
3986         for (j = start_id; j < end_id; j++) {
3987           if (loc_id[j] > -1 && dist_id[j] > -1) {
3988             loc_id[k] = loc_id[j];
3989             dist_id[k] = dist_id[j];
3990             k += 1;
3991           }
3992         }
3993         itf->tr_index[tr_id + 1] = k;
3994       }
3995     }
3996 
3997     if (k < itf->size) {
3998       if (k > 0) {
3999         itf->size = k;
4000         BFT_REALLOC(itf->elt_id, k, cs_lnum_t);
4001         BFT_REALLOC(itf->match_id, k, cs_lnum_t);
4002       }
4003       else {
4004         BFT_FREE(itf->elt_id);
4005         BFT_FREE(itf->match_id);
4006         BFT_FREE(ifs->interfaces[i]);
4007       }
4008     }
4009   }
4010 
4011   for (i = 0, n_interfaces = 0; i < ifs->size; i++) {
4012     if (ifs->interfaces[i] != NULL)
4013       ifs->interfaces[n_interfaces++] = ifs->interfaces[i];
4014   }
4015 
4016   if (n_interfaces < ifs->size) {
4017     BFT_REALLOC(ifs->interfaces,
4018                 n_interfaces,
4019                 cs_interface_t *);
4020     ifs->size = n_interfaces;
4021   }
4022 
4023   _match_id_to_send_order(ifs);
4024 }
4025 
4026 /*----------------------------------------------------------------------------*/
4027 /*!
4028  * \brief Copy array from distant or matching interface elements to
4029  * local elements.
4030  *
4031  * Source and destination arrays define values for all elements in the
4032  * interface set (i.e. all elements listed by cs_interface_get_elt_ids()
4033  * when looping over interfaces of a set.
4034  *
4035  * \param[in]   ifs            pointer to interface set structure
4036  * \param[in]   datatype       type of data considered
4037  * \param[in]   stride         number of values per entity (interlaced)
4038  * \param[in]   src_on_parent  true if source array is defined on the elements
4039  *                             defined by ifs->elt_ids, false if source array
4040  *                             defined directly on cs_interface_set_n_elts(ifs)
4041  * \param[in]   src            source array (size:
4042  *                             cs_interface_set_n_elts(ifs)*stride
4043  *                             or parent array size * stride)
4044  * \param[out]  dest           destination array (size:
4045  *                             cs_interface_set_n_elts(ifs)*stride)
4046  */
4047 /*----------------------------------------------------------------------------*/
4048 
4049 void
cs_interface_set_copy_array(const cs_interface_set_t * ifs,cs_datatype_t datatype,int stride,bool src_on_parent,const void * src,void * dest)4050 cs_interface_set_copy_array(const cs_interface_set_t  *ifs,
4051                             cs_datatype_t              datatype,
4052                             int                        stride,
4053                             bool                       src_on_parent,
4054                             const void                *src,
4055                             void                      *dest)
4056 {
4057   int i;
4058   cs_lnum_t j;
4059   int local_rank = 0;
4060   cs_lnum_t stride_size = cs_datatype_size[datatype]*stride;
4061   unsigned char *send_buf = NULL;
4062   unsigned char *_dest = dest;
4063   const unsigned char *_src = src;
4064 
4065 #if defined(HAVE_MPI)
4066 
4067   int n_ranks = 1;
4068   int request_count = 0;
4069   MPI_Datatype mpi_type = cs_datatype_to_mpi[datatype];
4070   MPI_Request  *request = NULL;
4071   MPI_Status  *status  = NULL;
4072 
4073   if (ifs->comm != MPI_COMM_NULL) {
4074     MPI_Comm_rank(ifs->comm, &local_rank);
4075     MPI_Comm_size(ifs->comm, &n_ranks);
4076   }
4077 
4078 #endif
4079 
4080   assert(ifs != NULL);
4081 
4082   BFT_MALLOC(send_buf,
4083              cs_interface_set_n_elts(ifs)*stride_size,
4084              unsigned char);
4085 
4086   /* Prepare send buffer first (so that src is not used
4087      anymore after this, and may overlap with dest if desired); */
4088 
4089   for (i = 0, j = 0; i < ifs->size; i++) {
4090 
4091     cs_lnum_t k, l;
4092     cs_interface_t *itf = ifs->interfaces[i];
4093     unsigned char *p = send_buf;
4094 
4095     p += j*stride_size;
4096 
4097     if (src_on_parent) {
4098       for (k = 0; k < itf->size; k++) {
4099         cs_lnum_t send_id = itf->elt_id[itf->send_order[k]];
4100         for (l = 0; l < stride_size; l++)
4101           p[k*stride_size + l] = _src[send_id*stride_size + l];
4102       }
4103     }
4104     else {
4105       for (k = 0; k < itf->size; k++) {
4106         cs_lnum_t send_id = itf->send_order[k] + j;
4107         for (l = 0; l < stride_size; l++)
4108           p[k*stride_size + l] = _src[send_id*stride_size + l];
4109       }
4110     }
4111 
4112     j += itf->size;
4113 
4114   }
4115 
4116   /* Now exchange data */
4117 
4118 #if defined(HAVE_MPI)
4119   if (n_ranks > 1) {
4120     BFT_MALLOC(request, ifs->size*2, MPI_Request);
4121     BFT_MALLOC(status, ifs->size*2, MPI_Status);
4122   }
4123 #endif
4124 
4125   for (i = 0, j = 0; i < ifs->size; i++) {
4126 
4127     cs_interface_t *itf = ifs->interfaces[i];
4128 
4129     if (itf->rank == local_rank)
4130       memcpy(_dest + j*stride_size,
4131              send_buf + j*stride_size,
4132              itf->size*stride_size);
4133 
4134 #if defined(HAVE_MPI)
4135 
4136     else /* if (itf->rank != local_rank) */
4137         MPI_Irecv(_dest + j*stride_size,
4138                   itf->size*stride,
4139                   mpi_type,
4140                   itf->rank,
4141                   itf->rank,
4142                   ifs->comm,
4143                   &(request[request_count++]));
4144 
4145 #endif
4146 
4147     j += itf->size;
4148 
4149   }
4150 
4151 #if defined(HAVE_MPI)
4152 
4153   if (n_ranks > 1) {
4154 
4155     for (i = 0, j = 0; i < ifs->size; i++) {
4156       cs_interface_t *itf = ifs->interfaces[i];
4157       if (itf->rank != local_rank)
4158         MPI_Isend(send_buf + j*stride_size,
4159                   itf->size*stride,
4160                   mpi_type,
4161                   itf->rank,
4162                   local_rank,
4163                   ifs->comm,
4164                   &(request[request_count++]));
4165       j += itf->size;
4166     }
4167 
4168     MPI_Waitall(request_count, request, status);
4169 
4170     BFT_FREE(request);
4171     BFT_FREE(status);
4172 
4173   }
4174 
4175 #endif /* defined(HAVE_MPI) */
4176 
4177   BFT_FREE(send_buf);
4178 }
4179 
4180 /*----------------------------------------------------------------------------*/
4181 /*!
4182  * \brief Copy indexed array from distant or matching interface elements to
4183  * local elements.
4184  *
4185  * Source and destination arrays define values for all elements in the
4186  * interface set (i.e. all elements listed by cs_interface_get_elt_ids()
4187  * when looping over interfaces of a set.
4188  *
4189  * Note that when copying the same type of data to all matching elements,
4190  * the source and destination index may be the same, if src_on_parent is true.
4191  * To avoid requiring a separate destination index, the dest_index argument
4192  * may be set to NULL, in which case it is assumed that source and destination
4193  * are symmetric, and src_index is sufficient to determine sizes (whether
4194  * src_on_parent is true or not).
4195  *
4196  * In some use cases, for example when copying values only in one direction,
4197  * the copying is not symmetric, so both a source and destination buffer must
4198  * be provided.
4199  *
4200  * \param[in]   ifs            pointer to interface set structure
4201  * \param[in]   datatype       type of data considered
4202  * \param[in]   src_on_parent  true if source array is defined on the elements
4203  *                             defined by ifs->elt_ids, false if source array
4204  *                             defined directly on cs_interface_set_n_elts(ifs)
4205  * \param[in]   src_index      index for source array
4206  * \param[in]   dest_index     index for destination array, or NULL
4207  * \param[in]   src            source array (size:
4208  *                             src_index[cs_interface_set_n_elts(ifs)]
4209  *                             or parent array size)
4210  * \param[out]  dest           destination array (size:
4211  *                             src_index[cs_interface_set_n_elts(ifs)] or
4212  *                             dest_index[cs_interface_set_n_elts(ifs)])
4213  */
4214 /*----------------------------------------------------------------------------*/
4215 
4216 void
cs_interface_set_copy_indexed(const cs_interface_set_t * ifs,cs_datatype_t datatype,bool src_on_parent,const cs_lnum_t src_index[],const cs_lnum_t dest_index[],const void * src,void * dest)4217 cs_interface_set_copy_indexed(const cs_interface_set_t  *ifs,
4218                               cs_datatype_t              datatype,
4219                               bool                       src_on_parent,
4220                               const cs_lnum_t            src_index[],
4221                               const cs_lnum_t            dest_index[],
4222                               const void                *src,
4223                               void                      *dest)
4224 {
4225   int i;
4226   cs_lnum_t j;
4227   int local_rank = 0;
4228   int type_size = cs_datatype_size[datatype];
4229   cs_lnum_t send_size = 0, itf_index_size = 0;
4230   cs_lnum_t *itf_index = NULL, *itf_s_index = NULL, *itf_r_index = NULL;
4231   unsigned char *send_buf = NULL;
4232   unsigned char *_dest = dest;
4233   const unsigned char *_src = src;
4234 
4235 #if defined(HAVE_MPI)
4236 
4237   int n_ranks = 1;
4238   int request_count = 0;
4239   MPI_Datatype mpi_type = cs_datatype_to_mpi[datatype];
4240   MPI_Request  *request = NULL;
4241   MPI_Status  *status  = NULL;
4242 
4243   if (ifs->comm != MPI_COMM_NULL) {
4244     MPI_Comm_rank(ifs->comm, &local_rank);
4245     MPI_Comm_size(ifs->comm, &n_ranks);
4246   }
4247 
4248 #endif
4249 
4250   assert(ifs != NULL);
4251 
4252   /* Count number of elements to send or receive */
4253 
4254   itf_index_size = ifs->size + 1;
4255   if (dest_index != NULL)
4256     itf_index_size *= 2;
4257 
4258   BFT_MALLOC(itf_index, 2*(ifs->size + 1), cs_lnum_t);
4259   itf_s_index = itf_index;
4260   itf_s_index[0] = 0;
4261 
4262   if (src_on_parent) {
4263     for (i = 0, j = 0; i < ifs->size; i++) {
4264       cs_lnum_t k;
4265       cs_interface_t *itf = ifs->interfaces[i];
4266       for (k = 0; k < itf->size; k++) {
4267         cs_lnum_t send_id = itf->elt_id[itf->send_order[k]];
4268         send_size += src_index[send_id+1] - src_index[send_id];
4269       }
4270       itf_s_index[i+1] = send_size;
4271     }
4272   }
4273   else {
4274     for (i = 0, j = 0; i < ifs->size; i++) {
4275       cs_interface_t *itf = ifs->interfaces[i];
4276       j += itf->size;
4277       itf_s_index[i+1] = src_index[j];
4278     }
4279     send_size = itf_s_index[ifs->size];
4280   }
4281 
4282   if (dest_index != NULL) {
4283     itf_r_index = itf_index + ifs->size + 1;
4284     itf_r_index[0] = 0;
4285     for (i = 0, j = 0; i < ifs->size; i++) {
4286       cs_interface_t *itf = ifs->interfaces[i];
4287       j += itf->size;
4288       itf_r_index[i+1] = dest_index[j];
4289     }
4290   }
4291   else
4292     itf_r_index = itf_s_index;
4293 
4294   BFT_MALLOC(send_buf, send_size*type_size, unsigned char);
4295 
4296   /* Prepare send buffer first (so that src is not used
4297      anymore after this, and may overlap with dest if desired); */
4298 
4299   for (i = 0, j = 0; i < ifs->size; i++) {
4300 
4301     cs_lnum_t k, l, m;
4302     cs_interface_t *itf = ifs->interfaces[i];
4303     unsigned char *p = send_buf + (itf_s_index[i]*type_size);
4304 
4305     if (src_on_parent) {
4306       for (k = 0, l = 0; k < itf->size; k++) {
4307         cs_lnum_t send_id = itf->elt_id[itf->send_order[k]];
4308         cs_lnum_t start_id = src_index[send_id]*type_size;
4309         cs_lnum_t end_id = src_index[send_id+1]*type_size;
4310         for (m = start_id; m < end_id; m++)
4311           p[l++] = _src[m];
4312       }
4313     }
4314     else {
4315       for (k = 0, l = 0; k < itf->size; k++) {
4316         cs_lnum_t send_id = itf->send_order[k] + j;
4317         cs_lnum_t start_id = src_index[send_id]*type_size;
4318         cs_lnum_t end_id = src_index[send_id+1]*type_size;
4319         for (m = start_id; m < end_id; m++)
4320           p[l++] = _src[m];
4321       }
4322       j += itf->size;
4323     }
4324   }
4325 
4326   /* Now exchange data */
4327 
4328 #if defined(HAVE_MPI)
4329   if (n_ranks > 1) {
4330     BFT_MALLOC(request, ifs->size*2, MPI_Request);
4331     BFT_MALLOC(status, ifs->size*2, MPI_Status);
4332   }
4333 #endif
4334 
4335   for (i = 0, j = 0; i < ifs->size; i++) {
4336 
4337     cs_interface_t *itf = ifs->interfaces[i];
4338     cs_lnum_t r_buf_shift = itf_r_index[i]*type_size;
4339 
4340     if (itf->rank == local_rank) {
4341       cs_lnum_t s_buf_shift = itf_s_index[i]*type_size;
4342       int msg_size = (itf_s_index[i+1]-itf_s_index[i])*type_size;
4343       memcpy(_dest + r_buf_shift, send_buf + s_buf_shift, msg_size);
4344     }
4345 
4346 #if defined(HAVE_MPI)
4347 
4348     else /* if (itf->rank != local_rank) */
4349       MPI_Irecv(_dest + r_buf_shift,
4350                 (itf_r_index[i+1]-itf_r_index[i]),
4351                 mpi_type,
4352                 itf->rank,
4353                 itf->rank,
4354                 ifs->comm,
4355                 &(request[request_count++]));
4356 
4357 #endif
4358 
4359     j += itf->size;
4360 
4361   }
4362 
4363 #if defined(HAVE_MPI)
4364 
4365   if (n_ranks > 1) {
4366 
4367     for (i = 0, j = 0; i < ifs->size; i++) {
4368       cs_interface_t *itf = ifs->interfaces[i];
4369       cs_lnum_t s_buf_shift = itf_s_index[i]*type_size;
4370         if (itf->rank != local_rank)
4371           MPI_Isend(send_buf + s_buf_shift,
4372                     (itf_s_index[i+1]-itf_s_index[i]),
4373                     mpi_type,
4374                     itf->rank,
4375                     local_rank,
4376                     ifs->comm,
4377                     &(request[request_count++]));
4378     }
4379 
4380     MPI_Waitall(request_count, request, status);
4381 
4382     BFT_FREE(request);
4383     BFT_FREE(status);
4384 
4385   }
4386 
4387 #endif /* defined(HAVE_MPI) */
4388 
4389   BFT_FREE(send_buf);
4390   BFT_FREE(itf_index);
4391 }
4392 
4393 /*----------------------------------------------------------------------------*/
4394 /*!
4395  * \brief Update values using the bitwise inclusive or operation for elements
4396  *        associated with an interface set.
4397  *
4398  * On input, the variable array should contain local contributions. On output,
4399  * contributions from matching elements on parallel or periodic boundaries
4400  * have been processed.
4401  *
4402  * Only the values of elements belonging to the interfaces are modified.
4403  *
4404  * \param[in]       ifs        pointer to a fvm_interface_set_t structure
4405  * \param[in]       n_elts     number of elements in var buffer
4406  * \param[in]       stride     number of values (non interlaced) by entity
4407  * \param[in]       interlace  true if variable is interlaced (for stride > 1)
4408  * \param[in]       datatype   type of data considered
4409  * \param[in, out]  var        variable buffer
4410  */
4411 /*----------------------------------------------------------------------------*/
4412 
4413 void
cs_interface_set_inclusive_or(const cs_interface_set_t * ifs,cs_lnum_t n_elts,cs_lnum_t stride,bool interlace,cs_datatype_t datatype,void * var)4414 cs_interface_set_inclusive_or(const cs_interface_set_t  *ifs,
4415                               cs_lnum_t                  n_elts,
4416                               cs_lnum_t                  stride,
4417                               bool                       interlace,
4418                               cs_datatype_t              datatype,
4419                               void                      *var)
4420 {
4421   int i;
4422   cs_lnum_t j, k, l;
4423   cs_lnum_t stride_size = cs_datatype_size[datatype]*stride;
4424   unsigned char *buf = NULL;
4425 
4426   BFT_MALLOC(buf, cs_interface_set_n_elts(ifs)*stride_size, unsigned char);
4427 
4428   if (stride < 2 || interlace)
4429     cs_interface_set_copy_array(ifs,
4430                                 datatype,
4431                                 stride,
4432                                 true, /* src_on_parent */
4433                                 var,
4434                                 buf);
4435 
4436   else
4437     _interface_set_copy_array_ni(ifs,
4438                                  datatype,
4439                                  n_elts,
4440                                  stride,
4441                                  var,
4442                                  buf);
4443 
4444   /* Now update values */
4445 
4446   switch (datatype) {
4447 
4448   case CS_CHAR:
4449     for (i = 0, j = 0; i < ifs->size; i++) {
4450       cs_interface_t *itf = ifs->interfaces[i];
4451       char *v = var;
4452       const char *p = (const char *)buf + j*stride;
4453       if (stride < 2 || interlace) {
4454         for (k = 0; k < itf->size; k++) {
4455           cs_lnum_t elt_id = itf->elt_id[k];
4456           for (l = 0; l < stride; l++)
4457             v[elt_id*stride + l] |= p[k*stride + l];
4458         }
4459       }
4460       else {
4461         for (k = 0; k < itf->size; k++) {
4462           cs_lnum_t elt_id = itf->elt_id[k];
4463           for (l = 0; l < stride; l++)
4464             v[elt_id + l*n_elts] |= p[k*stride + l];
4465         }
4466       }
4467       j += itf->size;
4468     }
4469     break;
4470 
4471   case CS_INT32:
4472     for (i = 0, j = 0; i < ifs->size; i++) {
4473       cs_interface_t *itf = ifs->interfaces[i];
4474       int32_t *v = var;
4475       const int32_t *p = (const int32_t *)buf + j*stride;
4476       if (stride < 2 || interlace) {
4477         for (k = 0; k < itf->size; k++) {
4478           cs_lnum_t elt_id = itf->elt_id[k];
4479           for (l = 0; l < stride; l++)
4480             v[elt_id*stride + l] |= p[k*stride + l];
4481         }
4482       }
4483       else {
4484         for (k = 0; k < itf->size; k++) {
4485           cs_lnum_t elt_id = itf->elt_id[k];
4486           for (l = 0; l < stride; l++)
4487             v[elt_id + l*n_elts] |= p[k*stride + l];
4488         }
4489       }
4490       j += itf->size;
4491     }
4492     break;
4493 
4494   case CS_INT64:
4495     for (i = 0, j = 0; i < ifs->size; i++) {
4496       cs_interface_t *itf = ifs->interfaces[i];
4497       int64_t *v = var;
4498       const int64_t *p = (const int64_t *)buf + j*stride;
4499       if (stride < 2 || interlace) {
4500         for (k = 0; k < itf->size; k++) {
4501           cs_lnum_t elt_id = itf->elt_id[k];
4502           for (l = 0; l < stride; l++)
4503             v[elt_id*stride + l] |= p[k*stride + l];
4504         }
4505       }
4506       else {
4507         for (k = 0; k < itf->size; k++) {
4508           cs_lnum_t elt_id = itf->elt_id[k];
4509           for (l = 0; l < stride; l++)
4510             v[elt_id + l*n_elts] |= p[k*stride + l];
4511         }
4512       }
4513       j += itf->size;
4514     }
4515     break;
4516 
4517   case CS_UINT16:
4518     for (i = 0, j = 0; i < ifs->size; i++) {
4519       cs_interface_t *itf = ifs->interfaces[i];
4520       uint16_t *v = var;
4521       const uint16_t *p = (const uint16_t *)buf + j*stride;
4522       if (stride < 2 || interlace) {
4523         for (k = 0; k < itf->size; k++) {
4524           cs_lnum_t elt_id = itf->elt_id[k];
4525           for (l = 0; l < stride; l++)
4526             v[elt_id*stride + l] |= p[k*stride + l];
4527         }
4528       }
4529       else {
4530         for (k = 0; k < itf->size; k++) {
4531           cs_lnum_t elt_id = itf->elt_id[k];
4532           for (l = 0; l < stride; l++)
4533             v[elt_id + l*n_elts] |= p[k*stride + l];
4534         }
4535       }
4536       j += itf->size;
4537     }
4538     break;
4539 
4540   case CS_UINT32:
4541     for (i = 0, j = 0; i < ifs->size; i++) {
4542       cs_interface_t *itf = ifs->interfaces[i];
4543       uint32_t *v = var;
4544       const uint32_t *p = (const uint32_t *)buf + j*stride;
4545       if (stride < 2 || interlace) {
4546         for (k = 0; k < itf->size; k++) {
4547           cs_lnum_t elt_id = itf->elt_id[k];
4548           for (l = 0; l < stride; l++)
4549             v[elt_id*stride + l] |= p[k*stride + l];
4550         }
4551       }
4552       else {
4553         for (k = 0; k < itf->size; k++) {
4554           cs_lnum_t elt_id = itf->elt_id[k];
4555           for (l = 0; l < stride; l++)
4556             v[elt_id + l*n_elts] |= p[k*stride + l];
4557         }
4558       }
4559       j += itf->size;
4560     }
4561     break;
4562 
4563   case CS_UINT64:
4564     for (i = 0, j = 0; i < ifs->size; i++) {
4565       cs_interface_t *itf = ifs->interfaces[i];
4566       uint64_t *v = var;
4567       const uint64_t *p = (const uint64_t *)buf + j*stride;
4568       if (stride < 2 || interlace) {
4569         for (k = 0; k < itf->size; k++) {
4570           cs_lnum_t elt_id = itf->elt_id[k];
4571           for (l = 0; l < stride; l++)
4572             v[elt_id*stride + l] |= p[k*stride + l];
4573         }
4574       }
4575       else {
4576         for (k = 0; k < itf->size; k++) {
4577           cs_lnum_t elt_id = itf->elt_id[k];
4578           for (l = 0; l < stride; l++)
4579             v[elt_id + l*n_elts] |= p[k*stride + l];
4580         }
4581       }
4582       j += itf->size;
4583     }
4584     break;
4585 
4586   default:
4587     bft_error(__FILE__, __LINE__, 0,
4588               _("Called %s with unhandled datatype (%d)."), __func__,
4589               (int)datatype);
4590   }
4591 
4592   BFT_FREE(buf);
4593 }
4594 
4595 /*----------------------------------------------------------------------------*/
4596 /*!
4597  * \brief Update the sum of values for elements associated with an
4598  * interface set.
4599  *
4600  * On input, the variable array should contain local contributions. On output,
4601  * contributions from matching elements on parallel or periodic boundaries
4602  * have been added.
4603  *
4604  * Only the values of elements belonging to the interfaces are modified.
4605  *
4606  * \param[in]       ifs        pointer to a fvm_interface_set_t structure
4607  * \param[in]       n_elts     number of elements in var buffer
4608  * \param[in]       stride     number of values (non interlaced) by entity
4609  * \param[in]       interlace  true if variable is interlaced (for stride > 1)
4610  * \param[in]       datatype   type of data considered
4611  * \param[in, out]  var        variable buffer
4612  */
4613 /*----------------------------------------------------------------------------*/
4614 
4615 void
cs_interface_set_sum(const cs_interface_set_t * ifs,cs_lnum_t n_elts,cs_lnum_t stride,bool interlace,cs_datatype_t datatype,void * var)4616 cs_interface_set_sum(const cs_interface_set_t  *ifs,
4617                      cs_lnum_t                  n_elts,
4618                      cs_lnum_t                  stride,
4619                      bool                       interlace,
4620                      cs_datatype_t              datatype,
4621                      void                      *var)
4622 {
4623   int i;
4624   cs_lnum_t j, k, l;
4625   cs_lnum_t stride_size = cs_datatype_size[datatype]*stride;
4626   unsigned char *buf = NULL;
4627 
4628   BFT_MALLOC(buf, cs_interface_set_n_elts(ifs)*stride_size, unsigned char);
4629 
4630   if (stride < 2 || interlace)
4631     cs_interface_set_copy_array(ifs,
4632                                 datatype,
4633                                 stride,
4634                                 true, /* src_on_parent */
4635                                 var,
4636                                 buf);
4637 
4638   else
4639     _interface_set_copy_array_ni(ifs,
4640                                  datatype,
4641                                  n_elts,
4642                                  stride,
4643                                  var,
4644                                  buf);
4645 
4646   /* Now increment values */
4647 
4648   switch (datatype) {
4649 
4650   case CS_CHAR:
4651     for (i = 0, j = 0; i < ifs->size; i++) {
4652       cs_interface_t *itf = ifs->interfaces[i];
4653       char *v = var;
4654       const char *p = (const char *)buf + j*stride;
4655       if (stride < 2 || interlace) {
4656         for (k = 0; k < itf->size; k++) {
4657           cs_lnum_t elt_id = itf->elt_id[k];
4658           for (l = 0; l < stride; l++)
4659             v[elt_id*stride + l] += p[k*stride + l];
4660         }
4661       }
4662       else {
4663         for (k = 0; k < itf->size; k++) {
4664           cs_lnum_t elt_id = itf->elt_id[k];
4665           for (l = 0; l < stride; l++)
4666             v[elt_id + l*n_elts] += p[k*stride + l];
4667         }
4668       }
4669       j += itf->size;
4670     }
4671     break;
4672 
4673   case CS_FLOAT:
4674     for (i = 0, j = 0; i < ifs->size; i++) {
4675       cs_interface_t *itf = ifs->interfaces[i];
4676       float *v = var;
4677       const float *p = (const float *)buf + j*stride;
4678       if (stride < 2 || interlace) {
4679         for (k = 0; k < itf->size; k++) {
4680           cs_lnum_t elt_id = itf->elt_id[k];
4681           for (l = 0; l < stride; l++)
4682             v[elt_id*stride + l] += p[k*stride + l];
4683         }
4684       }
4685       else {
4686         for (k = 0; k < itf->size; k++) {
4687           cs_lnum_t elt_id = itf->elt_id[k];
4688           for (l = 0; l < stride; l++)
4689             v[elt_id + l*n_elts] += p[k*stride + l];
4690         }
4691       }
4692       j += itf->size;
4693     }
4694     break;
4695 
4696   case CS_DOUBLE:
4697     for (i = 0, j = 0; i < ifs->size; i++) {
4698       cs_interface_t *itf = ifs->interfaces[i];
4699       double *v = var;
4700       const double *p = (const double *)buf + j*stride;
4701       if (stride < 2 || interlace) {
4702         for (k = 0; k < itf->size; k++) {
4703           cs_lnum_t elt_id = itf->elt_id[k];
4704           for (l = 0; l < stride; l++)
4705             v[elt_id*stride + l] += p[k*stride + l];
4706         }
4707       }
4708       else {
4709         for (k = 0; k < itf->size; k++) {
4710           cs_lnum_t elt_id = itf->elt_id[k];
4711           for (l = 0; l < stride; l++)
4712             v[elt_id + l*n_elts] += p[k*stride + l];
4713         }
4714       }
4715       j += itf->size;
4716     }
4717     break;
4718 
4719   case CS_INT32:
4720     for (i = 0, j = 0; i < ifs->size; i++) {
4721       cs_interface_t *itf = ifs->interfaces[i];
4722       int32_t *v = var;
4723       const int32_t *p = (const int32_t *)buf + j*stride;
4724       if (stride < 2 || interlace) {
4725         for (k = 0; k < itf->size; k++) {
4726           cs_lnum_t elt_id = itf->elt_id[k];
4727           for (l = 0; l < stride; l++)
4728             v[elt_id*stride + l] += p[k*stride + l];
4729         }
4730       }
4731       else {
4732         for (k = 0; k < itf->size; k++) {
4733           cs_lnum_t elt_id = itf->elt_id[k];
4734           for (l = 0; l < stride; l++)
4735             v[elt_id + l*n_elts] += p[k*stride + l];
4736         }
4737       }
4738       j += itf->size;
4739     }
4740     break;
4741 
4742   case CS_INT64:
4743     for (i = 0, j = 0; i < ifs->size; i++) {
4744       cs_interface_t *itf = ifs->interfaces[i];
4745       int64_t *v = var;
4746       const int64_t *p = (const int64_t *)buf + j*stride;
4747       if (stride < 2 || interlace) {
4748         for (k = 0; k < itf->size; k++) {
4749           cs_lnum_t elt_id = itf->elt_id[k];
4750           for (l = 0; l < stride; l++)
4751             v[elt_id*stride + l] += p[k*stride + l];
4752         }
4753       }
4754       else {
4755         for (k = 0; k < itf->size; k++) {
4756           cs_lnum_t elt_id = itf->elt_id[k];
4757           for (l = 0; l < stride; l++)
4758             v[elt_id + l*n_elts] += p[k*stride + l];
4759         }
4760       }
4761       j += itf->size;
4762     }
4763     break;
4764 
4765   case CS_UINT16:
4766     for (i = 0, j = 0; i < ifs->size; i++) {
4767       cs_interface_t *itf = ifs->interfaces[i];
4768       uint16_t *v = var;
4769       const uint16_t *p = (const uint16_t *)buf + j*stride;
4770       if (stride < 2 || interlace) {
4771         for (k = 0; k < itf->size; k++) {
4772           cs_lnum_t elt_id = itf->elt_id[k];
4773           for (l = 0; l < stride; l++)
4774             v[elt_id*stride + l] += p[k*stride + l];
4775         }
4776       }
4777       else {
4778         for (k = 0; k < itf->size; k++) {
4779           cs_lnum_t elt_id = itf->elt_id[k];
4780           for (l = 0; l < stride; l++)
4781             v[elt_id + l*n_elts] += p[k*stride + l];
4782         }
4783       }
4784       j += itf->size;
4785     }
4786     break;
4787 
4788   case CS_UINT32:
4789     for (i = 0, j = 0; i < ifs->size; i++) {
4790       cs_interface_t *itf = ifs->interfaces[i];
4791       uint32_t *v = var;
4792       const uint32_t *p = (const uint32_t *)buf + j*stride;
4793       if (stride < 2 || interlace) {
4794         for (k = 0; k < itf->size; k++) {
4795           cs_lnum_t elt_id = itf->elt_id[k];
4796           for (l = 0; l < stride; l++)
4797             v[elt_id*stride + l] += p[k*stride + l];
4798         }
4799       }
4800       else {
4801         for (k = 0; k < itf->size; k++) {
4802           cs_lnum_t elt_id = itf->elt_id[k];
4803           for (l = 0; l < stride; l++)
4804             v[elt_id + l*n_elts] += p[k*stride + l];
4805         }
4806       }
4807       j += itf->size;
4808     }
4809     break;
4810 
4811   case CS_UINT64:
4812     for (i = 0, j = 0; i < ifs->size; i++) {
4813       cs_interface_t *itf = ifs->interfaces[i];
4814       uint64_t *v = var;
4815       const uint64_t *p = (const uint64_t *)buf + j*stride;
4816       if (stride < 2 || interlace) {
4817         for (k = 0; k < itf->size; k++) {
4818           cs_lnum_t elt_id = itf->elt_id[k];
4819           for (l = 0; l < stride; l++)
4820             v[elt_id*stride + l] += p[k*stride + l];
4821         }
4822       }
4823       else {
4824         for (k = 0; k < itf->size; k++) {
4825           cs_lnum_t elt_id = itf->elt_id[k];
4826           for (l = 0; l < stride; l++)
4827             v[elt_id + l*n_elts] += p[k*stride + l];
4828         }
4829       }
4830       j += itf->size;
4831     }
4832     break;
4833 
4834   default:
4835     bft_error(__FILE__, __LINE__, 0,
4836               _("Called cs_interface_set_sum with unhandled datatype (%d)."),
4837               (int)datatype);
4838   }
4839 
4840   BFT_FREE(buf);
4841 }
4842 
4843 /*----------------------------------------------------------------------------*/
4844 /*!
4845  * \brief Update the sum of values for elements associated with an
4846  * interface set, allowing control over periodicity.
4847  *
4848  * On input, the variable array should contain local contributions. On output,
4849  * contributions from matching elements on parallel or periodic boundaries
4850  * have been added.
4851  *
4852  * Only the values of elements belonging to the interfaces are modified.
4853  *
4854  * \param[in]       ifs        pointer to a fvm_interface_set_t structure
4855  * \param[in]       n_elts     number of elements in var buffer
4856  * \param[in]       stride     number of values (non interlaced) by entity
4857  * \param[in]       interlace  true if variable is interlaced (for stride > 1)
4858  * \param[in]       datatype   type of data considered
4859  * \param[in]       tr_ignore  if > 0, ignore periodicity with rotation;
4860  *                             if > 1, ignore all periodic transforms
4861  * \param[in, out]  var        variable buffer
4862  */
4863 /*----------------------------------------------------------------------------*/
4864 
4865 void
cs_interface_set_sum_tr(const cs_interface_set_t * ifs,cs_lnum_t n_elts,cs_lnum_t stride,bool interlace,cs_datatype_t datatype,int tr_ignore,void * var)4866 cs_interface_set_sum_tr(const cs_interface_set_t  *ifs,
4867                         cs_lnum_t                  n_elts,
4868                         cs_lnum_t                  stride,
4869                         bool                       interlace,
4870                         cs_datatype_t              datatype,
4871                         int                        tr_ignore,
4872                         void                      *var)
4873 {
4874   cs_lnum_t stride_size = cs_datatype_size[datatype]*stride;
4875   unsigned char *buf = NULL;
4876 
4877   int n_tr = 0;
4878 
4879   if (tr_ignore > 0 && ifs->periodicity != NULL) {
4880     if (tr_ignore < 2) {
4881       int n_tr_max = fvm_periodicity_get_n_transforms(ifs->periodicity);
4882       for (int tr_id = 0; tr_id < n_tr_max; tr_id++) {
4883         if (fvm_periodicity_get_type(ifs->periodicity, tr_id)
4884             < FVM_PERIODICITY_ROTATION)
4885           n_tr = tr_id + 1;
4886       }
4887     }
4888     n_tr += 1; /* add base "identity" transform_id */
4889   }
4890 
4891   if (n_tr < 1) {
4892     cs_interface_set_sum(ifs,
4893                          n_elts,
4894                          stride,
4895                          interlace,
4896                          datatype,
4897                          var);
4898     return;
4899   }
4900 
4901   /* We can use a fixed max periodicity type here based on translation,
4902      because when even translation is ignored, n_tr has been set to 1 above,
4903      and the first transform is "no transform", so tests should always
4904      return a correct value */
4905 
4906   fvm_periodicity_type_t tr_threshold = FVM_PERIODICITY_ROTATION;
4907 
4908   BFT_MALLOC(buf, cs_interface_set_n_elts(ifs)*stride_size, unsigned char);
4909 
4910   if (stride < 2 || interlace)
4911     cs_interface_set_copy_array(ifs,
4912                                 datatype,
4913                                 stride,
4914                                 true, /* src_on_parent */
4915                                 var,
4916                                 buf);
4917 
4918   else
4919     _interface_set_copy_array_ni(ifs,
4920                                  datatype,
4921                                  n_elts,
4922                                  stride,
4923                                  var,
4924                                  buf);
4925 
4926   /* Now increment values */
4927 
4928   cs_lnum_t j = 0;
4929 
4930   switch (datatype) {
4931 
4932   case CS_FLOAT:
4933     for (int i = 0; i < ifs->size; i++) {
4934       cs_interface_t *itf = ifs->interfaces[i];
4935       for (int tr_id = 0; tr_id < n_tr; tr_id++) {
4936         cs_lnum_t s_id = itf->tr_index[tr_id];
4937         cs_lnum_t e_id = itf->tr_index[tr_id+1];
4938         if (e_id > s_id && tr_id > 0) {
4939           if (  fvm_periodicity_get_type(ifs->periodicity, tr_id-1)
4940               >= tr_threshold)
4941             continue;
4942         }
4943         float *v = var;
4944         const float *p = (const float *)buf + j*stride;
4945         if (stride < 2 || interlace) {
4946           for (cs_lnum_t k = s_id; k < e_id; k++) {
4947             cs_lnum_t elt_id = itf->elt_id[k];
4948             for (cs_lnum_t l = 0; l < stride; l++)
4949               v[elt_id*stride + l] += p[k*stride + l];
4950           }
4951         }
4952         else {
4953           for (cs_lnum_t k = s_id; k < e_id; k++) {
4954             cs_lnum_t elt_id = itf->elt_id[k];
4955             for (cs_lnum_t l = 0; l < stride; l++)
4956               v[elt_id + l*n_elts] += p[k*stride + l];
4957           }
4958         }
4959       }
4960       j += itf->size;
4961     }
4962     break;
4963 
4964   case CS_DOUBLE:
4965     for (int i = 0; i < ifs->size; i++) {
4966       cs_interface_t *itf = ifs->interfaces[i];
4967       for (int tr_id = 0; tr_id < n_tr; tr_id++) {
4968         cs_lnum_t s_id = itf->tr_index[tr_id];
4969         cs_lnum_t e_id = itf->tr_index[tr_id+1];
4970         if (e_id > s_id && tr_id > 0) {
4971           if (  fvm_periodicity_get_type(ifs->periodicity, tr_id-1)
4972               >= tr_threshold)
4973             continue;
4974         }
4975         double *v = var;
4976         const double *p = (const double *)buf + j*stride;
4977         if (stride < 2 || interlace) {
4978           for (cs_lnum_t k = s_id; k < e_id; k++) {
4979             cs_lnum_t elt_id = itf->elt_id[k];
4980             for (cs_lnum_t l = 0; l < stride; l++)
4981               v[elt_id*stride + l] += p[k*stride + l];
4982           }
4983         }
4984         else {
4985           for (cs_lnum_t k = s_id; k < e_id; k++) {
4986             cs_lnum_t elt_id = itf->elt_id[k];
4987             for (cs_lnum_t l = 0; l < stride; l++)
4988               v[elt_id + l*n_elts] += p[k*stride + l];
4989           }
4990         }
4991       }
4992       j += itf->size;
4993     }
4994     break;
4995 
4996   default:
4997     for (int i = 0; i < ifs->size; i++) {
4998       cs_interface_t *itf = ifs->interfaces[i];
4999       for (int tr_id = 0; tr_id < n_tr; tr_id++) {
5000         cs_lnum_t s_id = itf->tr_index[tr_id];
5001         cs_lnum_t e_id = itf->tr_index[tr_id+1];
5002         if (e_id > s_id && tr_id > 0) {
5003           if (  fvm_periodicity_get_type(ifs->periodicity, tr_id-1)
5004               >= tr_threshold)
5005             continue;
5006         }
5007         char *v = var;
5008         cs_lnum_t _stride = stride * cs_datatype_size[datatype];
5009         const char *p = (const char *)buf + j*_stride;
5010         if (stride < 2 || interlace) {
5011           for (cs_lnum_t k = s_id; k < e_id; k++) {
5012             cs_lnum_t elt_id = itf->elt_id[k];
5013             for (cs_lnum_t l = 0; l < _stride; l++)
5014               v[elt_id*_stride + l] += p[k*_stride + l];
5015           }
5016         }
5017         else {
5018           cs_lnum_t elt_size = cs_datatype_size[datatype];
5019           for (cs_lnum_t k = s_id; k < e_id; k++) {
5020             cs_lnum_t elt_id = itf->elt_id[k];
5021             for (cs_lnum_t l = 0; l < stride; l++) {
5022               for (cs_lnum_t q = 0; q < elt_size; q++)
5023                 v[elt_id*elt_size + l*n_elts + q]
5024                   += p[k*_stride + l*elt_size + q];
5025             }
5026           }
5027         }
5028       }
5029       j += itf->size;
5030     }
5031 
5032   }
5033 
5034   BFT_FREE(buf);
5035 }
5036 
5037 /*----------------------------------------------------------------------------*/
5038 /*!
5039  * \brief Update to minimum value for elements associated with an
5040  * interface set.
5041  *
5042  * On input, the variable array should contain local contributions. On output,
5043  * contributions from matching elements on parallel or periodic boundaries
5044  * have been added.
5045  *
5046  * Only the values of elements belonging to the interfaces are modified.
5047  *
5048  * \param[in]       ifs        pointer to a fvm_interface_set_t structure
5049  * \param[in]       n_elts     number of elements in var buffer
5050  * \param[in]       stride     number of values (non interlaced) by entity
5051  * \param[in]       interlace  true if variable is interlaced (for stride > 1)
5052  * \param[in]       datatype   type of data considered
5053  * \param[in, out]  var        variable buffer
5054  */
5055 /*----------------------------------------------------------------------------*/
5056 
5057 void
cs_interface_set_min(const cs_interface_set_t * ifs,cs_lnum_t n_elts,cs_lnum_t stride,bool interlace,cs_datatype_t datatype,void * var)5058 cs_interface_set_min(const cs_interface_set_t  *ifs,
5059                      cs_lnum_t                  n_elts,
5060                      cs_lnum_t                  stride,
5061                      bool                       interlace,
5062                      cs_datatype_t              datatype,
5063                      void                      *var)
5064 {
5065   int i;
5066   cs_lnum_t j, k, l;
5067   cs_lnum_t stride_size = cs_datatype_size[datatype]*stride;
5068   unsigned char *buf = NULL;
5069 
5070   BFT_MALLOC(buf, cs_interface_set_n_elts(ifs)*stride_size, unsigned char);
5071 
5072   if (stride < 2 || interlace)
5073     cs_interface_set_copy_array(ifs,
5074                                 datatype,
5075                                 stride,
5076                                 true, /* src_on_parent */
5077                                 var,
5078                                 buf);
5079 
5080   else
5081     _interface_set_copy_array_ni(ifs,
5082                                  datatype,
5083                                  n_elts,
5084                                  stride,
5085                                  var,
5086                                  buf);
5087 
5088   /* Now increment values */
5089 
5090   switch (datatype) {
5091 
5092   case CS_CHAR:
5093     for (i = 0, j = 0; i < ifs->size; i++) {
5094       cs_interface_t *itf = ifs->interfaces[i];
5095       char *v = var;
5096       const char *p = (const char *)buf + j*stride;
5097       if (stride < 2 || interlace) {
5098         for (k = 0; k < itf->size; k++) {
5099           cs_lnum_t elt_id = itf->elt_id[k];
5100           for (l = 0; l < stride; l++)
5101             v[elt_id*stride + l] = CS_MIN(v[elt_id*stride + l],
5102                                           p[k*stride + l]);
5103         }
5104       }
5105       else {
5106         for (k = 0; k < itf->size; k++) {
5107           cs_lnum_t elt_id = itf->elt_id[k];
5108           for (l = 0; l < stride; l++)
5109             v[elt_id + l*n_elts] = CS_MIN(v[elt_id + l*n_elts],
5110                                           p[k*stride + l]);
5111         }
5112       }
5113       j += itf->size;
5114     }
5115     break;
5116 
5117   case CS_FLOAT:
5118     for (i = 0, j = 0; i < ifs->size; i++) {
5119       cs_interface_t *itf = ifs->interfaces[i];
5120       float *v = var;
5121       const float *p = (const float *)buf + j*stride;
5122       if (stride < 2 || interlace) {
5123         for (k = 0; k < itf->size; k++) {
5124           cs_lnum_t elt_id = itf->elt_id[k];
5125           for (l = 0; l < stride; l++)
5126             v[elt_id*stride + l] = CS_MIN(v[elt_id*stride + l],
5127                                           p[k*stride + l]);
5128         }
5129       }
5130       else {
5131         for (k = 0; k < itf->size; k++) {
5132           cs_lnum_t elt_id = itf->elt_id[k];
5133           for (l = 0; l < stride; l++)
5134             v[elt_id + l*n_elts] = CS_MIN(p[k*stride + l],
5135                                           v[elt_id + l*n_elts]);
5136         }
5137       }
5138       j += itf->size;
5139     }
5140     break;
5141 
5142   case CS_DOUBLE:
5143     for (i = 0, j = 0; i < ifs->size; i++) {
5144       cs_interface_t *itf = ifs->interfaces[i];
5145       double *v = var;
5146       const double *p = (const double *)buf + j*stride;
5147       if (stride < 2 || interlace) {
5148         for (k = 0; k < itf->size; k++) {
5149           cs_lnum_t elt_id = itf->elt_id[k];
5150           for (l = 0; l < stride; l++)
5151             v[elt_id*stride + l] = CS_MIN(v[elt_id*stride + l],
5152                                           p[k*stride + l]);
5153         }
5154       }
5155       else {
5156         for (k = 0; k < itf->size; k++) {
5157           cs_lnum_t elt_id = itf->elt_id[k];
5158           for (l = 0; l < stride; l++)
5159             v[elt_id + l*n_elts] = CS_MIN(v[elt_id + l*n_elts],
5160                                           p[k*stride + l]);
5161         }
5162       }
5163       j += itf->size;
5164     }
5165     break;
5166 
5167   case CS_INT32:
5168     for (i = 0, j = 0; i < ifs->size; i++) {
5169       cs_interface_t *itf = ifs->interfaces[i];
5170       int32_t *v = var;
5171       const int32_t *p = (const int32_t *)buf + j*stride;
5172       if (stride < 2 || interlace) {
5173         for (k = 0; k < itf->size; k++) {
5174           cs_lnum_t elt_id = itf->elt_id[k];
5175           for (l = 0; l < stride; l++)
5176             v[elt_id*stride + l] = CS_MIN(v[elt_id*stride + l],
5177                                           p[k*stride + l]);
5178         }
5179       }
5180       else {
5181         for (k = 0; k < itf->size; k++) {
5182           cs_lnum_t elt_id = itf->elt_id[k];
5183           for (l = 0; l < stride; l++)
5184             v[elt_id + l*n_elts] = CS_MIN(v[elt_id + l*n_elts],
5185                                           p[k*stride + l]);
5186         }
5187       }
5188       j += itf->size;
5189     }
5190     break;
5191 
5192   case CS_INT64:
5193     for (i = 0, j = 0; i < ifs->size; i++) {
5194       cs_interface_t *itf = ifs->interfaces[i];
5195       int64_t *v = var;
5196       const int64_t *p = (const int64_t *)buf + j*stride;
5197       if (stride < 2 || interlace) {
5198         for (k = 0; k < itf->size; k++) {
5199           cs_lnum_t elt_id = itf->elt_id[k];
5200           for (l = 0; l < stride; l++)
5201             v[elt_id*stride + l] = CS_MIN(v[elt_id*stride + l],
5202                                           p[k*stride + l]);
5203         }
5204       }
5205       else {
5206         for (k = 0; k < itf->size; k++) {
5207           cs_lnum_t elt_id = itf->elt_id[k];
5208           for (l = 0; l < stride; l++)
5209             v[elt_id + l*n_elts] = CS_MIN(v[elt_id + l*n_elts],
5210                                           p[k*stride + l]);
5211         }
5212       }
5213       j += itf->size;
5214     }
5215     break;
5216 
5217   case CS_UINT16:
5218     for (i = 0, j = 0; i < ifs->size; i++) {
5219       cs_interface_t *itf = ifs->interfaces[i];
5220       uint16_t *v = var;
5221       const uint16_t *p = (const uint16_t *)buf + j*stride;
5222       if (stride < 2 || interlace) {
5223         for (k = 0; k < itf->size; k++) {
5224           cs_lnum_t elt_id = itf->elt_id[k];
5225           for (l = 0; l < stride; l++)
5226             v[elt_id*stride + l] = CS_MIN(v[elt_id*stride + l],
5227                                           p[k*stride + l]);
5228         }
5229       }
5230       else {
5231         for (k = 0; k < itf->size; k++) {
5232           cs_lnum_t elt_id = itf->elt_id[k];
5233           for (l = 0; l < stride; l++)
5234             v[elt_id + l*n_elts] = CS_MIN(v[elt_id + l*n_elts],
5235                                           p[k*stride + l]);
5236         }
5237       }
5238       j += itf->size;
5239     }
5240     break;
5241 
5242   case CS_UINT32:
5243     for (i = 0, j = 0; i < ifs->size; i++) {
5244       cs_interface_t *itf = ifs->interfaces[i];
5245       uint32_t *v = var;
5246       const uint32_t *p = (const uint32_t *)buf + j*stride;
5247       if (stride < 2 || interlace) {
5248         for (k = 0; k < itf->size; k++) {
5249           cs_lnum_t elt_id = itf->elt_id[k];
5250           for (l = 0; l < stride; l++)
5251             v[elt_id*stride + l] = CS_MIN(v[elt_id*stride + l],
5252                                           p[k*stride + l]);
5253         }
5254       }
5255       else {
5256         for (k = 0; k < itf->size; k++) {
5257           cs_lnum_t elt_id = itf->elt_id[k];
5258           for (l = 0; l < stride; l++)
5259             v[elt_id + l*n_elts] = CS_MIN(v[elt_id + l*n_elts],
5260                                           p[k*stride + l]);
5261         }
5262       }
5263       j += itf->size;
5264     }
5265     break;
5266 
5267   case CS_UINT64:
5268     for (i = 0, j = 0; i < ifs->size; i++) {
5269       cs_interface_t *itf = ifs->interfaces[i];
5270       uint64_t *v = var;
5271       const uint64_t *p = (const uint64_t *)buf + j*stride;
5272       if (stride < 2 || interlace) {
5273         for (k = 0; k < itf->size; k++) {
5274           cs_lnum_t elt_id = itf->elt_id[k];
5275           for (l = 0; l < stride; l++)
5276             v[elt_id*stride + l] = CS_MIN(v[elt_id*stride + l],
5277                                           p[k*stride + l]);
5278         }
5279       }
5280       else {
5281         for (k = 0; k < itf->size; k++) {
5282           cs_lnum_t elt_id = itf->elt_id[k];
5283           for (l = 0; l < stride; l++)
5284             v[elt_id + l*n_elts] = CS_MIN(v[elt_id + l*n_elts],
5285                                           p[k*stride + l]);
5286         }
5287       }
5288       j += itf->size;
5289     }
5290     break;
5291 
5292   default:
5293     bft_error(__FILE__, __LINE__, 0,
5294               _("Called cs_interface_set_max with unhandled datatype (%d)."),
5295               (int)datatype);
5296   }
5297 
5298   BFT_FREE(buf);
5299 }
5300 
5301 /*----------------------------------------------------------------------------*/
5302 /*!
5303  * \brief Update to maximum value for elements associated with an
5304  * interface set.
5305  *
5306  * On input, the variable array should contain local contributions. On output,
5307  * contributions from matching elements on parallel or periodic boundaries
5308  * have been added.
5309  *
5310  * Only the values of elements belonging to the interfaces are modified.
5311  *
5312  * \param[in]       ifs        pointer to a fvm_interface_set_t structure
5313  * \param[in]       n_elts     number of elements in var buffer
5314  * \param[in]       stride     number of values (non interlaced) by entity
5315  * \param[in]       interlace  true if variable is interlaced (for stride > 1)
5316  * \param[in]       datatype   type of data considered
5317  * \param[in, out]  var        variable buffer
5318  */
5319 /*----------------------------------------------------------------------------*/
5320 
5321 void
cs_interface_set_max(const cs_interface_set_t * ifs,cs_lnum_t n_elts,cs_lnum_t stride,bool interlace,cs_datatype_t datatype,void * var)5322 cs_interface_set_max(const cs_interface_set_t  *ifs,
5323                      cs_lnum_t                  n_elts,
5324                      cs_lnum_t                  stride,
5325                      bool                       interlace,
5326                      cs_datatype_t              datatype,
5327                      void                      *var)
5328 {
5329   int i;
5330   cs_lnum_t j, k, l;
5331   cs_lnum_t stride_size = cs_datatype_size[datatype]*stride;
5332   unsigned char *buf = NULL;
5333 
5334   BFT_MALLOC(buf, cs_interface_set_n_elts(ifs)*stride_size, unsigned char);
5335 
5336   if (stride < 2 || interlace)
5337     cs_interface_set_copy_array(ifs,
5338                                 datatype,
5339                                 stride,
5340                                 true, /* src_on_parent */
5341                                 var,
5342                                 buf);
5343 
5344   else
5345     _interface_set_copy_array_ni(ifs,
5346                                  datatype,
5347                                  n_elts,
5348                                  stride,
5349                                  var,
5350                                  buf);
5351 
5352   /* Now increment values */
5353 
5354   switch (datatype) {
5355 
5356   case CS_CHAR:
5357     for (i = 0, j = 0; i < ifs->size; i++) {
5358       cs_interface_t *itf = ifs->interfaces[i];
5359       char *v = var;
5360       const char *p = (const char *)buf + j*stride;
5361       if (stride < 2 || interlace) {
5362         for (k = 0; k < itf->size; k++) {
5363           cs_lnum_t elt_id = itf->elt_id[k];
5364           for (l = 0; l < stride; l++)
5365             v[elt_id*stride + l] = CS_MAX(v[elt_id*stride + l],
5366                                           p[k*stride + l]);
5367         }
5368       }
5369       else {
5370         for (k = 0; k < itf->size; k++) {
5371           cs_lnum_t elt_id = itf->elt_id[k];
5372           for (l = 0; l < stride; l++)
5373             v[elt_id + l*n_elts] = CS_MAX(v[elt_id + l*n_elts],
5374                                           p[k*stride + l]);
5375         }
5376       }
5377       j += itf->size;
5378     }
5379     break;
5380 
5381   case CS_FLOAT:
5382     for (i = 0, j = 0; i < ifs->size; i++) {
5383       cs_interface_t *itf = ifs->interfaces[i];
5384       float *v = var;
5385       const float *p = (const float *)buf + j*stride;
5386       if (stride < 2 || interlace) {
5387         for (k = 0; k < itf->size; k++) {
5388           cs_lnum_t elt_id = itf->elt_id[k];
5389           for (l = 0; l < stride; l++)
5390             v[elt_id*stride + l] = CS_MAX(v[elt_id*stride + l],
5391                                           p[k*stride + l]);
5392         }
5393       }
5394       else {
5395         for (k = 0; k < itf->size; k++) {
5396           cs_lnum_t elt_id = itf->elt_id[k];
5397           for (l = 0; l < stride; l++)
5398             v[elt_id + l*n_elts] = CS_MAX(p[k*stride + l],
5399                                           v[elt_id + l*n_elts]);
5400         }
5401       }
5402       j += itf->size;
5403     }
5404     break;
5405 
5406   case CS_DOUBLE:
5407     for (i = 0, j = 0; i < ifs->size; i++) {
5408       cs_interface_t *itf = ifs->interfaces[i];
5409       double *v = var;
5410       const double *p = (const double *)buf + j*stride;
5411       if (stride < 2 || interlace) {
5412         for (k = 0; k < itf->size; k++) {
5413           cs_lnum_t elt_id = itf->elt_id[k];
5414           for (l = 0; l < stride; l++)
5415             v[elt_id*stride + l] = CS_MAX(v[elt_id*stride + l],
5416                                           p[k*stride + l]);
5417         }
5418       }
5419       else {
5420         for (k = 0; k < itf->size; k++) {
5421           cs_lnum_t elt_id = itf->elt_id[k];
5422           for (l = 0; l < stride; l++)
5423             v[elt_id + l*n_elts] = CS_MAX(v[elt_id + l*n_elts],
5424                                           p[k*stride + l]);
5425         }
5426       }
5427       j += itf->size;
5428     }
5429     break;
5430 
5431   case CS_INT32:
5432     for (i = 0, j = 0; i < ifs->size; i++) {
5433       cs_interface_t *itf = ifs->interfaces[i];
5434       int32_t *v = var;
5435       const int32_t *p = (const int32_t *)buf + j*stride;
5436       if (stride < 2 || interlace) {
5437         for (k = 0; k < itf->size; k++) {
5438           cs_lnum_t elt_id = itf->elt_id[k];
5439           for (l = 0; l < stride; l++)
5440             v[elt_id*stride + l] = CS_MAX(v[elt_id*stride + l],
5441                                           p[k*stride + l]);
5442         }
5443       }
5444       else {
5445         for (k = 0; k < itf->size; k++) {
5446           cs_lnum_t elt_id = itf->elt_id[k];
5447           for (l = 0; l < stride; l++)
5448             v[elt_id + l*n_elts] = CS_MAX(v[elt_id + l*n_elts],
5449                                           p[k*stride + l]);
5450         }
5451       }
5452       j += itf->size;
5453     }
5454     break;
5455 
5456   case CS_INT64:
5457     for (i = 0, j = 0; i < ifs->size; i++) {
5458       cs_interface_t *itf = ifs->interfaces[i];
5459       int64_t *v = var;
5460       const int64_t *p = (const int64_t *)buf + j*stride;
5461       if (stride < 2 || interlace) {
5462         for (k = 0; k < itf->size; k++) {
5463           cs_lnum_t elt_id = itf->elt_id[k];
5464           for (l = 0; l < stride; l++)
5465             v[elt_id*stride + l] = CS_MAX(v[elt_id*stride + l],
5466                                           p[k*stride + l]);
5467         }
5468       }
5469       else {
5470         for (k = 0; k < itf->size; k++) {
5471           cs_lnum_t elt_id = itf->elt_id[k];
5472           for (l = 0; l < stride; l++)
5473             v[elt_id + l*n_elts] = CS_MAX(v[elt_id + l*n_elts],
5474                                           p[k*stride + l]);
5475         }
5476       }
5477       j += itf->size;
5478     }
5479     break;
5480 
5481   case CS_UINT16:
5482     for (i = 0, j = 0; i < ifs->size; i++) {
5483       cs_interface_t *itf = ifs->interfaces[i];
5484       uint16_t *v = var;
5485       const uint16_t *p = (const uint16_t *)buf + j*stride;
5486       if (stride < 2 || interlace) {
5487         for (k = 0; k < itf->size; k++) {
5488           cs_lnum_t elt_id = itf->elt_id[k];
5489           for (l = 0; l < stride; l++)
5490             v[elt_id*stride + l] = CS_MAX(v[elt_id*stride + l],
5491                                           p[k*stride + l]);
5492         }
5493       }
5494       else {
5495         for (k = 0; k < itf->size; k++) {
5496           cs_lnum_t elt_id = itf->elt_id[k];
5497           for (l = 0; l < stride; l++)
5498             v[elt_id + l*n_elts] = CS_MAX(v[elt_id + l*n_elts],
5499                                           p[k*stride + l]);
5500         }
5501       }
5502       j += itf->size;
5503     }
5504     break;
5505 
5506   case CS_UINT32:
5507     for (i = 0, j = 0; i < ifs->size; i++) {
5508       cs_interface_t *itf = ifs->interfaces[i];
5509       uint32_t *v = var;
5510       const uint32_t *p = (const uint32_t *)buf + j*stride;
5511       if (stride < 2 || interlace) {
5512         for (k = 0; k < itf->size; k++) {
5513           cs_lnum_t elt_id = itf->elt_id[k];
5514           for (l = 0; l < stride; l++)
5515             v[elt_id*stride + l] = CS_MAX(v[elt_id*stride + l],
5516                                           p[k*stride + l]);
5517         }
5518       }
5519       else {
5520         for (k = 0; k < itf->size; k++) {
5521           cs_lnum_t elt_id = itf->elt_id[k];
5522           for (l = 0; l < stride; l++)
5523             v[elt_id + l*n_elts] = CS_MAX(v[elt_id + l*n_elts],
5524                                           p[k*stride + l]);
5525         }
5526       }
5527       j += itf->size;
5528     }
5529     break;
5530 
5531   case CS_UINT64:
5532     for (i = 0, j = 0; i < ifs->size; i++) {
5533       cs_interface_t *itf = ifs->interfaces[i];
5534       uint64_t *v = var;
5535       const uint64_t *p = (const uint64_t *)buf + j*stride;
5536       if (stride < 2 || interlace) {
5537         for (k = 0; k < itf->size; k++) {
5538           cs_lnum_t elt_id = itf->elt_id[k];
5539           for (l = 0; l < stride; l++)
5540             v[elt_id*stride + l] = CS_MAX(v[elt_id*stride + l],
5541                                           p[k*stride + l]);
5542         }
5543       }
5544       else {
5545         for (k = 0; k < itf->size; k++) {
5546           cs_lnum_t elt_id = itf->elt_id[k];
5547           for (l = 0; l < stride; l++)
5548             v[elt_id + l*n_elts] = CS_MAX(v[elt_id + l*n_elts],
5549                                           p[k*stride + l]);
5550         }
5551       }
5552       j += itf->size;
5553     }
5554     break;
5555 
5556   default:
5557     bft_error(__FILE__, __LINE__, 0,
5558               _("Called cs_interface_set_max with unhandled datatype (%d)."),
5559               (int)datatype);
5560   }
5561 
5562   BFT_FREE(buf);
5563 }
5564 
5565 /*----------------------------------------------------------------------------*/
5566 /*!
5567  * \brief Update the maximum of values for elements associated with an
5568  * interface set, allowing control over periodicity.
5569  *
5570  * On input, the variable array should contain local contributions. On output,
5571  * contributions from matching elements on parallel or periodic boundaries
5572  * have been added.
5573  *
5574  * Only the values of elements belonging to the interfaces are modified.
5575  *
5576  * \param[in]       ifs        pointer to a fvm_interface_set_t structure
5577  * \param[in]       n_elts     number of elements in var buffer
5578  * \param[in]       stride     number of values (non interlaced) by entity
5579  * \param[in]       interlace  true if variable is interlaced (for stride > 1)
5580  * \param[in]       datatype   type of data considered
5581  * \param[in]       tr_ignore  if > 0, ignore periodicity with rotation;
5582  *                             if > 1, ignore all periodic transforms
5583  * \param[in, out]  var        variable buffer
5584  */
5585 /*----------------------------------------------------------------------------*/
5586 
5587 void
cs_interface_set_max_tr(const cs_interface_set_t * ifs,cs_lnum_t n_elts,cs_lnum_t stride,bool interlace,cs_datatype_t datatype,int tr_ignore,void * var)5588 cs_interface_set_max_tr(const cs_interface_set_t  *ifs,
5589                         cs_lnum_t                  n_elts,
5590                         cs_lnum_t                  stride,
5591                         bool                       interlace,
5592                         cs_datatype_t              datatype,
5593                         int                        tr_ignore,
5594                         void                      *var)
5595 {
5596   cs_lnum_t stride_size = cs_datatype_size[datatype]*stride;
5597   unsigned char *buf = NULL;
5598 
5599   int n_tr = 0;
5600 
5601   if (tr_ignore > 0 && ifs->periodicity != NULL) {
5602     if (tr_ignore < 2) {
5603       int n_tr_max = fvm_periodicity_get_n_transforms(ifs->periodicity);
5604       for (int tr_id = 0; tr_id < n_tr_max; tr_id++) {
5605         if (fvm_periodicity_get_type(ifs->periodicity, tr_id)
5606             < FVM_PERIODICITY_ROTATION)
5607           n_tr = tr_id + 1;
5608       }
5609     }
5610     n_tr += 1; /* add base "identity" transform_id */
5611   }
5612 
5613   if (n_tr < 1) {
5614     cs_interface_set_max(ifs,
5615                          n_elts,
5616                          stride,
5617                          interlace,
5618                          datatype,
5619                          var);
5620     return;
5621   }
5622 
5623   /* We can use a fixed max periodicity type here based on translation,
5624      because when even translation is ignored, n_tr has been set to 1 above,
5625      and the first transform is "no transform", so tests should always
5626      return a correct value */
5627 
5628   fvm_periodicity_type_t tr_threshold = FVM_PERIODICITY_ROTATION;
5629 
5630   BFT_MALLOC(buf, cs_interface_set_n_elts(ifs)*stride_size, unsigned char);
5631 
5632   if (stride < 2 || interlace)
5633     cs_interface_set_copy_array(ifs,
5634                                 datatype,
5635                                 stride,
5636                                 true, /* src_on_parent */
5637                                 var,
5638                                 buf);
5639 
5640   else
5641     _interface_set_copy_array_ni(ifs,
5642                                  datatype,
5643                                  n_elts,
5644                                  stride,
5645                                  var,
5646                                  buf);
5647 
5648   /* Now increment values */
5649 
5650   cs_lnum_t j = 0;
5651 
5652   for (int i = 0; i < ifs->size; i++) {
5653 
5654     cs_interface_t *itf = ifs->interfaces[i];
5655 
5656     for (int tr_id = 0; tr_id < n_tr; tr_id++) {
5657 
5658       cs_lnum_t s_id = itf->tr_index[tr_id];
5659       cs_lnum_t e_id = itf->tr_index[tr_id+1];
5660       if (e_id > s_id && tr_id > 0) {
5661         if (  fvm_periodicity_get_type(ifs->periodicity, tr_id-1)
5662             >= tr_threshold)
5663           continue;
5664       }
5665 
5666       switch (datatype) {
5667 
5668       case CS_CHAR:
5669         {
5670           char *v = var;
5671           const char *p = (const char *)buf + j*stride;
5672           if (stride < 2 || interlace) {
5673             for (cs_lnum_t k = s_id; k < e_id; k++) {
5674               cs_lnum_t elt_id = itf->elt_id[k];
5675               for (cs_lnum_t l = 0; l < stride; l++)
5676                 v[elt_id*stride + l] = CS_MAX(v[elt_id*stride + l],
5677                                               p[k*stride + l]);
5678             }
5679           }
5680           else {
5681             for (cs_lnum_t k = s_id; k < e_id; k++) {
5682               cs_lnum_t elt_id = itf->elt_id[k];
5683               for (cs_lnum_t l = 0; l < stride; l++)
5684                 v[elt_id + l*n_elts] = CS_MAX(v[elt_id + l*n_elts],
5685                                               p[k*stride + l]);
5686             }
5687           }
5688         }
5689         break;
5690 
5691       case CS_FLOAT:
5692         {
5693           float *v = var;
5694           const float *p = (const float *)buf + j*stride;
5695           if (stride < 2 || interlace) {
5696             for (cs_lnum_t k = s_id; k < e_id; k++) {
5697               cs_lnum_t elt_id = itf->elt_id[k];
5698               for (cs_lnum_t l = 0; l < stride; l++)
5699                 v[elt_id*stride + l] = CS_MAX(v[elt_id*stride + l],
5700                                               p[k*stride + l]);
5701             }
5702           }
5703           else {
5704             for (cs_lnum_t k = s_id; k < e_id; k++) {
5705               cs_lnum_t elt_id = itf->elt_id[k];
5706               for (cs_lnum_t l = 0; l < stride; l++)
5707                 v[elt_id + l*n_elts] = CS_MAX(v[elt_id + l*n_elts],
5708                                               p[k*stride + l]);
5709             }
5710           }
5711         }
5712         break;
5713 
5714       case CS_DOUBLE:
5715         {
5716           double *v = var;
5717           const double *p = (const double *)buf + j*stride;
5718           if (stride < 2 || interlace) {
5719             for (cs_lnum_t k = s_id; k < e_id; k++) {
5720               cs_lnum_t elt_id = itf->elt_id[k];
5721               for (cs_lnum_t l = 0; l < stride; l++)
5722                 v[elt_id*stride + l] = CS_MAX(v[elt_id*stride + l],
5723                                               p[k*stride + l]);
5724             }
5725           }
5726           else {
5727             for (cs_lnum_t k = s_id; k < e_id; k++) {
5728               cs_lnum_t elt_id = itf->elt_id[k];
5729               for (cs_lnum_t l = 0; l < stride; l++)
5730                 v[elt_id + l*n_elts] = CS_MAX(v[elt_id + l*n_elts],
5731                                               p[k*stride + l]);
5732             }
5733           }
5734         }
5735         break;
5736 
5737       case CS_INT32:
5738         {
5739           int32_t *v = var;
5740           const int32_t *p = (const int32_t *)buf + j*stride;
5741           if (stride < 2 || interlace) {
5742             for (cs_lnum_t k = s_id; k < e_id; k++) {
5743               cs_lnum_t elt_id = itf->elt_id[k];
5744               for (cs_lnum_t l = 0; l < stride; l++)
5745                 v[elt_id*stride + l] = CS_MAX(v[elt_id*stride + l],
5746                                               p[k*stride + l]);
5747             }
5748           }
5749           else {
5750             for (cs_lnum_t k = s_id; k < e_id; k++) {
5751               cs_lnum_t elt_id = itf->elt_id[k];
5752               for (cs_lnum_t l = 0; l < stride; l++)
5753                 v[elt_id + l*n_elts] = CS_MAX(v[elt_id + l*n_elts],
5754                                               p[k*stride + l]);
5755             }
5756           }
5757         }
5758         break;
5759 
5760       case CS_INT64:
5761         {
5762           int64_t *v = var;
5763           const int64_t *p = (const int64_t *)buf + j*stride;
5764           if (stride < 2 || interlace) {
5765             for (cs_lnum_t k = s_id; k < e_id; k++) {
5766               cs_lnum_t elt_id = itf->elt_id[k];
5767               for (cs_lnum_t l = 0; l < stride; l++)
5768                 v[elt_id*stride + l] = CS_MAX(v[elt_id*stride + l],
5769                                               p[k*stride + l]);
5770             }
5771           }
5772           else {
5773             for (cs_lnum_t k = s_id; k < e_id; k++) {
5774               cs_lnum_t elt_id = itf->elt_id[k];
5775               for (cs_lnum_t l = 0; l < stride; l++)
5776                 v[elt_id + l*n_elts] = CS_MAX(v[elt_id + l*n_elts],
5777                                               p[k*stride + l]);
5778             }
5779           }
5780         }
5781         break;
5782 
5783       case CS_UINT16:
5784         {
5785           uint16_t *v = var;
5786           const uint16_t *p = (const uint16_t *)buf + j*stride;
5787           if (stride < 2 || interlace) {
5788             for (cs_lnum_t k = s_id; k < e_id; k++) {
5789               cs_lnum_t elt_id = itf->elt_id[k];
5790               for (cs_lnum_t l = 0; l < stride; l++)
5791                 v[elt_id*stride + l] = CS_MAX(v[elt_id*stride + l],
5792                                               p[k*stride + l]);
5793             }
5794           }
5795           else {
5796             for (cs_lnum_t k = s_id; k < e_id; k++) {
5797               cs_lnum_t elt_id = itf->elt_id[k];
5798               for (cs_lnum_t l = 0; l < stride; l++)
5799                 v[elt_id + l*n_elts] = CS_MAX(v[elt_id + l*n_elts],
5800                                               p[k*stride + l]);
5801             }
5802           }
5803         }
5804         break;
5805 
5806       case CS_UINT32:
5807         {
5808           uint32_t *v = var;
5809           const uint32_t *p = (const uint32_t *)buf + j*stride;
5810           if (stride < 2 || interlace) {
5811             for (cs_lnum_t k = s_id; k < e_id; k++) {
5812               cs_lnum_t elt_id = itf->elt_id[k];
5813               for (cs_lnum_t l = 0; l < stride; l++)
5814                 v[elt_id*stride + l] = CS_MAX(v[elt_id*stride + l],
5815                                               p[k*stride + l]);
5816             }
5817           }
5818           else {
5819             for (cs_lnum_t k = s_id; k < e_id; k++) {
5820               cs_lnum_t elt_id = itf->elt_id[k];
5821               for (cs_lnum_t l = 0; l < stride; l++)
5822                 v[elt_id + l*n_elts] = CS_MAX(v[elt_id + l*n_elts],
5823                                               p[k*stride + l]);
5824             }
5825           }
5826         }
5827         break;
5828 
5829       case CS_UINT64:
5830         {
5831           uint64_t *v = var;
5832           const uint64_t *p = (const uint64_t *)buf + j*stride;
5833           if (stride < 2 || interlace) {
5834             for (cs_lnum_t k = s_id; k < e_id; k++) {
5835               cs_lnum_t elt_id = itf->elt_id[k];
5836               for (cs_lnum_t l = 0; l < stride; l++)
5837                 v[elt_id*stride + l] = CS_MAX(v[elt_id*stride + l],
5838                                               p[k*stride + l]);
5839             }
5840           }
5841           else {
5842             for (cs_lnum_t k = s_id; k < e_id; k++) {
5843               cs_lnum_t elt_id = itf->elt_id[k];
5844               for (cs_lnum_t l = 0; l < stride; l++)
5845                 v[elt_id + l*n_elts] = CS_MAX(v[elt_id + l*n_elts],
5846                                               p[k*stride + l]);
5847             }
5848           }
5849         }
5850         break;
5851 
5852       default:
5853         bft_error(__FILE__, __LINE__, 0,
5854                   _("Called %s with unhandled datatype (%d)."),
5855                   __func__, (int)datatype);
5856       }
5857 
5858     } /* End of loop on transformations */
5859 
5860     j += itf->size;
5861 
5862   }
5863 
5864   BFT_FREE(buf);
5865 }
5866 
5867 /*----------------------------------------------------------------------------*/
5868 /*!
5869  * \brief Add matching element id information to an interface set.
5870  *
5871  * This information is required by calls to cs_interface_get_match_ids(),
5872  * and may be freed using cs_interface_set_free_match_ids().
5873  *
5874  * \param[in]  ifs  pointer to interface set structure
5875  */
5876 /*----------------------------------------------------------------------------*/
5877 
5878 void
cs_interface_set_add_match_ids(cs_interface_set_t * ifs)5879 cs_interface_set_add_match_ids(cs_interface_set_t  *ifs)
5880 {
5881   ifs->match_id_rc += 1;
5882 
5883   if (ifs->match_id_rc > 1)
5884     return;
5885 
5886   int i;
5887   cs_lnum_t j;
5888   int local_rank = 0;
5889   cs_lnum_t *send_buf = NULL;
5890 
5891 #if defined(HAVE_MPI)
5892 
5893   int n_ranks = 1;
5894   int request_count = 0;
5895   MPI_Request *request = NULL;
5896   MPI_Status *status  = NULL;
5897 
5898   if (ifs->comm != MPI_COMM_NULL) {
5899     MPI_Comm_rank(ifs->comm, &local_rank);
5900     MPI_Comm_size(ifs->comm, &n_ranks);
5901   }
5902 
5903 #endif
5904 
5905   assert(ifs != NULL);
5906 
5907   BFT_MALLOC(send_buf, cs_interface_set_n_elts(ifs), cs_lnum_t);
5908 
5909   /* Prepare send buffer first (for same rank, send_order is swapped
5910      with match_id directly) */
5911 
5912   for (i = 0, j = 0; i < ifs->size; i++) {
5913 
5914     cs_lnum_t k;
5915     cs_interface_t *itf = ifs->interfaces[i];
5916 
5917     /* When this function is called, a distant-rank interface should have
5918        a send_order array, but not a match_id array */
5919 
5920     assert(itf->match_id == NULL);
5921     BFT_MALLOC(itf->match_id, itf->size, cs_lnum_t);
5922 
5923     for (k = 0; k < itf->size; k++)
5924       send_buf[j + k] = itf->elt_id[itf->send_order[k]];
5925 
5926     j += itf->size;
5927 
5928   }
5929 
5930   /* Now exchange data */
5931 
5932 #if defined(HAVE_MPI)
5933   if (n_ranks > 1) {
5934     BFT_MALLOC(request, ifs->size*2, MPI_Request);
5935     BFT_MALLOC(status, ifs->size*2, MPI_Status);
5936   }
5937 #endif
5938 
5939   for (i = 0, j = 0; i < ifs->size; i++) {
5940 
5941     cs_interface_t *itf = ifs->interfaces[i];
5942 
5943     if (itf->rank == local_rank)
5944       memcpy(itf->match_id, send_buf + j, itf->size*sizeof(cs_lnum_t));
5945 
5946 #if defined(HAVE_MPI)
5947 
5948     else /* if (itf->rank != local_rank) */
5949       MPI_Irecv(itf->match_id,
5950                 itf->size,
5951                 CS_MPI_LNUM,
5952                 itf->rank,
5953                 itf->rank,
5954                 ifs->comm,
5955                 &(request[request_count++]));
5956 
5957 #endif
5958 
5959       j += itf->size;
5960 
5961     }
5962 
5963 #if defined(HAVE_MPI)
5964 
5965   if (n_ranks > 1) {
5966 
5967     for (i = 0, j = 0; i < ifs->size; i++) {
5968       cs_interface_t *itf = ifs->interfaces[i];
5969       if (itf->rank != local_rank)
5970         MPI_Isend(send_buf + j,
5971                   itf->size,
5972                   CS_MPI_LNUM,
5973                   itf->rank,
5974                   local_rank,
5975                   ifs->comm,
5976                   &(request[request_count++]));
5977       j += itf->size;
5978     }
5979 
5980     MPI_Waitall(request_count, request, status);
5981 
5982     BFT_FREE(request);
5983     BFT_FREE(status);
5984 
5985   }
5986 
5987 #endif /* defined(HAVE_MPI) */
5988 
5989   BFT_FREE(send_buf);
5990 }
5991 
5992 /*----------------------------------------------------------------------------*/
5993 /*!
5994  * \brief Free matching element id information of an interface set.
5995  *
5996  * This information is used by calls to cs_interface_get_match_ids(),
5997  * and may be defined using cs_interface_set_add_match_ids().
5998  *
5999  * \param[in]  ifs  pointer to interface set structure
6000  */
6001 /*----------------------------------------------------------------------------*/
6002 
6003 void
cs_interface_set_free_match_ids(cs_interface_set_t * ifs)6004 cs_interface_set_free_match_ids(cs_interface_set_t  *ifs)
6005 {
6006   assert(ifs != NULL);
6007 
6008   if (ifs->match_id_rc > 0)
6009     ifs->match_id_rc -= 1;
6010 
6011   if (ifs->match_id_rc > 0)
6012     return;
6013 
6014   for (int i = 0; i < ifs->size; i++) {
6015     cs_interface_t *itf = ifs->interfaces[i];
6016     assert(itf->send_order != NULL || itf->size == 0);
6017     BFT_FREE(itf->match_id);
6018   }
6019 }
6020 
6021 /*----------------------------------------------------------------------------*/
6022 /*!
6023  * \brief Tag mutiple elements of local interface with a given values.
6024  *
6025  * This is effective only on an interface matching the current rank,
6026  * and when multiple (periodic) instances of a given element appear on that
6027  * rank, al instances except the first are tagged with the chosen value.
6028  *
6029  * \param[in]       ifs          pointer to interface set structure
6030  * \param[in]       periodicity  periodicity information (NULL if none)
6031  * \param[in]       tr_ignore   if > 0, ignore periodicity with rotation;
6032  *                              if > 1, ignore all periodic transforms
6033  * \param[in]       tag_value   tag to assign
6034  * \param[in, out]  tag         global tag array for elements
6035  */
6036 /*----------------------------------------------------------------------------*/
6037 
6038 void
cs_interface_tag_local_matches(const cs_interface_t * itf,const fvm_periodicity_t * periodicity,int tr_ignore,cs_gnum_t tag_value,cs_gnum_t * tag)6039 cs_interface_tag_local_matches(const cs_interface_t     *itf,
6040                                const fvm_periodicity_t  *periodicity,
6041                                int                       tr_ignore,
6042                                cs_gnum_t                 tag_value,
6043                                cs_gnum_t                *tag)
6044 {
6045   int l_rank = CS_MAX(cs_glob_rank_id, 0);
6046 
6047   if (itf->rank != l_rank)
6048     return;
6049 
6050   /* Build temporary local match id */
6051 
6052   cs_lnum_t  *match_id;
6053   BFT_MALLOC(match_id, itf->size, cs_lnum_t);
6054 
6055   for (cs_lnum_t i = 0; i < itf->size; i++)
6056     match_id[i] = itf->elt_id[itf->send_order[i]];
6057 
6058   /* Also filter by transform type if needed */
6059 
6060   fvm_periodicity_type_t p_type_max = FVM_PERIODICITY_MIXED;
6061   int n_tr_max = itf->tr_index_size - 2;
6062 
6063   assert(n_tr_max == fvm_periodicity_get_n_transforms(periodicity));
6064 
6065   if (tr_ignore == 1)
6066     p_type_max = FVM_PERIODICITY_TRANSLATION;
6067   else if (tr_ignore  == 2)
6068     p_type_max = FVM_PERIODICITY_NULL;
6069 
6070   const cs_lnum_t *tr_index = itf->tr_index;
6071 
6072   for (int tr_id = 0; tr_id < n_tr_max; tr_id++) {
6073 
6074     if (fvm_periodicity_get_type(periodicity, tr_id) > p_type_max)
6075       continue;
6076 
6077     cs_lnum_t s_id = tr_index[tr_id+1];
6078     cs_lnum_t e_id = tr_index[tr_id+2];
6079 
6080     for (cs_lnum_t j = s_id; j < e_id; j++) {
6081       cs_lnum_t k = CS_MAX(itf->elt_id[j], match_id[j]);
6082       tag[k] = tag_value;
6083     }
6084 
6085   }
6086 
6087   BFT_FREE(match_id);
6088 }
6089 
6090 /*----------------------------------------------------------------------------*/
6091 /*!
6092  * \brief Dump printout of an interface list.
6093  *
6094  * \param[in]  ifs  pointer to structure that should be dumped
6095  */
6096 /*----------------------------------------------------------------------------*/
6097 
6098 void
cs_interface_set_dump(const cs_interface_set_t * ifs)6099 cs_interface_set_dump(const cs_interface_set_t  *ifs)
6100 {
6101   int i;
6102   cs_lnum_t j;
6103 
6104   /* Dump local info */
6105 
6106   if (ifs == NULL) {
6107     bft_printf("  interface list: nil\n");
6108     return;
6109   }
6110 
6111   bft_printf("  interface list: %p\n"
6112              "  n interfaces:   %d\n",
6113              (const void *)ifs, ifs->size);
6114 
6115   for (i = 0, j = 0; i < ifs->size; i++) {
6116     bft_printf("\n  interface %d:\n", i);
6117     _cs_interface_dump(ifs->interfaces[i]);
6118     j += (ifs->interfaces[i])->size;
6119   }
6120 
6121   if (ifs->periodicity != NULL)
6122     bft_printf("\n  periodicity %p:\n", (const void *)(ifs->periodicity));
6123 }
6124 
6125 /*----------------------------------------------------------------------------*/
6126 
6127 END_C_DECLS
6128