1 /*============================================================================
2 * \file Handling of interfaces associating mesh elements (such as
3 * inter-processor or periodic connectivity between cells, faces,
4 * or vertices).
5 *============================================================================*/
6
7 /*
8 This file is part of Code_Saturne, a general-purpose CFD tool.
9
10 Copyright (C) 1998-2021 EDF S.A.
11
12 This program is free software; you can redistribute it and/or modify it under
13 the terms of the GNU General Public License as published by the Free Software
14 Foundation; either version 2 of the License, or (at your option) any later
15 version.
16
17 This program is distributed in the hope that it will be useful, but WITHOUT
18 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
19 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
20 details.
21
22 You should have received a copy of the GNU General Public License along with
23 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
24 Street, Fifth Floor, Boston, MA 02110-1301, USA.
25 */
26
27 /*----------------------------------------------------------------------------*/
28
29 #include "cs_defs.h"
30
31 /*----------------------------------------------------------------------------
32 * Standard C library headers
33 *----------------------------------------------------------------------------*/
34
35 #include <assert.h>
36 #include <stdio.h>
37 #include <string.h>
38
39 /*----------------------------------------------------------------------------
40 * Local headers
41 *----------------------------------------------------------------------------*/
42
43 #include "bft_error.h"
44 #include "bft_mem.h"
45 #include "bft_printf.h"
46
47 #include "cs_all_to_all.h"
48 #include "cs_base.h"
49 #include "cs_block_dist.h"
50 #include "cs_order.h"
51
52 #include "fvm_periodicity.h"
53
54 /*----------------------------------------------------------------------------
55 * Header for the current file
56 *----------------------------------------------------------------------------*/
57
58 #include "cs_interface.h"
59
60 /*----------------------------------------------------------------------------*/
61
62 BEGIN_C_DECLS
63
64 /*! \cond DOXYGEN_SHOULD_SKIP_THIS */
65
66 /*============================================================================
67 * Local structure definitions
68 *============================================================================*/
69
70 /*----------------------------------------------------------------------------
71 * Structure defining an interface
72 *----------------------------------------------------------------------------*/
73
74 struct _cs_interface_t {
75
76 int rank; /* Associated rank */
77
78 cs_lnum_t size; /* Number of equivalent elements */
79
80 int tr_index_size; /* Size of perio_index */
81 cs_lnum_t *tr_index; /* Index of sub-sections in elt_id, distant_id,
82 and match_id for different transformations;
83 purely parallel equivalences appear at
84 position 0, equivalences through periodic
85 transform i appear at position i+1;
86 note that send_order crosses subsection
87 boundaries, and is not indexed by this array.
88 NULL in absence of transformations */
89
90 cs_lnum_t *elt_id; /* Local element ids (ordered, always present) */
91 cs_lnum_t *match_id; /* Matching element ids for same-rank interface,
92 distant element ids such that match_id[i]
93 on distant rank matches local_id[i]
94 (temporary life cycle even in parallel) */
95 cs_lnum_t *send_order; /* Local element ids ordered so that
96 receive matches elt_id for other-rank
97 interfaces, and match_id[send_order[i]]
98 equals elt_id[i] on same-rank interface */
99
100 };
101
102 /*----------------------------------------------------------------------------
103 * Structure defining a set of interfaces
104 *----------------------------------------------------------------------------*/
105
106 struct _cs_interface_set_t {
107
108 int size; /* Number of interfaces */
109
110 cs_interface_t **interfaces; /* Interface structures array */
111
112 const fvm_periodicity_t *periodicity; /* Optional periodicity structure */
113
114 int match_id_rc; /* Match_id reference count */
115
116 #if defined(HAVE_MPI)
117 MPI_Comm comm; /* Associated communicator */
118 #endif
119 };
120
121 /*----------------------------------------------------------------------------
122 * Local structure defining a temporary list of interfaces
123 *----------------------------------------------------------------------------*/
124
125 typedef struct {
126
127 int count; /* Number of equivalences */
128 cs_lnum_t *shift; /* Index of per-equivalence data in rank[] and num[] */
129 int *rank; /* Rank associated with each element */
130 int *tr_id; /* Transformation id associated with each element,
131 + 1, with 0 indicating no transformation
132 (NULL in absence of periodicity) */
133 cs_lnum_t *num; /* Local number associated with each element */
134
135 } _per_block_equiv_t;
136
137 /*----------------------------------------------------------------------------
138 * Local structure defining a temporary list of periodic interfaces
139 *----------------------------------------------------------------------------*/
140
141 typedef struct {
142
143 int count; /* Number of periodic couples */
144 cs_lnum_t *block_id; /* local id in block */
145 int *tr_id; /* Transform id associated with each couple */
146 int *shift; /* Index of per-couple data in rank[] and num[] */
147 int *rank; /* Ranks associated with periodic elements */
148 cs_lnum_t *num; /* Local numbers associated with periodic elements */
149
150 } _per_block_period_t;
151
152 /*=============================================================================
153 * Private function definitions
154 *============================================================================*/
155
156 /*----------------------------------------------------------------------------
157 * Creation of an empty interface between elements of a same type.
158 *
159 * This interface may be used to identify equivalent vertices or faces using
160 * domain splitting, as well as periodic elements (on the same or on
161 * distant ranks).
162 *
163 * returns:
164 * pointer to allocated interface structure
165 *----------------------------------------------------------------------------*/
166
167 static cs_interface_t *
_cs_interface_create(void)168 _cs_interface_create(void)
169 {
170 cs_interface_t *_interface;
171
172 BFT_MALLOC(_interface, 1, cs_interface_t);
173
174 _interface->rank = -1;
175 _interface->size = 0;
176
177 _interface->tr_index_size = 0;
178 _interface->tr_index = NULL;
179
180 _interface->elt_id = NULL;
181 _interface->match_id = NULL;
182 _interface->send_order = NULL;
183
184 return _interface;
185 }
186
187 /*----------------------------------------------------------------------------
188 * Destruction of an interface.
189 *
190 * parameters:
191 * itf <-> pointer to pointer to structure to destroy
192 *
193 * returns:
194 * NULL pointer
195 *----------------------------------------------------------------------------*/
196
197 static void
_cs_interface_destroy(cs_interface_t ** itf)198 _cs_interface_destroy(cs_interface_t **itf)
199 {
200 cs_interface_t *_itf = *itf;
201
202 if (_itf != NULL) {
203 BFT_FREE(_itf->tr_index);
204 BFT_FREE(_itf->elt_id);
205 BFT_FREE(_itf->match_id);
206 BFT_FREE(_itf->send_order);
207 BFT_FREE(_itf);
208 }
209
210 *itf = _itf;
211 }
212
213 /*----------------------------------------------------------------------------
214 * Dump printout of an interface.
215 *
216 * parameters:
217 * itf <-- pointer to structure that should be dumped
218 *----------------------------------------------------------------------------*/
219
220 static void
_cs_interface_dump(const cs_interface_t * itf)221 _cs_interface_dump(const cs_interface_t *itf)
222 {
223 int i, section_id;
224 cs_lnum_t start_id, end_id;
225
226 int tr_index_size = 2;
227 cs_lnum_t _tr_index[2] = {0, 0};
228 const cs_lnum_t *tr_index = _tr_index;
229
230 if (itf == NULL) {
231 bft_printf(" interface: nil\n");
232 return;
233 }
234
235 bft_printf(" interface: %p\n"
236 " associated rank: %d\n"
237 " size: %llu\n"
238 " transform index size: %d\n",
239 (const void *)itf,
240 itf->rank,
241 (unsigned long long)(itf->size),
242 itf->tr_index_size);
243
244 if (itf->tr_index_size > 0) {
245 bft_printf(" transform index:\n");
246 for (i = 0; i < itf->tr_index_size; i++)
247 bft_printf(" %5d %lu\n", i, (unsigned long)itf->tr_index[i]);
248 }
249
250
251 _tr_index[1] = itf->size;
252
253 if (itf->tr_index_size > 0) {
254 tr_index_size = itf->tr_index_size;
255 tr_index = itf->tr_index;
256 }
257
258 /* Case for other-rank interface */
259
260 if (itf->match_id != NULL) {
261
262 for (section_id = 0; section_id < tr_index_size - 1; section_id++) {
263
264 if (section_id == 0)
265 bft_printf("\n"
266 " id elt_id match_id (parallel)\n");
267 else
268 bft_printf("\n"
269 " id elt_id match_id (transform %d)\n",
270 section_id - 1);
271
272 start_id = tr_index[section_id];
273 end_id = tr_index[section_id + 1];
274
275 for (i = start_id; i < end_id; i++)
276 bft_printf(" %10ld %10ld %10ld\n",
277 (long)i, (long)itf->elt_id[i], (long)itf->match_id[i]);
278
279 }
280
281 }
282
283 else {
284
285 for (section_id = 0; section_id < tr_index_size - 1; section_id++) {
286
287 if (section_id == 0)
288 bft_printf("\n"
289 " id elt_id (parallel)\n");
290 else
291 bft_printf("\n"
292 " id elt_id (transform %d)\n",
293 section_id - 1);
294
295 start_id = tr_index[section_id];
296 end_id = tr_index[section_id + 1];
297
298 for (i = start_id; i < end_id; i++)
299 bft_printf(" %10ld %10ld\n", (long)i, (long)itf->elt_id[i]);
300
301 }
302
303 }
304
305 /* Print send order separately, as it is section-independant */
306
307 if (itf->send_order != NULL) {
308
309 bft_printf("\n"
310 " id send_order\n");
311
312 for (i = 0; i < itf->size; i++)
313 bft_printf(" %10ld %10ld\n", (long)i, (long)itf->send_order[i]);
314
315 }
316
317 bft_printf("\n");
318 }
319
320 /*----------------------------------------------------------------------------
321 * Sort and remove duplicates from periodic tuple information.
322 *
323 * parameters:
324 * n_block_tuples <-> number of tuples in current block
325 * block_tuples <-> tuple information for this rank: for each tuple,
326 * {global number of local element,
327 * global number of periodic element,
328 * transform id}
329 *----------------------------------------------------------------------------*/
330
331 static void
_sort_periodic_tuples(cs_lnum_t * n_block_tuples,cs_gnum_t ** block_tuples)332 _sort_periodic_tuples(cs_lnum_t *n_block_tuples,
333 cs_gnum_t **block_tuples)
334 {
335 cs_lnum_t i, j, k;
336
337 cs_lnum_t n_tuples = *n_block_tuples;
338 cs_lnum_t *order = NULL;
339 cs_gnum_t *tuples = *block_tuples;
340 cs_gnum_t *tuples_tmp = NULL;
341
342 if (n_tuples < 1)
343 return;
344
345 /* Sort periodic tuples by local correspondant */
346
347 BFT_MALLOC(order, n_tuples, cs_lnum_t);
348 BFT_MALLOC(tuples_tmp, n_tuples*3, cs_gnum_t);
349
350 cs_order_gnum_allocated_s(NULL,
351 tuples,
352 3,
353 order,
354 n_tuples);
355
356 /* Copy to temporary array, ignoring duplicates */
357
358 k = order[0]*3;
359 tuples_tmp[0] = tuples[k];
360 tuples_tmp[1] = tuples[k + 1];
361 tuples_tmp[2] = tuples[k + 2];
362 j = 3;
363
364 for (i = 1; i < n_tuples; i++) {
365 k = order[i] * 3;
366 if ( (tuples[k] != tuples_tmp[j-3])
367 || (tuples[k+1] != tuples_tmp[j-2])
368 || (tuples[k+2] != tuples_tmp[j-1])) {
369 tuples_tmp[j] = tuples[k];
370 tuples_tmp[j + 1] = tuples[k + 1];
371 tuples_tmp[j + 2] = tuples[k + 2];
372 j += 3;
373 }
374 }
375 n_tuples = j / 3;
376
377 BFT_FREE(order);
378
379 /* Resize input/outpout array if duplicates were removed */
380
381 if (n_tuples <= *n_block_tuples) {
382 BFT_REALLOC(tuples, n_tuples*3, cs_gnum_t);
383 *n_block_tuples = n_tuples;
384 *block_tuples = tuples;
385 }
386
387 /* Copy sorted data to input/output array and free temporary storage */
388
389 memcpy(tuples, tuples_tmp, sizeof(cs_gnum_t)*n_tuples*3);
390
391 BFT_FREE(tuples_tmp);
392 }
393
394 /*----------------------------------------------------------------------------
395 * Extract periodicity transform data necessary for periodic combinations.
396 *
397 * Builds a square transformation combination matrix associating a combination
398 * transform id with transform ids of levels lower than that given by
399 * the level argument; the number of rows and columns is thus equal to the
400 * number of transformations of level lower than the argument.
401 * Entries corresponding to impossible combinations are set to -1;
402 *
403 * The caller is responsible for freeing the tr_combine array returned by
404 * this function.
405 *
406 * parameters:
407 * periodicity <-- periodicity information
408 * level --> transform combination level (1 or 2)
409 * n_rows --> number of rows of combination matrix
410 * (equals transform_level_index[level])
411 * tr_combine --> combination id matrix (size n_rows * n_rows)
412 *----------------------------------------------------------------------------*/
413
414 static void
_transform_combine_info(const fvm_periodicity_t * periodicity,int level,int * n_rows,int ** tr_combine)415 _transform_combine_info(const fvm_periodicity_t *periodicity,
416 int level,
417 int *n_rows,
418 int **tr_combine)
419 {
420 int i;
421 int tr_level_idx[4], parent_id[2];
422
423 int n_vals_1 = 0, n_vals_2 = 0;
424 int n_rows_1 = 0, n_rows_2 = 0;
425 int *tr_combine_1, *tr_combine_2 = NULL;
426
427 assert(periodicity != NULL);
428 assert(level == 1 || level == 2);
429
430 /* Extract base periodicity dimension info */
431
432 fvm_periodicity_get_tr_level_idx(periodicity, tr_level_idx);
433
434 /* We always need the level0 x level0 -> level1 array */
435
436 n_rows_1 = tr_level_idx[1];
437 n_vals_1 = n_rows_1*n_rows_1;
438
439 BFT_MALLOC(tr_combine_1, n_vals_1, int);
440
441 for (i = 0; i < n_vals_1; i++)
442 tr_combine_1[i] = -1;
443
444 for (i = tr_level_idx[1]; i < tr_level_idx[2]; i++) {
445
446 fvm_periodicity_get_parent_ids(periodicity, i, parent_id);
447
448 assert(parent_id[0] > -1 && parent_id[1] > -1);
449 assert(parent_id[0] < n_rows_1 && parent_id[1] < n_rows_1);
450
451 tr_combine_1[parent_id[0]*n_rows_1 + parent_id[1]] = i;
452 tr_combine_1[parent_id[1]*n_rows_1 + parent_id[0]] = i;
453
454 }
455
456 /* For level 1 transforms, we are done once return values are set */
457
458 if (level == 1) {
459
460 *n_rows = n_rows_1;
461 *tr_combine = tr_combine_1;
462
463 }
464
465 /* Handle level 2 transforms */
466
467 else { /* if (level == 2) */
468
469 int tr_01, tr_02, tr_12;
470 int comp_id[3];
471
472 n_rows_2 = tr_level_idx[2];
473 n_vals_2 = n_rows_2*n_rows_2;
474
475 /* Allocate and populate array */
476
477 BFT_MALLOC(tr_combine_2, n_vals_2, int);
478
479 for (i = 0; i < n_vals_2; i++)
480 tr_combine_2[i] = -1;
481
482 for (i = tr_level_idx[2]; i < tr_level_idx[3]; i++) {
483
484 fvm_periodicity_get_components(periodicity, i, comp_id);
485
486 assert(comp_id[0] > -1 && comp_id[1] > -1 && comp_id[2] > -1);
487
488 tr_01 = tr_combine_1[comp_id[0]*n_rows_1 + comp_id[1]];
489 tr_02 = tr_combine_1[comp_id[0]*n_rows_1 + comp_id[2]];
490 tr_12 = tr_combine_1[comp_id[1]*n_rows_1 + comp_id[2]];
491
492 assert(tr_01 > -1 && tr_02 > -1 && tr_12 > -1);
493
494 tr_combine_2[tr_01*n_rows_2 + comp_id[2]] = i;
495 tr_combine_2[comp_id[2]*n_rows_2 + tr_01] = i;
496
497 tr_combine_2[tr_02*n_rows_2 + comp_id[1]] = i;
498 tr_combine_2[comp_id[1]*n_rows_2 + tr_02] = i;
499
500 tr_combine_2[tr_12*n_rows_2 + comp_id[0]] = i;
501 tr_combine_2[comp_id[0]*n_rows_2 + tr_12] = i;
502
503 }
504
505 BFT_FREE(tr_combine_1);
506
507 /* Set return values */
508
509 *n_rows = n_rows_2;
510 *tr_combine = tr_combine_2;
511 }
512
513 }
514
515 #if defined(HAVE_MPI)
516
517 /*----------------------------------------------------------------------------
518 * Maximum global number associated with an I/O numbering structure
519 *
520 * parameters:
521 * n_elts <-- local number of elements
522 * global_num <-- global number (id) associated with each element
523 * comm <-- associated MPI communicator
524 *
525 * returns:
526 * maximum global number associated with the I/O numbering
527 *----------------------------------------------------------------------------*/
528
529 static cs_gnum_t
_global_num_max(cs_lnum_t n_elts,const cs_gnum_t global_num[],MPI_Comm comm)530 _global_num_max(cs_lnum_t n_elts,
531 const cs_gnum_t global_num[],
532 MPI_Comm comm)
533 {
534 cs_gnum_t global_max;
535 cs_gnum_t local_max = 0;
536
537 /* Get maximum global number value */
538
539 for (cs_lnum_t i = 0; i < n_elts; i++) {
540 if (global_num[i] > local_max)
541 local_max = global_num[i];
542 }
543
544 MPI_Allreduce(&local_max, &global_max, 1, CS_MPI_GNUM, MPI_MAX, comm);
545
546 return global_max;
547 }
548
549 /*----------------------------------------------------------------------------
550 * Build temporary equivalence structure for data in a given block,
551 * and associate an equivalence id to received elements (-1 for elements
552 * with no correponding elements)
553 *
554 * parameters:
555 * n_elts_recv <-- number of elements received
556 * recv_rank <-- source ranks received
557 * recv_global_num <-- global numbering received
558 * recv_num <-- local numbering received
559 * equiv_id --> equivalence id for each element (-1 if none)
560 *
561 * returns:
562 * temporary equivalence structure for block
563 *----------------------------------------------------------------------------*/
564
565 #if defined(__INTEL_COMPILER)
566 #pragma optimization_level 2 /* Crash with O3 on IA64 with icc 9.1 20070320 */
567 #endif
568
569 static _per_block_equiv_t
_block_global_num_to_equiv(cs_lnum_t n_elts_recv,const int recv_rank[],const cs_gnum_t recv_global_num[],const cs_lnum_t recv_num[],cs_lnum_t equiv_id[])570 _block_global_num_to_equiv(cs_lnum_t n_elts_recv,
571 const int recv_rank[],
572 const cs_gnum_t recv_global_num[],
573 const cs_lnum_t recv_num[],
574 cs_lnum_t equiv_id[])
575 {
576 /* Initialize return structure */
577
578 _per_block_equiv_t e = {.count = 0,
579 .shift = NULL,
580 .rank = NULL,
581 .tr_id = NULL,
582 .num = NULL};
583
584 if (n_elts_recv == 0)
585 return e;
586
587 /* Determine equivalent elements; requires ordering to loop through buffer
588 by increasing number. */
589
590 cs_lnum_t *recv_order = NULL;
591 BFT_MALLOC(recv_order, n_elts_recv, cs_lnum_t);
592
593 cs_order_gnum_allocated(NULL,
594 recv_global_num,
595 recv_order,
596 n_elts_recv);
597
598 /* Loop by increasing number: if two elements have the same global
599 number, they are equivalent. We do not increment equivalence counts
600 as soon as two elements are equivalent, as three equivalent elements
601 should have the same equivalence id. Rather, we increment the
602 equivalence counter when the previous element was part of an
603 equivalence and the current element is not part of this same
604 equivalence. */
605
606 equiv_id[recv_order[0]] = -1;
607 cs_gnum_t prev_num = recv_global_num[recv_order[0]];
608
609 for (cs_lnum_t i = 1; i < n_elts_recv; i++) {
610 cs_gnum_t cur_num = recv_global_num[recv_order[i]];
611 if (cur_num == prev_num) {
612 equiv_id[recv_order[i-1]] = e.count;
613 equiv_id[recv_order[i]] = e.count;
614 }
615 else {
616 if (equiv_id[recv_order[i-1]] > -1)
617 e.count++;
618 equiv_id[recv_order[i]] = -1;
619 }
620 prev_num = cur_num;
621 }
622 if (equiv_id[recv_order[n_elts_recv-1]] > -1)
623 e.count++;
624
625 BFT_FREE(recv_order);
626
627 /* Count number of elements associated with each equivalence */
628
629 int *multiple;
630 BFT_MALLOC(multiple, e.count, int);
631 for (cs_lnum_t i = 0; i < e.count; multiple[i++] = 0);
632 for (cs_lnum_t i = 0; i < n_elts_recv; i++) {
633 if (equiv_id[i] > -1)
634 multiple[equiv_id[i]] += 1;
635 }
636
637 BFT_MALLOC(e.shift, e.count+1, cs_lnum_t);
638 e.shift[0] = 0;
639 for (cs_lnum_t i = 0; i < e.count; i++) {
640 e.shift[i+1] = e.shift[i] + multiple[i];
641 multiple[i] = 0;
642 }
643
644 /* Build equivalence data */
645
646 BFT_MALLOC(e.rank, e.shift[e.count], int);
647 BFT_MALLOC(e.num, e.shift[e.count], cs_lnum_t);
648
649 for (cs_lnum_t i = 0; i < n_elts_recv; i++) {
650 if (equiv_id[i] > -1) {
651 cs_lnum_t j = e.shift[equiv_id[i]] + multiple[equiv_id[i]];
652 e.rank[j] = recv_rank[i];
653 e.num[j] = recv_num[i];
654 multiple[equiv_id[i]] += 1;
655 }
656 }
657
658 BFT_FREE(multiple);
659
660 return e;
661 }
662
663 /*----------------------------------------------------------------------------
664 * Build global interface data from flat equivalence data
665 * (usually prepared and received from distant ranks).
666 *
667 * parameters:
668 * ifs <-> pointer to structure that should be updated
669 * tr_index_size <-- size of transform index (number of transforms
670 * + 1 for idelement + 1 for past-the-end);
671 * 0 or 1 if no transforms are present
672 * n_elts_recv <-- size of received data
673 * equiv_recv <-- flat (received) equivalence data; for each
674 * equivalence, we have:
675 * {local_number, n_equivalents,
676 * {distant_number, distant_rank}*n_equivalents}
677 *----------------------------------------------------------------------------*/
678
679 static void
_interfaces_from_flat_equiv(cs_interface_set_t * ifs,int tr_index_size,cs_lnum_t n_elts_recv,const cs_lnum_t equiv_recv[])680 _interfaces_from_flat_equiv(cs_interface_set_t *ifs,
681 int tr_index_size,
682 cs_lnum_t n_elts_recv,
683 const cs_lnum_t equiv_recv[])
684 {
685 cs_lnum_t i, j, k, l;
686 cs_lnum_t local_num, distant_num, n_sub, n_elts_rank_tr_size;
687 int rank, tr_id;
688
689 int _tr_index_size = tr_index_size;
690 int _tr_stride = tr_index_size > 1 ? tr_index_size - 1 : 1;
691 int max_rank = 0, n_ranks = 0, start_id = 0;
692 int recv_step = 1;
693
694 cs_lnum_t *n_elts_rank = NULL;
695 cs_lnum_t *n_elts_rank_tr = NULL;
696 int *interface_id = NULL;
697
698 cs_interface_t *_interface = NULL;
699
700 if (_tr_index_size == 0) {
701 _tr_index_size = 1;
702 _tr_stride = 1;
703 }
704 else if (_tr_index_size > 1)
705 recv_step = 2;
706
707 /* Compute size of subsections for each rank */
708
709 i = 0;
710 while (i < n_elts_recv) {
711 i++;
712 n_sub = equiv_recv[i++];
713 for (j = 0; j < n_sub; j++) {
714 i += recv_step;
715 rank = equiv_recv[i++];
716 if (rank > max_rank)
717 max_rank = rank;
718 }
719 }
720
721 BFT_MALLOC(n_elts_rank, max_rank + 1, cs_lnum_t);
722
723 for (i = 0; i < max_rank + 1; n_elts_rank[i++] = 0);
724
725 i = 0;
726 while (i < n_elts_recv) {
727 i++;
728 n_sub = equiv_recv[i++];
729 for (j = 0; j < n_sub; j++) {
730 i += recv_step;
731 rank = equiv_recv[i++];
732 n_elts_rank[rank] += 1;
733 }
734 }
735
736 /* Build final data structures */
737
738 n_ranks = 0;
739 for (i = 0; i < max_rank + 1; i++) {
740 if (n_elts_rank[i] > 0)
741 n_ranks++;
742 }
743
744 /* (Re-)Allocate structures */
745
746 start_id = ifs->size;
747
748 ifs->size += n_ranks;
749
750 BFT_REALLOC(ifs->interfaces,
751 ifs->size,
752 cs_interface_t *);
753
754 for (i = start_id; i < ifs->size; i++)
755 ifs->interfaces[i] = _cs_interface_create();
756
757 /* Initialize rank info and interface id */
758
759 n_ranks = 0;
760 BFT_MALLOC(interface_id, max_rank + 1, int);
761 for (i = 0; i < max_rank + 1; i++) {
762 if (n_elts_rank[i] > 0) {
763 interface_id[i] = start_id + n_ranks++;
764 (ifs->interfaces[interface_id[i]])->rank = i;
765 (ifs->interfaces[interface_id[i]])->size = n_elts_rank[i];
766 }
767 else
768 interface_id[i] = -1;
769 }
770
771 BFT_FREE(n_elts_rank);
772
773 /* n_elts_rank_tr will be used as a position counter for new interfaces */
774
775 n_elts_rank_tr_size = (ifs->size - start_id)*_tr_stride;
776 BFT_MALLOC(n_elts_rank_tr, n_elts_rank_tr_size, cs_lnum_t);
777 for (i = 0; i < n_elts_rank_tr_size; i++)
778 n_elts_rank_tr[i] = 0;
779
780 for (i = start_id; i < ifs->size; i++) {
781
782 _interface = ifs->interfaces[i];
783
784 BFT_MALLOC(_interface->elt_id, _interface->size, cs_lnum_t);
785 BFT_MALLOC(_interface->match_id, _interface->size, cs_lnum_t);
786
787 if (_tr_index_size > 1) {
788 _interface->tr_index_size = _tr_index_size;
789 BFT_MALLOC(_interface->tr_index, _interface->tr_index_size, cs_lnum_t);
790 for (k = 0; k < _interface->tr_index_size; k++)
791 _interface->tr_index[k] = 0;
792 }
793 else {
794 _interface->tr_index_size = 0;
795 _interface->tr_index = NULL;
796 }
797
798 }
799
800 /* In absence of transforms, we may build the interface in one pass */
801
802 if (_tr_index_size < 2) {
803
804 i = 0;
805 while (i < n_elts_recv) {
806
807 local_num = equiv_recv[i++];
808 n_sub = equiv_recv[i++];
809
810 for (j = 0; j < n_sub; j++) {
811
812 distant_num = equiv_recv[i++];
813 rank = equiv_recv[i++];
814
815 _interface = ifs->interfaces[interface_id[rank]];
816 k = interface_id[rank] - start_id;
817
818 _interface->elt_id[n_elts_rank_tr[k]] = local_num - 1;
819 _interface->match_id[n_elts_rank_tr[k]] = distant_num - 1;
820 n_elts_rank_tr[k] += 1;
821
822 }
823 }
824
825 }
826
827 /* If we have transforms, build the transform index first */
828
829 else { /* if (_tr_index_size > 1) */
830
831 /* Initial count */
832
833 i = 0;
834 while (i < n_elts_recv) {
835
836 i++;
837 n_sub = equiv_recv[i++];
838
839 for (j = 0; j < n_sub; j++) {
840
841 i++;
842 tr_id = equiv_recv[i++];
843 rank = equiv_recv[i++];
844
845 _interface = ifs->interfaces[interface_id[rank]];
846
847 _interface->tr_index[tr_id + 1] += 1;
848
849 }
850 }
851
852 /* Build index from initial count */
853
854 for (i = start_id; i < ifs->size; i++) {
855 _interface = ifs->interfaces[i];
856 _interface->tr_index[0] = 0;
857 for (j = 1; j < _tr_index_size; j++)
858 _interface->tr_index[j] += _interface->tr_index[j-1];
859 }
860
861 /* Now populate the arrays */
862
863 i = 0;
864 while (i < n_elts_recv) {
865
866 local_num = equiv_recv[i++];
867 n_sub = equiv_recv[i++];
868
869 for (j = 0; j < n_sub; j++) {
870
871 distant_num = equiv_recv[i++];
872 tr_id = equiv_recv[i++];
873 rank = equiv_recv[i++];
874
875 _interface = ifs->interfaces[interface_id[rank]];
876 k = (interface_id[rank] - start_id)*_tr_stride + tr_id;
877
878 l = _interface->tr_index[tr_id] + n_elts_rank_tr[k];
879
880 _interface->elt_id[l] = local_num - 1;
881 _interface->match_id[l] = distant_num - 1;
882
883 n_elts_rank_tr[k] += 1;
884
885 }
886 }
887
888 }
889
890 /* n_elts_rank will now be used as a position counter for new interfaces */
891
892 BFT_FREE(n_elts_rank_tr);
893 BFT_FREE(interface_id);
894 }
895
896 /*----------------------------------------------------------------------------
897 * Creation of a list of interfaces between elements of a same type.
898 *
899 * The global_num values need not be ordered or contiguous.
900 *
901 * parameters:
902 * ifs <-> pointer to structure that should be updated
903 * n_elts <-- local number of elements
904 * global_num <-- global number (id) associated with each element
905 * comm <-- associated MPI communicator
906 *----------------------------------------------------------------------------*/
907
908 static void
_add_global_equiv(cs_interface_set_t * ifs,cs_lnum_t n_elts,const cs_gnum_t global_num[],MPI_Comm comm)909 _add_global_equiv(cs_interface_set_t *ifs,
910 cs_lnum_t n_elts,
911 const cs_gnum_t global_num[],
912 MPI_Comm comm)
913 {
914 /* Initialization */
915
916 int size, local_rank;
917 MPI_Comm_size(comm, &size);
918 MPI_Comm_rank(comm, &local_rank);
919
920 /* Get temporary maximum global number value */
921
922 cs_gnum_t global_max = _global_num_max(n_elts,
923 global_num,
924 comm);
925
926 cs_block_dist_info_t
927 bi = cs_block_dist_compute_sizes(local_rank,
928 size,
929 1,
930 0,
931 global_max);
932
933 int flags = CS_ALL_TO_ALL_ORDER_BY_SRC_RANK;
934
935 cs_all_to_all_t
936 *d = cs_all_to_all_create_from_block(n_elts,
937 flags,
938 global_num,
939 bi,
940 comm);
941
942 cs_gnum_t *recv_global_num = cs_all_to_all_copy_array(d,
943 CS_GNUM_TYPE,
944 1,
945 false, /* reverse */
946 global_num,
947 NULL);
948
949 cs_lnum_t *send_num = NULL;
950 BFT_MALLOC(send_num, n_elts, cs_lnum_t);
951 for (cs_lnum_t i = 0; i < n_elts; i++)
952 send_num[i] = i+1;
953
954 cs_lnum_t *recv_num = cs_all_to_all_copy_array(d,
955 CS_LNUM_TYPE,
956 1,
957 false, /* reverse */
958 send_num,
959 NULL);
960
961 BFT_FREE(send_num);
962
963 cs_lnum_t n_elts_recv = cs_all_to_all_n_elts_dest(d);
964
965 int *src_rank = cs_all_to_all_get_src_rank(d);
966
967 /* Build equivalence data */
968
969 cs_lnum_t *equiv_id = NULL;
970 BFT_MALLOC(equiv_id, n_elts_recv, cs_lnum_t);
971
972 _per_block_equiv_t e = _block_global_num_to_equiv(n_elts_recv,
973 src_rank,
974 recv_global_num,
975 recv_num,
976 equiv_id);
977
978 /* Now free Memory */
979
980 BFT_FREE(recv_num);
981 BFT_FREE(recv_global_num);
982
983 /* Now that equivalences are marked, count for each rank; for each
984 equivalence, we will need to send the corresponding element
985 numbers and ranks, for a total of 2*(e.multiple[...] - 1) values. */
986
987 cs_lnum_t *block_index;
988 BFT_MALLOC(block_index, n_elts_recv+1, cs_lnum_t);
989
990 block_index[0] = 0;
991 for (cs_lnum_t i = 0; i < n_elts_recv; i++) {
992 cs_lnum_t n_eq = 0;
993 if (equiv_id[i] > -1) {
994 size_t e_id = equiv_id[i];
995 n_eq = 2*(e.shift[e_id+1] - e.shift[e_id]);
996 }
997 block_index[i+1] = block_index[i] + n_eq;
998 }
999
1000 cs_lnum_t *part_index = cs_all_to_all_copy_index(d,
1001 true, /* reverse */
1002 block_index,
1003 NULL);
1004
1005 /* Now prepare new send buffer */
1006
1007 cs_lnum_t *block_equiv;
1008 BFT_MALLOC(block_equiv, block_index[n_elts_recv], cs_lnum_t);
1009
1010 for (cs_lnum_t i = 0; i < n_elts_recv; i++) {
1011 if (equiv_id[i] > -1) {
1012
1013 cs_lnum_t *block_equiv_p = block_equiv + block_index[i];
1014
1015 cs_lnum_t e_id = equiv_id[i];
1016 cs_lnum_t k = 2;
1017 const int multiple = e.shift[e_id+1] - e.shift[e_id];
1018 const int *rank_p = e.rank + e.shift[e_id];
1019 const cs_lnum_t *num_p = e.num + e.shift[e_id];
1020
1021 for (cs_lnum_t j = 0; j < multiple; j++) {
1022 if (rank_p[j] == src_rank[i]) {
1023 block_equiv_p[0] = num_p[j];
1024 block_equiv_p[1] = multiple - 1;
1025 }
1026 else {
1027 block_equiv_p[k++] = num_p[j];
1028 block_equiv_p[k++] = rank_p[j];
1029 }
1030 }
1031
1032 }
1033 }
1034
1035 /* Free temporary (block) equivalence info */
1036
1037 e.count = 0;
1038 BFT_FREE(e.shift);
1039 BFT_FREE(e.rank);
1040 if (e.tr_id != NULL)
1041 BFT_FREE(e.tr_id);
1042 BFT_FREE(e.num);
1043
1044 BFT_FREE(src_rank);
1045 BFT_FREE(equiv_id);
1046
1047 /* Send prepared block data to destination rank */
1048
1049 cs_lnum_t *part_equiv
1050 = cs_all_to_all_copy_indexed(d,
1051 CS_LNUM_TYPE,
1052 true, /* reverse */
1053 block_index,
1054 block_equiv,
1055 part_index,
1056 NULL);
1057
1058 cs_lnum_t n_vals_part = part_index[n_elts];
1059
1060 /* At this stage, MPI operations are finished; we may release
1061 the corresponding counts and indexes */
1062
1063 BFT_FREE(block_equiv);
1064 BFT_FREE(part_index);
1065 BFT_FREE(block_index);
1066
1067 cs_all_to_all_destroy(&d);
1068
1069 /* Add interface */
1070
1071 _interfaces_from_flat_equiv(ifs,
1072 1,
1073 n_vals_part,
1074 part_equiv);
1075
1076 BFT_FREE(part_equiv);
1077 }
1078
1079 /*----------------------------------------------------------------------------
1080 * Build eventual tuples belonging to combined periodicities.
1081 *
1082 * parameters:
1083 * block_size <-- size of the block handled by each processor
1084 * periodicit <-- periodicity information (NULL if none)
1085 * n_block_tuples <-> number of tuples in current block
1086 * block_tuples <-> tuple information for this rank: for each tuple,
1087 * {global number of local element,
1088 * global number of periodic element,
1089 * transform id}
1090 * comm <-- associated MPI communicator
1091 *----------------------------------------------------------------------------*/
1092
1093 static void
_combine_periodic_tuples(size_t block_size,const fvm_periodicity_t * periodicity,cs_lnum_t * n_block_tuples,cs_gnum_t ** block_tuples,MPI_Comm comm)1094 _combine_periodic_tuples(size_t block_size,
1095 const fvm_periodicity_t *periodicity,
1096 cs_lnum_t *n_block_tuples,
1097 cs_gnum_t **block_tuples,
1098 MPI_Comm comm)
1099 {
1100 int *tr_reverse_id = NULL;
1101
1102 int *send_rank = NULL;
1103 cs_gnum_t *send_tuples = NULL;
1104
1105 cs_lnum_t _n_block_tuples = *n_block_tuples;
1106 cs_gnum_t *_block_tuples = *block_tuples;
1107
1108 assert (periodicity != NULL);
1109
1110 /* Initialization */
1111
1112 /* Build periodicity related arrays for quick access */
1113
1114 int n_tr = fvm_periodicity_get_n_transforms(periodicity);
1115
1116 BFT_MALLOC(tr_reverse_id, n_tr, int);
1117
1118 for (int i = 0; i < n_tr; i++)
1119 tr_reverse_id[i] = fvm_periodicity_get_reverse_id(periodicity, i);
1120
1121 /* Loop on combination levels */
1122
1123 for (int level = 1;
1124 level < fvm_periodicity_get_n_levels(periodicity);
1125 level++) {
1126
1127 int n_rows = 0;
1128 int *tr_combine = NULL;
1129
1130 _transform_combine_info(periodicity,
1131 level,
1132 &n_rows,
1133 &tr_combine);
1134
1135 /* Count values to exchange */
1136
1137 size_t n_send = 0;
1138
1139 cs_lnum_t start_id = 0;
1140 cs_lnum_t end_id = 1;
1141
1142 while (end_id < _n_block_tuples) {
1143
1144 if (_block_tuples[start_id*3] == _block_tuples[end_id*3]) {
1145
1146 end_id++;
1147 while (end_id < _n_block_tuples) {
1148 if (_block_tuples[end_id*3] != _block_tuples[start_id*3])
1149 break;
1150 end_id++;
1151 }
1152
1153 for (cs_lnum_t j = start_id; j < end_id; j++) {
1154 for (cs_lnum_t k = j+1; k < end_id; k++) {
1155
1156 int tr_1 = tr_reverse_id[_block_tuples[j*3 + 2]];
1157 int tr_2 = _block_tuples[k*3 + 2];
1158
1159 if (tr_combine[tr_1 *n_rows + tr_2] > -1)
1160 n_send += 2;
1161
1162 }
1163 }
1164
1165 }
1166
1167 start_id = end_id;
1168 end_id += 1;
1169 }
1170
1171 BFT_MALLOC(send_rank, n_send, int);
1172 BFT_MALLOC(send_tuples, n_send*3, cs_gnum_t);
1173
1174 /* Now exchange combined tuples */
1175
1176 start_id = 0;
1177 end_id = 1;
1178
1179 cs_lnum_t l = 0;
1180
1181 while (end_id < _n_block_tuples) {
1182
1183 if (_block_tuples[start_id*3] == _block_tuples[end_id*3]) {
1184
1185 end_id++;
1186 while (end_id < _n_block_tuples) {
1187 if (_block_tuples[end_id*3] != _block_tuples[start_id*3])
1188 break;
1189 end_id++;
1190 }
1191
1192 for (cs_lnum_t j = start_id; j < end_id; j++) {
1193 for (cs_lnum_t k = j+1; k < end_id; k++) {
1194
1195 int tr_1 = tr_reverse_id[_block_tuples[j*3 + 2]];
1196 int tr_2 = _block_tuples[k*3 + 2];
1197 int tr_c = tr_combine[tr_1 *n_rows + tr_2];
1198
1199 if (tr_c > -1) {
1200
1201 cs_gnum_t num_1 = _block_tuples[j*3 + 1];
1202 cs_gnum_t num_2 = _block_tuples[k*3 + 1];
1203
1204 send_rank[l*2] = (num_1 - 1) / block_size;
1205 send_rank[l*2+1] = (num_2 - 1) / block_size;
1206
1207 send_tuples[l*6] = num_1;
1208 send_tuples[l*6+1] = num_2;
1209 send_tuples[l*6+2] = tr_c;
1210
1211 send_tuples[l*6+3] = num_2;
1212 send_tuples[l*6+4] = num_1;
1213 send_tuples[l*6+5] = tr_reverse_id[tr_c];
1214
1215 l += 1;
1216
1217 }
1218
1219 }
1220 }
1221
1222 }
1223
1224 start_id = end_id;
1225 end_id += 1;
1226 }
1227
1228 assert((cs_gnum_t)(l*2) == n_send);
1229
1230 BFT_FREE(tr_combine);
1231
1232 cs_all_to_all_t *d = cs_all_to_all_create(n_send,
1233 0,
1234 NULL,
1235 send_rank,
1236 comm);
1237
1238 cs_all_to_all_transfer_dest_rank(d, &send_rank);
1239
1240 cs_gnum_t *recv_tuples = cs_all_to_all_copy_array(d,
1241 CS_GNUM_TYPE,
1242 3,
1243 false, /* reverse */
1244 send_tuples,
1245 NULL);
1246
1247 BFT_FREE(send_tuples);
1248
1249 cs_lnum_t n_recv = cs_all_to_all_n_elts_dest(d);
1250
1251 cs_all_to_all_destroy(&d);
1252
1253 if (n_recv > 0) {
1254 BFT_REALLOC(_block_tuples, (_n_block_tuples + n_recv)*3, cs_gnum_t);
1255 memcpy(_block_tuples + _n_block_tuples*3,
1256 recv_tuples,
1257 n_recv*3*sizeof(cs_gnum_t));
1258 }
1259
1260 BFT_FREE(recv_tuples);
1261
1262 /* Finally, merge additional tuples received for block with
1263 existing periodicity information */
1264
1265 if (n_recv > 0) {
1266
1267 _n_block_tuples += n_recv;
1268
1269 /* Sort and remove duplicates to update block periodicity info */
1270
1271 _sort_periodic_tuples(&_n_block_tuples, &_block_tuples);
1272
1273 *n_block_tuples = _n_block_tuples;
1274 *block_tuples = _block_tuples;
1275
1276 }
1277
1278 }
1279
1280 BFT_FREE(tr_reverse_id);
1281 }
1282
1283 /*----------------------------------------------------------------------------
1284 * Exchange periodic couple info between processors providing the data
1285 * and processors handling the related global numbering interval blocks.
1286 *
1287 * Note that the array pointed to by block_couples is allocated here,
1288 * and must be freed by the calling code.
1289 *
1290 * parameters:
1291 * block_size <-- size of the block handled by each processor
1292 * periodicity <-- periodicity information (NULL if none)
1293 * n_periodic_lists <-- number of periodic lists (may be local)
1294 * periodicity_num <-- periodicity number (1 to n) associated with
1295 * each periodic list (primary periodicities only)
1296 * n_periodic_couples <-- number of periodic couples associated with
1297 * each periodic list
1298 * periodic_couples <-- array indicating periodic couples (using
1299 * global numberings) for each list
1300 * n_block_tuples --> number of tuples in current block
1301 * block_tuples --> tuple information for block: for each tuple,
1302 * {global number of local element,
1303 * global number of periodic element,
1304 * transform id}
1305 * comm <-- associated MPI communicator
1306 *----------------------------------------------------------------------------*/
1307
1308 static void
_exchange_periodic_tuples(size_t block_size,const fvm_periodicity_t * periodicity,int n_periodic_lists,const int periodicity_num[],const cs_lnum_t n_periodic_couples[],const cs_gnum_t * const periodic_couples[],cs_lnum_t * n_block_tuples,cs_gnum_t ** block_tuples,MPI_Comm comm)1309 _exchange_periodic_tuples(size_t block_size,
1310 const fvm_periodicity_t *periodicity,
1311 int n_periodic_lists,
1312 const int periodicity_num[],
1313 const cs_lnum_t n_periodic_couples[],
1314 const cs_gnum_t *const periodic_couples[],
1315 cs_lnum_t *n_block_tuples,
1316 cs_gnum_t **block_tuples,
1317 MPI_Comm comm)
1318 {
1319 int *periodic_block_rank = NULL;
1320 cs_gnum_t *send_tuples = NULL;
1321
1322 cs_gnum_t n_g_periodic_tuples = 0;
1323
1324 /* Initialization */
1325
1326 *n_block_tuples = 0;
1327 *block_tuples = NULL;
1328
1329 for (int list_id = 0; list_id < n_periodic_lists; list_id++)
1330 n_g_periodic_tuples += 2 * n_periodic_couples[list_id];
1331
1332 BFT_MALLOC(periodic_block_rank, n_g_periodic_tuples, int);
1333 BFT_MALLOC(send_tuples, n_g_periodic_tuples*3, cs_gnum_t);
1334
1335 /* Prepare lists to send to distant processors */
1336
1337 cs_lnum_t k = 0;
1338
1339 for (int list_id = 0; list_id < n_periodic_lists; list_id++) {
1340
1341 const int external_num = periodicity_num[list_id];
1342 const int direct_id = fvm_periodicity_get_transform_id(periodicity,
1343 external_num,
1344 1);
1345 const int reverse_id = fvm_periodicity_get_transform_id(periodicity,
1346 external_num,
1347 -1);
1348
1349 const cs_lnum_t _n_periodic_couples = n_periodic_couples[list_id];
1350 const cs_gnum_t *_periodic_couples = periodic_couples[list_id];
1351
1352 assert(direct_id >= 0 && reverse_id >= 0);
1353
1354 for (cs_lnum_t couple_id = 0; couple_id < _n_periodic_couples; couple_id++) {
1355
1356 cs_gnum_t num_1 = _periodic_couples[couple_id*2];
1357 cs_gnum_t num_2 = _periodic_couples[couple_id*2 + 1];
1358
1359 periodic_block_rank[k*2] = (num_1 - 1) / block_size;
1360 periodic_block_rank[k*2+1] = (num_2 - 1) / block_size;
1361
1362 send_tuples[k*6] = num_1;
1363 send_tuples[k*6+1] = num_2;
1364 send_tuples[k*6+2] = direct_id;
1365
1366 send_tuples[k*6+3] = num_2;
1367 send_tuples[k*6+4] = num_1;
1368 send_tuples[k*6+5] = reverse_id;
1369
1370 k += 1;
1371
1372 }
1373
1374 }
1375
1376 assert((cs_gnum_t)(k*2) == n_g_periodic_tuples);
1377
1378 /* Exchange data */
1379
1380 cs_all_to_all_t
1381 *d_periodic = cs_all_to_all_create(n_g_periodic_tuples,
1382 0,
1383 NULL,
1384 periodic_block_rank,
1385 comm);
1386
1387 cs_gnum_t *recv_tuples = cs_all_to_all_copy_array(d_periodic,
1388 CS_GNUM_TYPE,
1389 3,
1390 false, /* reverse */
1391 send_tuples,
1392 NULL);
1393
1394 cs_lnum_t _n_block_tuples = cs_all_to_all_n_elts_dest(d_periodic);
1395
1396 BFT_FREE(send_tuples);
1397
1398 cs_all_to_all_destroy(&d_periodic);
1399
1400 BFT_FREE(periodic_block_rank);
1401
1402 /* Sort periodic couples by local correspondant, remove duplicates */
1403
1404 _sort_periodic_tuples(&_n_block_tuples,
1405 &recv_tuples);
1406
1407 /* Set return values */
1408
1409 *n_block_tuples = _n_block_tuples;
1410 *block_tuples = recv_tuples;
1411 }
1412
1413 /*----------------------------------------------------------------------------
1414 * Associate block ids for periodic couples.
1415 *
1416 * If a global number appears multiple times in a block, the lowest
1417 * occurence id is returned.
1418 *
1419 * parameters:
1420 * n_block_elements <-- number of elements in block
1421 * order <-- block ordering by global number received
1422 * block_global_num <-- global numbering received
1423 * n_block_couples <-- number of couples in current block
1424 * stride <-- stride for global number of local element
1425 * in block_couples[]
1426 * block_couples <-- couple information for block: for each couple,
1427 * {global number of local element,
1428 * global number of periodic element,
1429 * transform id} if stride = 3,
1430 * {global number of local element} if stride = 1
1431 * couple_block_id --> id in block of local couple element
1432 * comm <-- associated MPI communicator
1433 *----------------------------------------------------------------------------*/
1434
1435 static void
_periodic_couples_block_id(cs_lnum_t n_block_elements,const cs_lnum_t order[],const cs_gnum_t block_global_num[],cs_lnum_t n_block_couples,int stride,const cs_gnum_t block_couples[],cs_lnum_t couple_block_id[])1436 _periodic_couples_block_id(cs_lnum_t n_block_elements,
1437 const cs_lnum_t order[],
1438 const cs_gnum_t block_global_num[],
1439 cs_lnum_t n_block_couples,
1440 int stride,
1441 const cs_gnum_t block_couples[],
1442 cs_lnum_t couple_block_id[])
1443 {
1444 cs_lnum_t couple_id;
1445
1446 /* Initialization */
1447
1448 assert(stride == 3 || stride == 1);
1449
1450 if (n_block_couples == 0)
1451 return;
1452
1453 /* Use binary search */
1454
1455 for (couple_id = 0; couple_id < n_block_couples; couple_id++) {
1456
1457 cs_gnum_t num_cmp;
1458 cs_lnum_t start_id = 0;
1459 cs_lnum_t end_id = n_block_elements - 1;
1460 cs_lnum_t mid_id = (end_id -start_id) / 2;
1461
1462 const cs_gnum_t num_1 = block_couples[couple_id*stride];
1463
1464 /* use binary search */
1465
1466 while (start_id <= end_id) {
1467 num_cmp = block_global_num[order[mid_id]];
1468 if (num_cmp < num_1)
1469 start_id = mid_id + 1;
1470 else if (num_cmp > num_1)
1471 end_id = mid_id - 1;
1472 else
1473 break;
1474 mid_id = start_id + ((end_id -start_id) / 2);
1475 }
1476
1477 /* In case of multiple occurences, find lowest one */
1478
1479 while (mid_id > 0 && block_global_num[order[mid_id-1]] == num_1)
1480 mid_id--;
1481
1482 assert(block_global_num[order[mid_id]] == num_1);
1483
1484 couple_block_id[couple_id] = order[mid_id];
1485 }
1486
1487 }
1488
1489 /*----------------------------------------------------------------------------
1490 * Exchange periodic couple info between processors providing the data
1491 * and processors handling the related global numbering interval blocks.
1492 *
1493 * _periodic_couples_block_id() should have been called first.
1494 *
1495 * parameters:
1496 * block_size <-- size of the block handled by each processor
1497 * equiv_id <-- equivalence id for each block element (-1 if none)
1498 * equiv <-- temporary equivalence structure for block
1499 * n_block_couples <-- number of couples in current block
1500 * block_couples <-- couple information for block: for each couple,
1501 * {global number of local element,
1502 * global number of periodic element,
1503 * transform id}
1504 * couple_block_id <-- local id in block
1505 * dest_rank --> local number of values to send to each rank
1506 * src index --> local number of values to receive from each rank
1507 *----------------------------------------------------------------------------*/
1508
1509 static void
_count_periodic_equiv_exchange(size_t block_size,const cs_lnum_t equiv_id[],const _per_block_equiv_t * equiv,cs_lnum_t n_block_couples,const cs_gnum_t block_couples[],const cs_lnum_t couple_block_id[],int dest_rank[],cs_lnum_t src_index[])1510 _count_periodic_equiv_exchange(size_t block_size,
1511 const cs_lnum_t equiv_id[],
1512 const _per_block_equiv_t *equiv,
1513 cs_lnum_t n_block_couples,
1514 const cs_gnum_t block_couples[],
1515 const cs_lnum_t couple_block_id[],
1516 int dest_rank[],
1517 cs_lnum_t src_index[])
1518 {
1519 src_index[0] = 0;
1520
1521 if (equiv != NULL && equiv_id != NULL) {
1522
1523 /* Compute list sizes to send to distant processors */
1524
1525 for (cs_lnum_t couple_id = 0; couple_id < n_block_couples; couple_id++) {
1526
1527 int e_mult;
1528 cs_gnum_t num_2 = block_couples[couple_id*3 + 1];
1529 cs_lnum_t e_id = equiv_id[couple_block_id[couple_id]];
1530
1531 if (e_id > -1)
1532 e_mult = equiv->shift[e_id +1] - equiv->shift[e_id];
1533 else
1534 e_mult = 1;
1535
1536 dest_rank[couple_id] = (num_2 - 1) / block_size;
1537 src_index[couple_id+1] = src_index[couple_id] + 3 + 2*e_mult;
1538
1539 }
1540
1541 }
1542 else { /* if (equiv == NULL || equiv_id == NULL) */
1543
1544 for (cs_lnum_t couple_id = 0; couple_id < n_block_couples; couple_id++) {
1545
1546 cs_gnum_t num_2 = block_couples[couple_id*3 + 1];
1547
1548 dest_rank[couple_id] = (num_2 - 1) / block_size;
1549 src_index[couple_id+1] = src_index[couple_id] + 5;
1550
1551 }
1552
1553 }
1554 }
1555
1556 /*----------------------------------------------------------------------------
1557 * Exchange periodic couple info between processors providing the data
1558 * and processors handling the related global numbering interval blocks.
1559 *
1560 * parameters:
1561 * block_size <-- size of the block handled by each processor
1562 * n_block_elements <-- number of elements in local block
1563 * src_rank <-- source rank (size: block_size)
1564 * block_global_num <-- global numbering received
1565 * block_num <-- local numbering received
1566 * equiv_id <-- equivalence id for each block element (-1 if none)
1567 * equiv <-- temporary equivalence structure for block
1568 * periodicity <-- periodicity information (NULL if none)
1569 * n_block_couples <-- number of couples in current block
1570 * block_couples <-- couple information for block: for each couple,
1571 * {global number of local element,
1572 * global number of periodic element,
1573 * transform id}
1574 * comm <-- associated MPI communicator
1575 *
1576 * returns:
1577 * structure defining a temporary list of periodic interfaces
1578 *----------------------------------------------------------------------------*/
1579
1580 static _per_block_period_t
_exchange_periodic_equiv(size_t block_size,cs_lnum_t n_block_elements,const int src_rank[],const cs_gnum_t block_global_num[],const cs_lnum_t block_num[],const cs_lnum_t equiv_id[],const _per_block_equiv_t * equiv,const fvm_periodicity_t * periodicity,cs_lnum_t n_block_couples,const cs_gnum_t block_couples[],MPI_Comm comm)1581 _exchange_periodic_equiv(size_t block_size,
1582 cs_lnum_t n_block_elements,
1583 const int src_rank[],
1584 const cs_gnum_t block_global_num[],
1585 const cs_lnum_t block_num[],
1586 const cs_lnum_t equiv_id[],
1587 const _per_block_equiv_t *equiv,
1588 const fvm_periodicity_t *periodicity,
1589 cs_lnum_t n_block_couples,
1590 const cs_gnum_t block_couples[],
1591 MPI_Comm comm)
1592 {
1593 cs_lnum_t *couple_block_id = NULL;
1594 int *reverse_tr_id = NULL;
1595 cs_lnum_t *order = NULL;
1596 cs_gnum_t *block_recv_num = NULL;
1597
1598 _per_block_period_t pe;
1599
1600 /* Initialize return structure */
1601
1602 pe.count = 0;
1603 pe.block_id = NULL;
1604 pe.tr_id = NULL;
1605 pe.shift = NULL;
1606 pe.rank = NULL;
1607 pe.num = NULL;
1608
1609 if (periodicity == NULL)
1610 return pe;
1611
1612 /* Build ordering array for binary search */
1613
1614 order = cs_order_gnum(NULL, block_global_num, n_block_elements);
1615
1616 /* Associate id in block for periodic couples prior to sending */
1617
1618 BFT_MALLOC(couple_block_id, n_block_couples, cs_lnum_t);
1619
1620 _periodic_couples_block_id(n_block_elements,
1621 order,
1622 block_global_num,
1623 n_block_couples,
1624 3,
1625 block_couples,
1626 couple_block_id);
1627
1628 /* build count and shift arrays for parallel exchange */
1629
1630 int *send_rank;
1631 cs_lnum_t *src_index;
1632 BFT_MALLOC(send_rank, n_block_couples, int);
1633 BFT_MALLOC(src_index, n_block_couples+1, cs_lnum_t);
1634
1635 _count_periodic_equiv_exchange(block_size,
1636 equiv_id,
1637 equiv,
1638 n_block_couples,
1639 block_couples,
1640 couple_block_id,
1641 send_rank,
1642 src_index);
1643
1644 cs_all_to_all_t *d = cs_all_to_all_create(n_block_couples,
1645 0,
1646 NULL,
1647 send_rank,
1648 comm);
1649
1650 cs_all_to_all_transfer_dest_rank(d, &send_rank);
1651
1652 cs_lnum_t *dest_index = cs_all_to_all_copy_index(d,
1653 false, /* reverse */
1654 src_index,
1655 NULL);
1656
1657 /* arrays to exchange; most of the exchanged data is of type int or
1658 cs_lnum_t, using only positive values. For each periodic couple,
1659 one value of type cs_gnum_t is exchanged, so to group all communication
1660 in one MPI call, all data is exchanged as cs_gnum_t, even if this
1661 means larger messages if cs_gnum_t is larger than cs_lnum_t.
1662 As the number of elements per couple is variable (depending on prior
1663 equivalence info), using an MPI datatype to mix int and cs_gnum_t
1664 types rather than casting all to cs_gnum_t is not feasible */
1665
1666 cs_gnum_t *equiv_send;
1667 BFT_MALLOC(equiv_send, src_index[n_block_couples], cs_gnum_t);
1668
1669 /* temporary array to find reverse transforms */
1670
1671 int n_tr = fvm_periodicity_get_n_transforms(periodicity);
1672
1673 BFT_MALLOC(reverse_tr_id, n_tr, int);
1674
1675 for (int tr_id = 0; tr_id < n_tr; tr_id++)
1676 reverse_tr_id[tr_id] = fvm_periodicity_get_reverse_id(periodicity, tr_id);
1677
1678 if (equiv != NULL && equiv_id != NULL) {
1679
1680 /* Compute list sizes to send to distant processors */
1681
1682 for (cs_lnum_t couple_id = 0; couple_id < n_block_couples; couple_id++) {
1683
1684 const cs_gnum_t num_2 = block_couples[couple_id*3 + 1];
1685 const cs_lnum_t local_id = couple_block_id[couple_id];
1686 const cs_lnum_t e_id = equiv_id[local_id];
1687
1688 cs_lnum_t i = src_index[couple_id];
1689
1690 if (e_id > -1) {
1691
1692 int j_start = equiv->shift[e_id];
1693 int j_end = equiv->shift[e_id + 1];
1694
1695 equiv_send[i++] = j_end - j_start;
1696 equiv_send[i++] = num_2;
1697 equiv_send[i++] = reverse_tr_id[block_couples[couple_id*3 + 2]];
1698
1699 for (int j = j_start; j < j_end; j++) {
1700 equiv_send[i++] = equiv->rank[j];
1701 equiv_send[i++] = equiv->num[j];
1702 }
1703
1704 }
1705 else {
1706
1707 equiv_send[i++] = 1;
1708 equiv_send[i++] = num_2;
1709 equiv_send[i++] = reverse_tr_id[block_couples[couple_id*3 + 2]];
1710 equiv_send[i++] = src_rank[local_id];
1711 equiv_send[i++] = block_num[local_id];
1712
1713 }
1714
1715 }
1716
1717 }
1718 else { /* if (equiv == NULL || equiv_id == NULL) */
1719
1720 for (cs_lnum_t couple_id = 0; couple_id < n_block_couples; couple_id++) {
1721
1722 const cs_gnum_t num_2 = block_couples[couple_id*3 + 1];
1723 const cs_lnum_t local_id = couple_block_id[couple_id];
1724
1725 cs_lnum_t i = src_index[couple_id];
1726
1727 equiv_send[i++] = 1;
1728 equiv_send[i++] = num_2;
1729 equiv_send[i++] = reverse_tr_id[block_couples[couple_id*3 + 2]];
1730 equiv_send[i++] = src_rank[local_id];
1731 equiv_send[i++] = block_num[local_id];
1732
1733 }
1734
1735 }
1736
1737 BFT_FREE(couple_block_id);
1738 BFT_FREE(reverse_tr_id);
1739
1740 /* Parallel exchange */
1741
1742 cs_gnum_t *equiv_recv
1743 = cs_all_to_all_copy_indexed(d,
1744 CS_GNUM_TYPE,
1745 false, /* reverse */
1746 src_index,
1747 equiv_send,
1748 dest_index,
1749 NULL);
1750
1751 cs_lnum_t n_elts_recv = cs_all_to_all_n_elts_dest(d);
1752 size_t recv_size = dest_index[n_elts_recv];
1753
1754 /* Free memory */
1755
1756 BFT_FREE(src_index);
1757 BFT_FREE(dest_index);
1758 BFT_FREE(equiv_send);
1759
1760 cs_all_to_all_destroy(&d);
1761
1762 /* Build return structure */
1763
1764 {
1765 size_t k, l, e_mult;
1766
1767 pe.count = 0;
1768 size_t i = 0;
1769 size_t j = 0;
1770
1771 while (i < recv_size) {
1772 pe.count += 1;
1773 j += equiv_recv[i];
1774 i += 3 + 2*equiv_recv[i];
1775 }
1776
1777 BFT_MALLOC(block_recv_num, pe.count, cs_gnum_t);
1778
1779 BFT_MALLOC(pe.tr_id, pe.count, int);
1780
1781 BFT_MALLOC(pe.shift, pe.count + 1, int);
1782
1783 BFT_MALLOC(pe.rank, j, int);
1784 BFT_MALLOC(pe.num, j, cs_lnum_t);
1785
1786 pe.shift[0] = 0;
1787
1788 for (i = 0, j = 0, k = 0, l = 0; i < (size_t)(pe.count); i++) {
1789
1790 e_mult = equiv_recv[k++];
1791 block_recv_num[i] = equiv_recv[k++];
1792 pe.tr_id[i] = equiv_recv[k++];
1793
1794 for (l = 0; l < e_mult; l++) {
1795 pe.rank[j] = equiv_recv[k++];
1796 pe.num[j] = equiv_recv[k++];
1797 pe.shift[i+1] = ++j;
1798 }
1799
1800 }
1801
1802 }
1803
1804 BFT_FREE(equiv_recv);
1805
1806 /* Associate id in block for received periodic equivalences */
1807
1808 BFT_MALLOC(pe.block_id, pe.count, cs_lnum_t);
1809
1810 _periodic_couples_block_id(n_block_elements,
1811 order,
1812 block_global_num,
1813 pe.count,
1814 1,
1815 block_recv_num,
1816 pe.block_id);
1817
1818 /* Free remaining working arrays */
1819
1820 BFT_FREE(block_recv_num);
1821 BFT_FREE(couple_block_id);
1822 BFT_FREE(order);
1823
1824 return pe;
1825 }
1826
1827 /*----------------------------------------------------------------------------
1828 * Merge periodic equivalent interface info with block equivalence info.
1829 *
1830 * Expands block equivalence info, and frees temporary list of periodic
1831 * interfaces.
1832 *
1833 * parameters:
1834 * n_block_elts <-- number of block elements
1835 * src_rank <-- source rank
1836 * block_num <-- local numbering received
1837 * equiv_id <-> equivalence id for each block element (-1 if none)
1838 * equiv <-> temporary equivalence structure for block
1839 * perio_equiv <-> temporary list of periodic interfaces
1840 *
1841 * returns:
1842 * structure defining a temporary equivalence structure
1843 *----------------------------------------------------------------------------*/
1844
1845 #if defined(__INTEL_COMPILER)
1846 #pragma optimization_level 2 /* Crash with O3 on IA64 with icc 9.1 20070320 */
1847 #endif
1848
1849 static void
_merge_periodic_equiv(cs_lnum_t n_block_elts,const int src_rank[],const cs_lnum_t block_num[],cs_lnum_t equiv_id[],_per_block_equiv_t * equiv,_per_block_period_t * perio_equiv)1850 _merge_periodic_equiv(cs_lnum_t n_block_elts,
1851 const int src_rank[],
1852 const cs_lnum_t block_num[],
1853 cs_lnum_t equiv_id[],
1854 _per_block_equiv_t *equiv,
1855 _per_block_period_t *perio_equiv)
1856 {
1857 cs_lnum_t i;
1858 size_t j;
1859
1860 cs_lnum_t old_count, new_count;
1861 size_t new_size;
1862
1863 cs_lnum_t *eq_mult = NULL;
1864 cs_lnum_t *new_shift = NULL;
1865
1866 _per_block_period_t *pe = perio_equiv;
1867
1868 assert(equiv != NULL);
1869
1870 if (perio_equiv == NULL)
1871 return;
1872
1873 old_count = equiv->count;
1874
1875 /* Note that by construction, the global numbers of elements appearing
1876 in the original (parallel) equivalence must appear multiple times
1877 in the block (the original equivalences are built using this), and
1878 the global numbers of elements appearing only in the periodic
1879 equivalence must appear only once; thus only one equiv_id[] value
1880 needs to be updated when appending purely periodic equivalences */
1881
1882 for (i = 0, new_count = old_count; i < pe->count; i++) {
1883 if (equiv_id[pe->block_id[i]] == -1)
1884 equiv_id[pe->block_id[i]] = new_count++;
1885 }
1886
1887 BFT_MALLOC(eq_mult, new_count, cs_lnum_t);
1888
1889 for (i = 0; i < old_count; i++)
1890 eq_mult[i] = equiv->shift[i+1] - equiv->shift[i];
1891
1892 for (i = old_count; i < new_count; i++)
1893 eq_mult[i] = 0;
1894
1895 for (i = 0; i < pe->count; i++) {
1896 if (eq_mult[equiv_id[pe->block_id[i]]] == 0)
1897 eq_mult[equiv_id[pe->block_id[i]]] += pe->shift[i+1] - pe->shift[i] + 1;
1898 else
1899 eq_mult[equiv_id[pe->block_id[i]]] += pe->shift[i+1] - pe->shift[i];
1900 }
1901
1902 /* Build new (merged) index, resetting eq_mult to use as a counter */
1903
1904 BFT_MALLOC(new_shift, new_count+1, cs_lnum_t);
1905
1906 new_shift[0] = 0;
1907
1908 for (i = 0; i < new_count; i++) {
1909 assert(eq_mult[i] > 0);
1910 new_shift[i+1] = new_shift[i] + eq_mult[i];
1911 eq_mult[i] = 0;
1912 }
1913
1914 new_size = new_shift[new_count];
1915
1916 /* Expand previous periodicity info */
1917 /*----------------------------------*/
1918
1919 equiv->count = new_count;
1920
1921 if (old_count > 0) {
1922
1923 int k;
1924 int *new_rank = NULL;
1925 cs_lnum_t *new_num = NULL;
1926
1927 BFT_MALLOC(new_rank, new_size, int);
1928
1929 for (i = 0; i < old_count; i++) {
1930 eq_mult[i] = equiv->shift[i+1] - equiv->shift[i];
1931 for (k = 0; k < eq_mult[i]; k++)
1932 new_rank[new_shift[i] + k] = equiv->rank[equiv->shift[i] + k];
1933 }
1934
1935 BFT_FREE(equiv->rank);
1936 equiv->rank = new_rank;
1937
1938 BFT_MALLOC(new_num, new_size, cs_lnum_t);
1939
1940 for (i = 0; i < old_count; i++) {
1941 for (k = 0; k < eq_mult[i]; k++)
1942 new_num[new_shift[i] + k] = equiv->num[equiv->shift[i] + k];
1943 }
1944 BFT_FREE(equiv->num);
1945 equiv->num = new_num;
1946
1947 if (equiv->tr_id != NULL) {
1948
1949 int *new_tr_id = NULL;
1950 BFT_MALLOC(new_tr_id, new_size, int);
1951 for (i = 0; i < old_count; i++) {
1952 for (k = 0; k < eq_mult[i]; k++)
1953 new_tr_id[new_shift[i] + k] = equiv->tr_id[equiv->shift[i] + k];
1954 }
1955 BFT_FREE(equiv->tr_id);
1956 equiv->tr_id = new_tr_id;
1957
1958 }
1959
1960 /* All is expanded at this stage, so old index may be replaced */
1961
1962 BFT_FREE(equiv->shift);
1963 equiv->shift = new_shift;
1964
1965 }
1966 else {
1967
1968 BFT_FREE(equiv->shift);
1969 equiv->shift = new_shift;
1970 BFT_MALLOC(equiv->rank, new_size, int);
1971 BFT_MALLOC(equiv->num, new_size, cs_lnum_t);
1972
1973 }
1974
1975 if (equiv->tr_id == NULL) {
1976 BFT_MALLOC(equiv->tr_id, new_size, int);
1977 for (j = 0; j < new_size; j++)
1978 equiv->tr_id[j] = 0;
1979 }
1980
1981 /* Now insert periodic equivalence info */
1982
1983 for (cs_lnum_t k = 0; k < n_block_elts; k++) {
1984
1985 if (equiv_id[k] >= old_count) {
1986
1987 const cs_lnum_t eq_id = equiv_id[k];
1988 const cs_lnum_t l = equiv->shift[eq_id];
1989
1990 assert(eq_mult[eq_id] == 0);
1991 equiv->rank[l] = src_rank[k];
1992 equiv->num[l] = block_num[k];
1993 equiv->tr_id[l] = 0;
1994 eq_mult[eq_id] = 1;
1995
1996 }
1997 }
1998
1999 for (i = 0; i < pe->count; i++) {
2000
2001 int k, l;
2002 const cs_lnum_t _block_id = pe->block_id[i];
2003 const int eq_id = equiv_id[_block_id];
2004
2005 for (k = pe->shift[i]; k < pe->shift[i+1]; k++) {
2006
2007 l = equiv->shift[eq_id] + eq_mult[eq_id];
2008
2009 equiv->rank[l] = pe->rank[k];
2010 equiv->num[l] = pe->num[k];
2011
2012 equiv->tr_id[l] = pe->tr_id[i] + 1;
2013
2014 eq_mult[eq_id] += 1;
2015
2016 }
2017
2018 }
2019
2020 BFT_FREE(eq_mult);
2021
2022 /* Free temporary periodic equivalence structure elements */
2023
2024 BFT_FREE(pe->block_id);
2025 BFT_FREE(pe->tr_id);
2026 BFT_FREE(pe->shift);
2027 BFT_FREE(pe->rank);
2028 BFT_FREE(pe->num);
2029 }
2030
2031 /*----------------------------------------------------------------------------
2032 * Creation of a list of interfaces between elements of a same type.
2033 *
2034 * parameters:
2035 * ifs <-> pointer to structure that should be updated
2036 * n_elts <-- local number of elements
2037 * global_num <-- global number (id) associated with each element
2038 * periodicity <-- periodicity information (NULL if none)
2039 * n_periodic_lists <-- number of periodic lists (may be local)
2040 * periodicity_num <-- periodicity number (1 to n) associated with
2041 * each periodic list (primary periodicities only)
2042 * n_periodic_couples <-- number of periodic couples associated with
2043 * each periodic list
2044 * periodic_couples <-- array indicating periodic couples (using
2045 * global numberings) for each list
2046 * comm <-- associated MPI communicator
2047 *----------------------------------------------------------------------------*/
2048
2049 static void
_add_global_equiv_periodic(cs_interface_set_t * ifs,cs_lnum_t n_elts,const cs_gnum_t global_num[],const fvm_periodicity_t * periodicity,int n_periodic_lists,const int periodicity_num[],const cs_lnum_t n_periodic_couples[],const cs_gnum_t * const periodic_couples[],MPI_Comm comm)2050 _add_global_equiv_periodic(cs_interface_set_t *ifs,
2051 cs_lnum_t n_elts,
2052 const cs_gnum_t global_num[],
2053 const fvm_periodicity_t *periodicity,
2054 int n_periodic_lists,
2055 const int periodicity_num[],
2056 const cs_lnum_t n_periodic_couples[],
2057 const cs_gnum_t *const periodic_couples[],
2058 MPI_Comm comm)
2059 {
2060 cs_lnum_t n_block_couples = 0;
2061 cs_lnum_t *equiv_id = NULL, *couple_equiv_id = NULL;
2062 cs_gnum_t *block_couples = NULL;
2063
2064 /* Initialization */
2065
2066 int size, local_rank;
2067 MPI_Comm_size(comm, &size);
2068 MPI_Comm_rank(comm, &local_rank);
2069
2070 /* Get temporary maximum global number value */
2071
2072 cs_gnum_t global_max = _global_num_max(n_elts,
2073 global_num,
2074 comm);
2075
2076 cs_block_dist_info_t
2077 bi = cs_block_dist_compute_sizes(local_rank,
2078 size,
2079 1,
2080 0,
2081 global_max);
2082
2083 int flags = CS_ALL_TO_ALL_NEED_SRC_RANK;
2084
2085 cs_all_to_all_t
2086 *d = cs_all_to_all_create_from_block(n_elts,
2087 flags,
2088 global_num,
2089 bi,
2090 comm);
2091
2092 assert(sizeof(cs_gnum_t) >= sizeof(cs_lnum_t));
2093
2094 cs_gnum_t *recv_global_num = cs_all_to_all_copy_array(d,
2095 CS_GNUM_TYPE,
2096 1,
2097 false, /* reverse */
2098 global_num,
2099 NULL);
2100
2101 cs_lnum_t *send_num = NULL;
2102 BFT_MALLOC(send_num, n_elts, cs_lnum_t);
2103 for (cs_lnum_t i = 0; i < n_elts; i++)
2104 send_num[i] = i+1;
2105
2106 cs_lnum_t *recv_num = cs_all_to_all_copy_array(d,
2107 CS_LNUM_TYPE,
2108 1,
2109 false, /* reverse */
2110 send_num,
2111 NULL);
2112
2113 BFT_FREE(send_num);
2114
2115 cs_lnum_t n_elts_recv = cs_all_to_all_n_elts_dest(d);
2116
2117 int *src_rank = cs_all_to_all_get_src_rank(d);
2118
2119 /* Exchange periodicity information */
2120
2121 _exchange_periodic_tuples(bi.block_size,
2122 periodicity,
2123 n_periodic_lists,
2124 periodicity_num,
2125 n_periodic_couples,
2126 periodic_couples,
2127 &n_block_couples,
2128 &block_couples,
2129 comm);
2130
2131 /* Combine periodic couples if necessary */
2132
2133 if (fvm_periodicity_get_n_levels(periodicity) > 1)
2134 _combine_periodic_tuples(bi.block_size,
2135 periodicity,
2136 &n_block_couples,
2137 &block_couples,
2138 comm);
2139
2140 /* Build purely parallel equivalence data first */
2141
2142 if (n_elts_recv > 0)
2143 BFT_MALLOC(equiv_id, n_elts_recv, cs_lnum_t);
2144
2145 _per_block_equiv_t
2146 e = _block_global_num_to_equiv(n_elts_recv,
2147 src_rank,
2148 recv_global_num,
2149 recv_num,
2150 equiv_id);
2151
2152 /* Now combine periodic and parallel equivalences */
2153
2154 _per_block_period_t
2155 pe = _exchange_periodic_equiv(bi.block_size,
2156 n_elts_recv,
2157 src_rank,
2158 recv_global_num,
2159 recv_num,
2160 equiv_id,
2161 &e,
2162 periodicity,
2163 n_block_couples,
2164 block_couples,
2165 comm);
2166
2167 BFT_FREE(recv_global_num);
2168
2169 _merge_periodic_equiv(n_elts_recv,
2170 src_rank,
2171 recv_num,
2172 equiv_id,
2173 &e,
2174 &pe);
2175
2176 /* Free all arrays not needed anymore */
2177
2178 BFT_FREE(recv_num);
2179
2180 BFT_FREE(couple_equiv_id);
2181 BFT_FREE(block_couples);
2182 n_block_couples = 0;
2183
2184 /* Now that equivalences are marked, count for each rank; for each
2185 equivalence, we will need to send the corresponding element
2186 numbers, ranks, and transform ids,
2187 for a total of 2 + 3*(e.multiple[...] - 1)
2188 = (3*e.multiple[...]) - 1 values. */
2189
2190 cs_lnum_t *block_index;
2191 BFT_MALLOC(block_index, n_elts_recv+1, cs_lnum_t);
2192
2193 block_index[0] = 0;
2194 for (cs_lnum_t i = 0; i < n_elts_recv; i++) {
2195 cs_lnum_t n_eq = 0;
2196 if (equiv_id[i] > -1) {
2197 size_t e_id = equiv_id[i];
2198 n_eq = 3*(e.shift[e_id+1] - e.shift[e_id]) - 1;
2199 }
2200 block_index[i+1] = block_index[i] + n_eq;
2201 }
2202
2203 cs_lnum_t *part_index = cs_all_to_all_copy_index(d,
2204 true, /* reverse */
2205 block_index,
2206 NULL);
2207
2208 /* Now prepare new send buffer */
2209
2210 cs_lnum_t *block_equiv;
2211 BFT_MALLOC(block_equiv, block_index[n_elts_recv], cs_lnum_t);
2212
2213 for (cs_lnum_t i = 0; i < n_elts_recv; i++) {
2214 if (equiv_id[i] > -1) {
2215
2216 cs_lnum_t *block_equiv_p = block_equiv + block_index[i];
2217
2218 cs_lnum_t e_id = equiv_id[i];
2219 cs_lnum_t k = 2;
2220 const int multiple = e.shift[e_id+1] - e.shift[e_id];
2221 const int *rank_p = e.rank + e.shift[e_id];
2222 const int *tr_id_p = e.tr_id + e.shift[e_id];
2223 const cs_lnum_t *num_p = e.num + e.shift[e_id];
2224
2225 for (cs_lnum_t j = 0; j < multiple; j++) {
2226 if (rank_p[j] == src_rank[i] && tr_id_p[j] == 0) {
2227 block_equiv_p[0] = num_p[j];
2228 block_equiv_p[1] = multiple - 1;
2229 }
2230 else {
2231 block_equiv_p[k++] = num_p[j];
2232 block_equiv_p[k++] = tr_id_p[j];
2233 block_equiv_p[k++] = rank_p[j];
2234 }
2235 }
2236
2237 }
2238 }
2239
2240 /* Free temporary (block) equivalence info */
2241
2242 e.count = 0;
2243 BFT_FREE(e.shift);
2244 BFT_FREE(e.rank);
2245 BFT_FREE(e.tr_id);
2246 BFT_FREE(e.num);
2247
2248 BFT_FREE(src_rank);
2249 BFT_FREE(equiv_id);
2250
2251 cs_lnum_t *part_equiv
2252 = cs_all_to_all_copy_indexed(d,
2253 CS_LNUM_TYPE,
2254 true, /* reverse */
2255 block_index,
2256 block_equiv,
2257 part_index,
2258 NULL);
2259
2260 cs_lnum_t n_vals_part = part_index[n_elts];
2261
2262 /* At this stage, MPI operations are finished; we may release
2263 the corresponding counts and indexes */
2264
2265 BFT_FREE(block_equiv);
2266 BFT_FREE(part_index);
2267 BFT_FREE(block_index);
2268
2269 cs_all_to_all_destroy(&d);
2270
2271 /* Add interface */
2272
2273 int tr_index_size = fvm_periodicity_get_n_transforms(periodicity) + 2;
2274
2275 _interfaces_from_flat_equiv(ifs,
2276 tr_index_size,
2277 n_vals_part,
2278 part_equiv);
2279
2280 BFT_FREE(part_equiv);
2281 }
2282
2283 #endif /* defined(HAVE_MPI) */
2284
2285 /*----------------------------------------------------------------------------
2286 * Prepare periodic couple info in single process mode.
2287 *
2288 * Note that the array pointed to by block_couples is allocated here,
2289 * and must be freed by the calling code.
2290 *
2291 * parameters:
2292 * periodicity <-- periodicity information (NULL if none)
2293 * n_periodic_lists <-- number of periodic lists (may be local)
2294 * periodicity_num <-- periodicity number (1 to n) associated with
2295 * each periodic list (primary periodicities only)
2296 * n_periodic_couples <-- number of periodic couples associated with
2297 * each periodic list
2298 * periodic_couples <-- array indicating periodic couples (using
2299 * global numberings) for each list
2300 * n_couples --> number of couples
2301 * couples --> couple information: for each couple,
2302 * {global number of local element,
2303 * global number of periodic element,
2304 * transform id}
2305 *----------------------------------------------------------------------------*/
2306
2307 static void
_define_periodic_couples_sp(const fvm_periodicity_t * periodicity,int n_periodic_lists,const int periodicity_num[],const cs_lnum_t n_periodic_couples[],const cs_gnum_t * const periodic_couples[],cs_lnum_t * n_couples,cs_gnum_t ** couples)2308 _define_periodic_couples_sp(const fvm_periodicity_t *periodicity,
2309 int n_periodic_lists,
2310 const int periodicity_num[],
2311 const cs_lnum_t n_periodic_couples[],
2312 const cs_gnum_t *const periodic_couples[],
2313 cs_lnum_t *n_couples,
2314 cs_gnum_t **couples)
2315 {
2316 int list_id;
2317
2318 cs_lnum_t count = 0;
2319 cs_lnum_t _n_couples = 0;
2320 cs_gnum_t *_couples = NULL;
2321
2322 /* Initialization */
2323
2324 *n_couples = 0;
2325 *couples = NULL;
2326
2327 for (list_id = 0, _n_couples = 0; list_id < n_periodic_lists; list_id++)
2328 _n_couples += n_periodic_couples[list_id] * 2;
2329
2330 BFT_MALLOC(_couples, _n_couples*3, cs_gnum_t);
2331
2332 /* Prepare lists */
2333
2334 for (list_id = 0; list_id < n_periodic_lists; list_id++) {
2335
2336 cs_lnum_t couple_id;
2337 cs_gnum_t num_1, num_2;
2338
2339 const int external_num = periodicity_num[list_id];
2340 const int direct_id = fvm_periodicity_get_transform_id(periodicity,
2341 external_num,
2342 1);
2343 const int reverse_id = fvm_periodicity_get_transform_id(periodicity,
2344 external_num,
2345 -1);
2346
2347 const cs_lnum_t _n_periodic_couples = n_periodic_couples[list_id];
2348 const cs_gnum_t *_periodic_couples = periodic_couples[list_id];
2349
2350 assert(direct_id >= 0 && reverse_id >= 0);
2351
2352 for (couple_id = 0; couple_id < _n_periodic_couples; couple_id++) {
2353
2354 num_1 = _periodic_couples[couple_id*2];
2355 num_2 = _periodic_couples[couple_id*2 + 1];
2356
2357 _couples[count] = num_1;
2358 _couples[count+1] = num_2;
2359 _couples[count+2] = direct_id;
2360
2361 _couples[count+3] = num_2;
2362 _couples[count+4] = num_1;
2363 _couples[count+5] = reverse_id;
2364
2365 count += 6;
2366
2367 }
2368
2369 }
2370
2371 /* Sort periodic couples by local match, remove duplicates */
2372
2373 _sort_periodic_tuples(&_n_couples, &_couples);
2374
2375 /* Set return values */
2376
2377 *n_couples = _n_couples;
2378 *couples = _couples;
2379 }
2380
2381 /*----------------------------------------------------------------------------
2382 * Build eventual couples belonging to combined periodicities in single
2383 * process mode.
2384 *
2385 * parameters:
2386 * periodicity <-- periodicity information (NULL if none)
2387 * n_couples <-> number of couples in current block
2388 * couples <-> couple information for this rank: for each couple,
2389 * {global number of local element,
2390 * global number of periodic element,
2391 * transform id}
2392 *----------------------------------------------------------------------------*/
2393
2394 static void
_combine_periodic_couples_sp(const fvm_periodicity_t * periodicity,cs_lnum_t * n_couples,cs_gnum_t ** couples)2395 _combine_periodic_couples_sp(const fvm_periodicity_t *periodicity,
2396 cs_lnum_t *n_couples,
2397 cs_gnum_t **couples)
2398 {
2399 int i, n_tr, level;
2400
2401 cs_lnum_t start_id, end_id, j, k;
2402
2403 int add_count = 0;
2404 int *tr_reverse_id = NULL;
2405 cs_gnum_t *_add_couples = NULL;
2406
2407 cs_lnum_t _n_couples = *n_couples;
2408 cs_gnum_t *_couples = *couples;
2409
2410 assert (periodicity != NULL);
2411
2412 /* Build periodicity related arrays for quick access */
2413
2414 n_tr = fvm_periodicity_get_n_transforms(periodicity);
2415
2416 BFT_MALLOC(tr_reverse_id, n_tr, int);
2417
2418 for (i = 0; i < n_tr; i++)
2419 tr_reverse_id[i] = fvm_periodicity_get_reverse_id(periodicity, i);
2420
2421 /* Loop on combination levels */
2422
2423 for (level = 1;
2424 level < fvm_periodicity_get_n_levels(periodicity);
2425 level++) {
2426
2427 int n_rows = 0;
2428 int *tr_combine = NULL;
2429
2430 _transform_combine_info(periodicity,
2431 level,
2432 &n_rows,
2433 &tr_combine);
2434
2435 /* Count values to add */
2436
2437 add_count = 0;
2438
2439 start_id = 0;
2440 end_id = 1;
2441
2442 while (end_id < _n_couples) {
2443
2444 if (_couples[start_id*3] == _couples[end_id*3]) {
2445
2446 end_id++;
2447 while (end_id < _n_couples) {
2448 if (_couples[end_id*3] != _couples[start_id*3])
2449 break;
2450 end_id++;
2451 }
2452
2453 for (j = start_id; j < end_id; j++) {
2454 for (k = j+1; k < end_id; k++) {
2455
2456 int tr_1 = tr_reverse_id[_couples[j*3 + 2]];
2457 int tr_2 = _couples[k*3 + 2];
2458
2459 if (tr_combine[tr_1 *n_rows + tr_2] > -1)
2460 add_count += 6;
2461
2462 }
2463 }
2464
2465 }
2466
2467 start_id = end_id;
2468 end_id += 1;
2469 }
2470
2471 /* Nothing to do for this combination level if add_count = 0 */
2472
2473 if (add_count == 0) {
2474 BFT_FREE(tr_combine);
2475 continue;
2476 }
2477
2478 BFT_REALLOC(_couples, _n_couples*3 + add_count, cs_gnum_t);
2479
2480 _add_couples = _couples + _n_couples*3;
2481
2482 /* Now add combined couples */
2483
2484 start_id = 0;
2485 end_id = 1;
2486
2487 while (end_id < _n_couples) {
2488
2489 if (_couples[start_id*3] == _couples[end_id*3]) {
2490
2491 end_id++;
2492 while (end_id < _n_couples) {
2493 if (_couples[end_id*3] != _couples[start_id*3])
2494 break;
2495 end_id++;
2496 }
2497
2498 /* Loop on couple combinations */
2499
2500 for (j = start_id; j < end_id; j++) {
2501 for (k = j+1; k < end_id; k++) {
2502
2503 cs_gnum_t num_1 = _couples[j*3 + 1];
2504 cs_gnum_t num_2 = _couples[k*3 + 1];
2505 int tr_1 = tr_reverse_id[_couples[j*3 + 2]];
2506 int tr_2 = _couples[k*3 + 2];
2507 int tr_c = tr_combine[tr_1 *n_rows + tr_2];
2508
2509 if (tr_c > -1) {
2510
2511 _add_couples[0] = num_1;
2512 _add_couples[1] = num_2;
2513 _add_couples[2] = tr_c;
2514
2515 _add_couples[3] = num_2;
2516 _add_couples[4] = num_1;
2517 _add_couples[5] = tr_reverse_id[tr_c];
2518
2519 _add_couples += 6;
2520
2521 }
2522
2523 }
2524 } /* End of loop on couple combinations */
2525
2526 }
2527
2528 start_id = end_id;
2529 end_id += 1;
2530 }
2531
2532 BFT_FREE(tr_combine);
2533
2534 /* Finally, merge additional couples received for block
2535 existing periodicity information */
2536
2537 assert(add_count % 3 == 0);
2538
2539 _n_couples += add_count / 3;
2540
2541 /* Sort and remove duplicates to update periodicity info */
2542
2543 _sort_periodic_tuples(&_n_couples, &_couples);
2544
2545 *n_couples = _n_couples;
2546 *couples = _couples;
2547
2548 }
2549
2550 BFT_FREE(tr_reverse_id);
2551 }
2552
2553 /*----------------------------------------------------------------------------
2554 * Creation of a list of interfaces between elements of a same type.
2555 *
2556 * This simplified algorithm is intended for single-process mode.
2557 *
2558 * parameters:
2559 * ifs <-> pointer to structure that should be updated
2560 * n_elts <-- local number of elements
2561 * global_num <-- global number (id) associated with each element
2562 * periodicity <-- periodicity information (NULL if none)
2563 * n_periodic_lists <-- number of periodic lists (may be local)
2564 * periodicity_num <-- periodicity number (1 to n) associated with
2565 * each periodic list (primary periodicities only)
2566 * n_periodic_couples <-- number of periodic couples associated with
2567 * each periodic list
2568 * periodic_couples <-- array indicating periodic couples (using
2569 * global numberings) for each list
2570 *----------------------------------------------------------------------------*/
2571
2572 static void
_add_global_equiv_periodic_sp(cs_interface_set_t * ifs,cs_lnum_t n_elts,const cs_gnum_t global_num[],const fvm_periodicity_t * periodicity,int n_periodic_lists,const int periodicity_num[],const cs_lnum_t n_periodic_couples[],const cs_gnum_t * const periodic_couples[])2573 _add_global_equiv_periodic_sp(cs_interface_set_t *ifs,
2574 cs_lnum_t n_elts,
2575 const cs_gnum_t global_num[],
2576 const fvm_periodicity_t *periodicity,
2577 int n_periodic_lists,
2578 const int periodicity_num[],
2579 const cs_lnum_t n_periodic_couples[],
2580 const cs_gnum_t *const periodic_couples[])
2581 {
2582 cs_lnum_t i, couple_id;
2583 cs_lnum_t n_couples = 0;
2584 cs_lnum_t *n_elts_tr = NULL;
2585 cs_gnum_t *couples = NULL;
2586
2587 cs_interface_t *_interface = NULL;
2588
2589 assert(sizeof(cs_gnum_t) >= sizeof(cs_lnum_t));
2590
2591 /* Organize periodicity information */
2592
2593 _define_periodic_couples_sp(periodicity,
2594 n_periodic_lists,
2595 periodicity_num,
2596 n_periodic_couples,
2597 periodic_couples,
2598 &n_couples,
2599 &couples);
2600
2601 /* Combine periodic couples if necessary */
2602
2603 if (fvm_periodicity_get_n_levels(periodicity) > 1)
2604 _combine_periodic_couples_sp(periodicity,
2605 &n_couples,
2606 &couples);
2607
2608 /* Add interface to set */
2609
2610 ifs->size += 1;
2611
2612 BFT_REALLOC(ifs->interfaces,
2613 ifs->size,
2614 cs_interface_t *);
2615
2616 _interface = _cs_interface_create();
2617
2618 ifs->interfaces[ifs->size - 1] = _interface;
2619
2620 /* Build interface */
2621
2622 _interface->rank = 0;
2623 _interface->size = n_couples;
2624
2625 _interface->tr_index_size
2626 = fvm_periodicity_get_n_transforms(periodicity) + 2;
2627
2628 BFT_MALLOC(_interface->tr_index, _interface->tr_index_size, cs_lnum_t);
2629 BFT_MALLOC(_interface->elt_id, _interface->size, cs_lnum_t);
2630 BFT_MALLOC(_interface->match_id, _interface->size, cs_lnum_t);
2631
2632 /* Count couples for each transform */
2633
2634 BFT_MALLOC(n_elts_tr, _interface->tr_index_size - 2, cs_lnum_t);
2635
2636 for (i = 0; i < _interface->tr_index_size - 2; i++)
2637 n_elts_tr[i] = 0;
2638
2639 for (couple_id = 0; couple_id < n_couples; couple_id++)
2640 n_elts_tr[couples[couple_id*3 + 2]] += 1;
2641
2642 /* Build index */
2643
2644 _interface->tr_index[0] = 0;
2645 _interface->tr_index[1] = 0;
2646
2647 for (i = 2; i < _interface->tr_index_size; i++) {
2648 _interface->tr_index[i] = _interface->tr_index[i-1] + n_elts_tr[i-2];
2649 n_elts_tr[i-2] = 0;
2650 }
2651
2652 /* Build local and distant correspondants */
2653
2654 if (global_num == NULL) {
2655
2656 for (couple_id = 0; couple_id < n_couples; couple_id++) {
2657
2658 const int tr_id = couples[couple_id*3 + 2];
2659 const cs_lnum_t j = _interface->tr_index[tr_id + 1] + n_elts_tr[tr_id];
2660
2661 _interface->elt_id[j] = couples[couple_id*3] - 1;
2662 _interface->match_id[j] = couples[couple_id*3 + 1] - 1;
2663
2664 n_elts_tr[tr_id] += 1;
2665
2666 }
2667
2668 }
2669 else {
2670
2671 cs_lnum_t *renum;
2672 BFT_MALLOC(renum, n_elts, cs_lnum_t);
2673 for (i = 0; i < n_elts; i++) {
2674 cs_lnum_t j = global_num[i] - 1;
2675 assert(j >= 0 && j < n_elts);
2676 renum[j] = i;
2677 }
2678
2679 for (couple_id = 0; couple_id < n_couples; couple_id++) {
2680
2681 const int tr_id = couples[couple_id*3 + 2];
2682 const cs_lnum_t j = _interface->tr_index[tr_id + 1] + n_elts_tr[tr_id];
2683
2684 _interface->elt_id[j] = renum[couples[couple_id*3] - 1];
2685 _interface->match_id[j] = renum[couples[couple_id*3 + 1] - 1];
2686
2687 n_elts_tr[tr_id] += 1;
2688
2689 }
2690
2691 BFT_FREE(renum);
2692 }
2693
2694 /* Free temporary arrays */
2695
2696 BFT_FREE(n_elts_tr);
2697 BFT_FREE(couples);
2698 }
2699
2700 /*----------------------------------------------------------------------------
2701 * Order element id lists and update matching id lists for each
2702 * section of each interface of a set.
2703 *
2704 * This must be called as part of a renumbering algorithm, before
2705 * matching element ids may be replaced by send orderings.
2706 *
2707 * parameters:
2708 * ifs <-> pointer to interface set
2709 *----------------------------------------------------------------------------*/
2710
2711 static void
_order_elt_id(cs_interface_set_t * ifs)2712 _order_elt_id(cs_interface_set_t *ifs)
2713 {
2714 int i;
2715
2716 for (i = 0; i < ifs->size; i++) {
2717
2718 int section_id;
2719 cs_lnum_t j;
2720
2721 int tr_index_size = 2;
2722 cs_lnum_t _tr_index[2] = {0, 0};
2723 cs_lnum_t *order = NULL;
2724 cs_lnum_t *tmp = NULL;
2725 const cs_lnum_t *tr_index = _tr_index;
2726
2727 cs_interface_t *itf = ifs->interfaces[i];
2728
2729 if (itf == NULL)
2730 return;
2731
2732 /* When this function is called, a distant-rank interface should have
2733 a match_id array, but not a send_order array */
2734
2735 assert(itf->send_order == NULL);
2736
2737 _tr_index[1] = itf->size;
2738
2739 if (itf->tr_index_size > 0) {
2740 tr_index_size = itf->tr_index_size;
2741 tr_index = itf->tr_index;
2742 }
2743
2744 BFT_MALLOC(order, tr_index[tr_index_size - 1], cs_lnum_t);
2745 BFT_MALLOC(tmp, tr_index[tr_index_size - 1], cs_lnum_t);
2746
2747 for (section_id = 0; section_id < tr_index_size - 1; section_id++) {
2748
2749 cs_lnum_t start_id = tr_index[section_id];
2750 cs_lnum_t l = tr_index[section_id + 1] - start_id;
2751
2752 /* Now compute distant ordering to build send_order */
2753
2754 cs_order_lnum_allocated(NULL,
2755 itf->elt_id + start_id,
2756 order,
2757 l);
2758
2759 /* Update elt_id */
2760
2761 for (j = 0; j < l; j++)
2762 tmp[j] = itf->elt_id[order[j] + start_id];
2763
2764 memcpy(itf->elt_id + start_id, tmp, l*sizeof(cs_lnum_t));
2765
2766 /* Update match_id */
2767
2768 for (j = 0; j < l; j++)
2769 tmp[j] = itf->match_id[order[j] + start_id];
2770
2771 memcpy(itf->match_id + start_id, tmp, l*sizeof(cs_lnum_t));
2772
2773 }
2774
2775 BFT_FREE(tmp);
2776 BFT_FREE(order);
2777
2778 }
2779 }
2780
2781 /*----------------------------------------------------------------------------
2782 * Order interfaces by increasing element id.
2783 *
2784 * parameters:
2785 * ifs <-> pointer to interface set
2786 *----------------------------------------------------------------------------*/
2787
2788 static void
_order_by_elt_id(cs_interface_set_t * ifs)2789 _order_by_elt_id(cs_interface_set_t *ifs)
2790 {
2791 int i;
2792
2793 for (i = 0; i < ifs->size; i++) {
2794
2795 int section_id;
2796
2797 int tr_index_size = 2;
2798 cs_lnum_t _tr_index[2] = {0, 0};
2799 cs_lnum_t *order = NULL, *buffer = NULL;
2800 const cs_lnum_t *tr_index = _tr_index;
2801
2802 cs_interface_t *itf = ifs->interfaces[i];
2803
2804 if (itf == NULL)
2805 return;
2806
2807 /* When this function is called, a distant-rank interface should have
2808 a match_id array, but not a send_order array */
2809
2810 assert(itf->send_order == NULL);
2811
2812 _tr_index[1] = itf->size;
2813
2814 if (itf->tr_index_size > 0) {
2815 tr_index_size = itf->tr_index_size;
2816 tr_index = itf->tr_index;
2817 }
2818
2819 BFT_MALLOC(order, tr_index[tr_index_size - 1], cs_lnum_t);
2820 BFT_MALLOC(buffer, tr_index[tr_index_size - 1]*2, cs_lnum_t);
2821
2822 for (section_id = 0; section_id < tr_index_size - 1; section_id++) {
2823
2824 cs_lnum_t start_id = tr_index[section_id];
2825 cs_lnum_t end_id = tr_index[section_id + 1];
2826
2827 /* Now compute distant ordering to build send_order */
2828
2829 cs_order_lnum_allocated(NULL,
2830 itf->elt_id + start_id,
2831 order + start_id,
2832 end_id - start_id);
2833
2834 for (cs_lnum_t j = start_id; j < end_id; j++) {
2835 buffer[j*2] = itf->elt_id[j];
2836 buffer[j*2+1] = itf->match_id[j];
2837 }
2838
2839 for (cs_lnum_t j = start_id; j < end_id; j++) {
2840 cs_lnum_t k = order[j] + start_id;
2841 itf->elt_id[j] = buffer[k*2];
2842 itf->match_id[j] = buffer[k*2+1];
2843 }
2844
2845 }
2846
2847 BFT_FREE(buffer);
2848 BFT_FREE(order);
2849
2850 }
2851 }
2852
2853 /*----------------------------------------------------------------------------
2854 * Replace array of distant element ids with ordering of list of ids to send,
2855 * so that sends will match receives.
2856 *
2857 * parameters:
2858 * ifs <-> pointer to interface set
2859 *----------------------------------------------------------------------------*/
2860
2861 static void
_match_id_to_send_order(cs_interface_set_t * ifs)2862 _match_id_to_send_order(cs_interface_set_t *ifs)
2863 {
2864 int i;
2865
2866 for (i = 0; i < ifs->size; i++) {
2867
2868 int section_id;
2869 cs_lnum_t j, s_id, e_id;
2870
2871 int tr_index_size = 2;
2872 cs_lnum_t _tr_index[2] = {0, 0};
2873 cs_lnum_t *order = NULL;
2874 const cs_lnum_t *tr_index = _tr_index;
2875
2876 cs_interface_t *itf = ifs->interfaces[i];
2877
2878 if (itf == NULL)
2879 return;
2880
2881 /* When this function is called, a distant-rank interface should have
2882 a match_id array, but not a send_order array */
2883
2884 assert(itf->send_order == NULL);
2885
2886 _tr_index[1] = itf->size;
2887
2888 if (itf->tr_index_size > 0) {
2889 tr_index_size = itf->tr_index_size;
2890 tr_index = itf->tr_index;
2891 }
2892
2893 BFT_MALLOC(order, tr_index[tr_index_size - 1], cs_lnum_t);
2894
2895 for (section_id = 0; section_id < tr_index_size - 1; section_id++) {
2896
2897 cs_lnum_t start_id = tr_index[section_id];
2898 cs_lnum_t l = tr_index[section_id + 1] - start_id;
2899
2900 /* Now compute distant ordering to build send_order */
2901
2902 cs_order_lnum_allocated(NULL,
2903 itf->match_id + start_id,
2904 order + start_id,
2905 l);
2906
2907 }
2908
2909 /* Swap match_id and send_order arrays */
2910
2911 itf->send_order = itf->match_id;
2912 itf->match_id = NULL;
2913
2914 /* Parallel-only elements */
2915
2916 s_id = tr_index[0]; e_id = tr_index[1];
2917 for (j = s_id; j < e_id; j++)
2918 itf->send_order[j] = order[j] + s_id;
2919
2920 /* Periodic elements */
2921
2922 if (itf->tr_index_size > 0) {
2923
2924 int tr_id;
2925 cs_lnum_t k = tr_index[1];
2926 const int n_tr = tr_index_size - 2;
2927
2928 for (tr_id = 0; tr_id < n_tr; tr_id++) {
2929 int r_tr_id = fvm_periodicity_get_reverse_id(ifs->periodicity,
2930 tr_id);
2931 s_id = tr_index[r_tr_id+1];
2932 e_id = tr_index[r_tr_id+2];
2933 for (j = s_id; j < e_id; j++)
2934 itf->send_order[k++] = order[j] + s_id;
2935 }
2936
2937 assert(k == itf->size);
2938 }
2939
2940 BFT_FREE(order);
2941
2942 }
2943 }
2944
2945 /*----------------------------------------------------------------------------
2946 * Prepare renumbering of elements referenced by an interface set.
2947 *
2948 * This requires replacing the send ordering of interfaces from a set
2949 * with the matching (distant or periodic) element id, to which renumbering
2950 * is applied. The send ordering will be rebuilt later.
2951 *
2952 * For any given element i, a negative old_to_new[i] value means that that
2953 * element does not appear anymore in the new numbering, but the filtering
2954 * is not applied at this stage.
2955 *
2956 * parameters:
2957 * ifs <-> pointer to interface set structure
2958 * old_to_new <-- renumbering array (0 to n-1 numbering)
2959 *----------------------------------------------------------------------------*/
2960
2961 static void
_set_renumber_update_ids(cs_interface_set_t * ifs,const cs_lnum_t old_to_new[])2962 _set_renumber_update_ids(cs_interface_set_t *ifs,
2963 const cs_lnum_t old_to_new[])
2964 {
2965 int i;
2966 cs_lnum_t j;
2967 int local_rank = 0;
2968 cs_lnum_t *send_buf = NULL;
2969
2970 #if defined(HAVE_MPI)
2971
2972 int n_ranks = 1;
2973 int request_count = 0;
2974 MPI_Request *request = NULL;
2975 MPI_Status *status = NULL;
2976
2977 if (ifs->comm != MPI_COMM_NULL) {
2978 MPI_Comm_rank(ifs->comm, &local_rank);
2979 MPI_Comm_size(ifs->comm, &n_ranks);
2980 }
2981
2982 #endif
2983
2984 assert(ifs != NULL);
2985
2986 #if defined(HAVE_MPI)
2987
2988 if (n_ranks > 1)
2989 BFT_MALLOC(send_buf, cs_interface_set_n_elts(ifs), cs_lnum_t);
2990
2991 #endif /* HAVE_MPI */
2992
2993 /* Prepare send buffer first (for same rank, send_order is swapped
2994 with match_id directly) */
2995
2996 for (i = 0, j = 0; i < ifs->size; i++) {
2997
2998 cs_lnum_t k;
2999 cs_interface_t *itf = ifs->interfaces[i];
3000
3001 /* When this function is called, a distant-rank interface should have
3002 a send_order array, but not a match_id array */
3003
3004 assert(itf->match_id == NULL);
3005
3006 for (k = 0; k < itf->size; k++)
3007 itf->elt_id[k] = old_to_new[itf->elt_id[k]];
3008
3009 itf->match_id = itf->send_order; /* Pre swap of send_order with match_id */
3010
3011 if (itf->rank != local_rank)
3012 for (k = 0; k < itf->size; k++) {
3013 send_buf[j + k] = itf->elt_id[itf->send_order[k]];
3014 }
3015 else {
3016 for (k = 0; k < itf->size; k++)
3017 itf->match_id[k] = itf->elt_id[itf->send_order[k]];
3018 }
3019
3020 itf->send_order = NULL; /* Finish swap of send_order with match_id */
3021
3022 j += itf->size;
3023
3024 }
3025
3026 #if defined(HAVE_MPI)
3027
3028 /* Now exchange data using MPI */
3029
3030 if (n_ranks > 1) {
3031
3032 BFT_MALLOC(request, ifs->size*2, MPI_Request);
3033 BFT_MALLOC(status, ifs->size*2, MPI_Status);
3034
3035 for (i = 0, j = 0; i < ifs->size; i++) {
3036 cs_interface_t *itf = ifs->interfaces[i];
3037 if (itf->rank != local_rank)
3038 MPI_Irecv(itf->match_id,
3039 itf->size,
3040 CS_MPI_LNUM,
3041 itf->rank,
3042 itf->rank,
3043 ifs->comm,
3044 &(request[request_count++]));
3045 j += itf->size;
3046 }
3047
3048 for (i = 0, j = 0; i < ifs->size; i++) {
3049 cs_interface_t *itf = ifs->interfaces[i];
3050 if (itf->rank != local_rank)
3051 MPI_Isend(send_buf + j,
3052 itf->size,
3053 CS_MPI_LNUM,
3054 itf->rank,
3055 local_rank,
3056 ifs->comm,
3057 &(request[request_count++]));
3058 j += itf->size;
3059 }
3060
3061 MPI_Waitall(request_count, request, status);
3062
3063 BFT_FREE(request);
3064 BFT_FREE(status);
3065 BFT_FREE(send_buf);
3066
3067 }
3068
3069 #endif /* defined(HAVE_MPI) */
3070 }
3071
3072 /*----------------------------------------------------------------------------
3073 * Copy array from distant or matching interface elements to local elements,
3074 * for strided and non-interleaved elements.
3075 *
3076 * The destination arrays defines values for all elements in the
3077 * interface set (i.e. all elements listed by cs_interface_get_elt_ids()
3078 * when looping over interfaces of a set, while the source array
3079 * defines values for all elements of the type associated with the interfaces.
3080 *
3081 * parameters:
3082 * ifs <-> pointer to interface set structure
3083 * datatype <-- type of data considered
3084 * n_elts <-- number of elements associated with interface
3085 * stride <-- number of values per entity (interlaced)
3086 * src <-- source array (size: cs_interface_set_n_elts(ifs)*stride
3087 * or parent array size * stride)
3088 * dest <-- destination array
3089 * (size: cs_interface_set_n_elts(ifs)*stride)
3090 *----------------------------------------------------------------------------*/
3091
3092 static void
_interface_set_copy_array_ni(const cs_interface_set_t * ifs,cs_datatype_t datatype,cs_lnum_t n_elts,int stride,const void * src,void * dest)3093 _interface_set_copy_array_ni(const cs_interface_set_t *ifs,
3094 cs_datatype_t datatype,
3095 cs_lnum_t n_elts,
3096 int stride,
3097 const void *src,
3098 void *dest)
3099 {
3100 int i;
3101 cs_lnum_t j;
3102 int local_rank = 0;
3103 int type_size = cs_datatype_size[datatype];
3104 int stride_size = cs_datatype_size[datatype]*stride;
3105 cs_lnum_t shift_size = cs_datatype_size[datatype]*n_elts;
3106 unsigned char *send_buf = NULL;
3107 unsigned char *_dest = dest;
3108 const unsigned char *_src = src;
3109
3110 #if defined(HAVE_MPI)
3111
3112 int n_ranks = 1;
3113 int request_count = 0;
3114 MPI_Datatype mpi_type = cs_datatype_to_mpi[datatype];
3115 MPI_Request *request = NULL;
3116 MPI_Status *status = NULL;
3117
3118 if (ifs->comm != MPI_COMM_NULL) {
3119 MPI_Comm_rank(ifs->comm, &local_rank);
3120 MPI_Comm_size(ifs->comm, &n_ranks);
3121 }
3122
3123 #endif
3124
3125 assert(ifs != NULL);
3126
3127 BFT_MALLOC(send_buf,
3128 cs_interface_set_n_elts(ifs)*stride_size,
3129 unsigned char);
3130
3131 /* Prepare send buffer first (so that src is not used
3132 anymore after this, and may overlap with dest if desired);
3133 for same-rank interface, assign values directly to dest instead */
3134
3135 for (i = 0, j = 0; i < ifs->size; i++) {
3136
3137 cs_lnum_t k, l, m;
3138 cs_interface_t *itf = ifs->interfaces[i];
3139 unsigned char *p = send_buf;
3140
3141 p += j*stride_size;
3142
3143 for (k = 0; k < itf->size; k++) {
3144 cs_lnum_t send_id = itf->elt_id[itf->send_order[k]];
3145 for (m = 0; m < stride; m++) {
3146 for (l = 0; l < type_size; l++)
3147 p[(k*stride + m) * type_size + l]
3148 = _src[send_id*type_size + m*shift_size + l];
3149 }
3150 }
3151
3152 j += itf->size;
3153
3154 }
3155
3156 /* Now exchange data */
3157
3158 #if defined(HAVE_MPI)
3159
3160 if (n_ranks > 1) {
3161 BFT_MALLOC(request, ifs->size*2, MPI_Request);
3162 BFT_MALLOC(status, ifs->size*2, MPI_Status);
3163 }
3164
3165 #endif
3166
3167 for (i = 0, j = 0; i < ifs->size; i++) {
3168
3169 cs_interface_t *itf = ifs->interfaces[i];
3170
3171 if (itf->rank == local_rank)
3172 memcpy(_dest + j*stride_size,
3173 send_buf + j*stride_size,
3174 itf->size*stride_size);
3175
3176 #if defined(HAVE_MPI)
3177
3178 else /* if (itf->rank != local_rank) */
3179 MPI_Irecv(_dest + j*stride_size,
3180 itf->size*stride,
3181 mpi_type,
3182 itf->rank,
3183 itf->rank,
3184 ifs->comm,
3185 &(request[request_count++]));
3186
3187 #endif
3188
3189 j += itf->size;
3190
3191 }
3192
3193 #if defined(HAVE_MPI)
3194
3195 if (n_ranks > 1) {
3196
3197 for (i = 0, j = 0; i < ifs->size; i++) {
3198 cs_interface_t *itf = ifs->interfaces[i];
3199 if (itf->rank != local_rank)
3200 MPI_Isend(send_buf + j*stride_size,
3201 itf->size*stride,
3202 mpi_type,
3203 itf->rank,
3204 local_rank,
3205 ifs->comm,
3206 &(request[request_count++]));
3207 j += itf->size;
3208 }
3209
3210 MPI_Waitall(request_count, request, status);
3211
3212 BFT_FREE(request);
3213 BFT_FREE(status);
3214
3215 }
3216
3217 #endif /* defined(HAVE_MPI) */
3218
3219 BFT_FREE(send_buf);
3220 }
3221
3222 /*----------------------------------------------------------------------------*/
3223 /*!
3224 * \brief Apply strided subdivision of elements to id array;
3225 *
3226 * \param[in] size original array size
3227 * \param[in] stride number of sub-elements per element
3228 * \param[in] array_o original array
3229 *
3230 * \return subdivided array
3231 */
3232 /*----------------------------------------------------------------------------*/
3233
3234 static cs_lnum_t *
_copy_sub_strided(cs_lnum_t size_old,cs_lnum_t stride,cs_lnum_t * array_o)3235 _copy_sub_strided(cs_lnum_t size_old,
3236 cs_lnum_t stride,
3237 cs_lnum_t *array_o)
3238 {
3239 cs_lnum_t size_new = size_old*stride;
3240
3241 cs_lnum_t *array_n = NULL;
3242
3243 if (array_o != NULL) {
3244 BFT_MALLOC(array_n, size_new, cs_lnum_t);
3245 for (cs_lnum_t i = 0; i < size_new; i++)
3246 array_n[i] = array_o[i/stride]*stride + i%stride;
3247 }
3248
3249 return array_n;
3250 }
3251
3252 /*----------------------------------------------------------------------------*/
3253 /*!
3254 * \brief Apply bloc subdivision of elements to interface;
3255 *
3256 * The match ids of interfaces should be available for this operation.
3257 *
3258 * \param[in] o reference interface
3259 * \param[in] l_block_size local block size
3260 * \param[in] d_block_size distant block size
3261 * \param[in] n_blocks number of blocks in new interface
3262 *
3263 * \return blocked interface
3264 */
3265 /*----------------------------------------------------------------------------*/
3266
3267 static cs_interface_t *
_copy_sub_blocked(cs_interface_t * o,cs_lnum_t l_block_size,cs_lnum_t d_block_size,cs_lnum_t n_blocks)3268 _copy_sub_blocked(cs_interface_t *o,
3269 cs_lnum_t l_block_size,
3270 cs_lnum_t d_block_size,
3271 cs_lnum_t n_blocks)
3272 {
3273 cs_interface_t *n = _cs_interface_create();
3274
3275 n->rank = o->rank;
3276 n->size = o->size * n_blocks;
3277
3278 n->tr_index_size = o->tr_index_size;
3279 if (o->tr_index != NULL) {
3280 BFT_MALLOC(n->tr_index, n->tr_index_size, cs_lnum_t);
3281 for (int j = 0; j < n->tr_index_size; j++)
3282 n->tr_index[j] = o->tr_index[j] * n_blocks;
3283 }
3284
3285 const int _tr_index[2] = {0, o->size};
3286 const int *tr_index = (o->tr_index != NULL) ? o->tr_index : _tr_index;
3287 const int n_tr = (o->tr_index != NULL) ? o->tr_index_size - 1: 1;
3288
3289 cs_lnum_t size_new = o->size * n_blocks;
3290
3291 if (o->elt_id != NULL) {
3292
3293 BFT_MALLOC(n->elt_id, size_new, cs_lnum_t);
3294
3295 cs_lnum_t j = 0;
3296 for (int tr_id = 0; tr_id < n_tr; tr_id++) {
3297 cs_lnum_t s_id = tr_index[tr_id];
3298 cs_lnum_t e_id = tr_index[tr_id+1];
3299 for (cs_lnum_t b_id = 0; b_id < n_blocks; b_id++) {
3300 for (cs_lnum_t i = s_id; i < e_id; i++) {
3301 n->elt_id[j++] = o->elt_id[i] + l_block_size*b_id;
3302 }
3303 }
3304 }
3305
3306
3307 BFT_MALLOC(n->match_id, size_new, cs_lnum_t);
3308
3309 j = 0;
3310 for (int tr_id = 0; tr_id < n_tr; tr_id++) {
3311 cs_lnum_t s_id = tr_index[tr_id];
3312 cs_lnum_t e_id = tr_index[tr_id+1];
3313 for (cs_lnum_t b_id = 0; b_id < n_blocks; b_id++) {
3314 for (cs_lnum_t i = s_id; i < e_id; i++) {
3315 n->match_id[j++] = o->match_id[i] + d_block_size*b_id;
3316 }
3317 }
3318 }
3319 }
3320
3321 return n;
3322 }
3323
3324 /*! (DOXYGEN_SHOULD_SKIP_THIS) \endcond */
3325
3326 /*=============================================================================
3327 * Public function definitions
3328 *============================================================================*/
3329
3330 /*----------------------------------------------------------------------------*/
3331 /*!
3332 * \brief Return process rank associated with an interface's distant elements.
3333 *
3334 * \param[in] itf pointer to interface structure
3335 *
3336 * \return process rank associated with the interface's distant elements
3337 */
3338 /*----------------------------------------------------------------------------*/
3339
3340 int
cs_interface_rank(const cs_interface_t * itf)3341 cs_interface_rank(const cs_interface_t *itf)
3342 {
3343 int retval = -1;
3344
3345 if (itf != NULL)
3346 retval = itf->rank;
3347
3348 return retval;
3349 }
3350
3351 /*----------------------------------------------------------------------------*/
3352 /*!
3353 * \brief Return number of local and distant elements defining an interface.
3354 *
3355 * \param[in] itf pointer to interface structure
3356 *
3357 * \return number of local and distant elements defining the interface
3358 */
3359 /*----------------------------------------------------------------------------*/
3360
3361 cs_lnum_t
cs_interface_size(const cs_interface_t * itf)3362 cs_interface_size(const cs_interface_t *itf)
3363 {
3364 cs_lnum_t retval = 0;
3365
3366 if (itf != NULL)
3367 retval = itf->size;
3368
3369 return retval;
3370 }
3371
3372 /*----------------------------------------------------------------------------*/
3373 /*!
3374 * \brief Return pointer to array of local element ids defining an interface.
3375 *
3376 * The size of the array may be obtained by cs_interface_size().
3377 * The array is owned by the interface structure, and is not copied
3378 * (hence the constant qualifier for the return value).
3379 *
3380 * \param[in] itf pointer to interface structure
3381 *
3382 * \return pointer to array of local element ids (0 to n-1) defining
3383 * the interface
3384 */
3385 /*----------------------------------------------------------------------------*/
3386
3387 const cs_lnum_t *
cs_interface_get_elt_ids(const cs_interface_t * itf)3388 cs_interface_get_elt_ids(const cs_interface_t *itf)
3389 {
3390 const cs_lnum_t *retval = NULL;
3391
3392 if (itf != NULL)
3393 retval = itf->elt_id;
3394
3395 return retval;
3396 }
3397
3398 /*----------------------------------------------------------------------------*/
3399 /*!
3400 * \brief Return pointer to array of matching element ids defining an interface.
3401 *
3402 * This array is only available if cs_interface_set_add_match_ids() has
3403 * been called for the containing interface set.
3404 *
3405 * The size of the array may be obtained by cs_interface_size().
3406 * The array is owned by the interface structure, and is not copied
3407 * (hence the constant qualifier for the return value).
3408 *
3409 * \param[in] itf pointer to interface structure
3410 *
3411 * \return pointer to array of local element ids (0 to n-1) defining
3412 * the interface
3413 */
3414 /*----------------------------------------------------------------------------*/
3415
3416 const cs_lnum_t *
cs_interface_get_match_ids(const cs_interface_t * itf)3417 cs_interface_get_match_ids(const cs_interface_t *itf)
3418 {
3419 const cs_lnum_t *retval = NULL;
3420
3421 if (itf != NULL)
3422 retval = itf->match_id;
3423
3424 return retval;
3425 }
3426
3427 /*----------------------------------------------------------------------------*/
3428 /*!
3429 * \brief Return size of index of sub-sections for different transformations.
3430 *
3431 * The index is applicable to both local_num and distant_num arrays,
3432 * with purely parallel equivalences appearing at position 0, and
3433 * equivalences through periodic transform i at position i+1;
3434 * Its size should thus be equal to 1 + number of periodic transforms + 1,
3435 * In absence of periodicity, it may be 0, as the index is not needed.
3436 *
3437 * \param[in] itf pointer to interface structure
3438 *
3439 * \return transform index size for the interface
3440 */
3441 /*----------------------------------------------------------------------------*/
3442
3443 cs_lnum_t
cs_interface_get_tr_index_size(const cs_interface_t * itf)3444 cs_interface_get_tr_index_size(const cs_interface_t *itf)
3445 {
3446 cs_lnum_t retval = 0;
3447
3448 if (itf != NULL)
3449 retval = itf->tr_index_size;
3450
3451 return retval;
3452 }
3453
3454 /*----------------------------------------------------------------------------*/
3455 /*!
3456 * \brief Return pointer to index of sub-sections for different transformations.
3457 *
3458 * The index is applicable to both local_num and distant_num arrays,
3459 * with purely parallel equivalences appearing at position 0, and
3460 * equivalences through periodic transform i at position i+1;
3461 * In absence of periodicity, it may be NULL, as it is not needed.
3462 *
3463 * \param[in] itf pointer to interface structure
3464 *
3465 * \return pointer to transform index for the interface
3466 */
3467 /*----------------------------------------------------------------------------*/
3468
3469 const cs_lnum_t *
cs_interface_get_tr_index(const cs_interface_t * itf)3470 cs_interface_get_tr_index(const cs_interface_t *itf)
3471 {
3472 const cs_lnum_t *tr_index = 0;
3473
3474 if (itf != NULL)
3475 tr_index = itf->tr_index;
3476
3477 return tr_index;
3478 }
3479
3480 /*----------------------------------------------------------------------------*/
3481 /*!
3482 * \brief Creation of a list of interfaces between elements of a same type.
3483 *
3484 * These interfaces may be used to identify equivalent vertices or faces using
3485 * domain splitting, as well as periodic elements (on the same or on
3486 * distant ranks).
3487 *
3488 * Note that periodicity information will be completed and made consistent
3489 * based on the input, so that if a periodic couple is defined on a given rank,
3490 * the reverse couple wil be defined, whether it is also defined on the same
3491 * or a different rank.
3492 *
3493 * In addition, multiple periodicity interfaces will be built automatically
3494 * if the periodicity structure provides for composed periodicities, so they
3495 * need not be defined prior to this function being called.
3496 *
3497 * \param[in] n_elts number of local elements considered
3498 * (size of parent_element_id[])
3499 * \param[in] parent_element_id pointer to list of selected elements
3500 * local ids (0 to n-1), or NULL if all
3501 * first n_elts elements are used
3502 * \param[in] global_number pointer to list of global (i.e. domain
3503 * splitting independent) element numbers
3504 * \param[in] periodicity periodicity information (NULL if none)
3505 * \param[in] n_periodic_lists number of periodic lists (may be local)
3506 * \param[in] periodicity_num periodicity number (1 to n) associated
3507 * with each periodic list (primary
3508 * periodicities only)
3509 * \param[in] n_periodic_couples number of periodic couples associated
3510 * with each periodic list
3511 * \param[in] periodic_couples array indicating periodic couples
3512 * (interlaced, using global numberings)
3513 * for each list
3514 *
3515 * \return pointer to list of interfaces (possibly NULL in serial mode)
3516 */
3517 /*----------------------------------------------------------------------------*/
3518
3519 cs_interface_set_t *
cs_interface_set_create(cs_lnum_t n_elts,const cs_lnum_t parent_element_id[],const cs_gnum_t global_number[],const fvm_periodicity_t * periodicity,int n_periodic_lists,const int periodicity_num[],const cs_lnum_t n_periodic_couples[],const cs_gnum_t * const periodic_couples[])3520 cs_interface_set_create(cs_lnum_t n_elts,
3521 const cs_lnum_t parent_element_id[],
3522 const cs_gnum_t global_number[],
3523 const fvm_periodicity_t *periodicity,
3524 int n_periodic_lists,
3525 const int periodicity_num[],
3526 const cs_lnum_t n_periodic_couples[],
3527 const cs_gnum_t *const periodic_couples[])
3528 {
3529 cs_interface_set_t *ifs;
3530
3531 /* Initial checks */
3532
3533 if ( (cs_glob_n_ranks < 2)
3534 && (periodicity == NULL || n_periodic_lists == 0))
3535 return NULL;
3536
3537 /* Create structure */
3538
3539 BFT_MALLOC(ifs, 1, cs_interface_set_t);
3540 ifs->size = 0;
3541 ifs->interfaces = NULL;
3542 ifs->periodicity = periodicity;
3543 ifs->match_id_rc = 0;
3544
3545 const cs_gnum_t *global_num = global_number;
3546
3547 cs_gnum_t *_global_num = NULL;
3548
3549 if (global_number != NULL && parent_element_id != NULL) {
3550
3551 BFT_MALLOC(_global_num, n_elts, cs_gnum_t);
3552
3553 for (size_t i = 0 ; i < (size_t)n_elts ; i++)
3554 _global_num[i] = global_number[parent_element_id[i]];
3555
3556 global_num = _global_num;
3557
3558 }
3559
3560 /* Build interfaces */
3561
3562 #if defined(HAVE_MPI)
3563
3564 ifs->comm = cs_glob_mpi_comm;
3565
3566 if (cs_glob_n_ranks > 1) {
3567
3568 /* Build interfaces */
3569
3570 if (periodicity == NULL)
3571 _add_global_equiv(ifs,
3572 n_elts,
3573 global_num,
3574 cs_glob_mpi_comm);
3575
3576 else
3577 _add_global_equiv_periodic(ifs,
3578 n_elts,
3579 global_num,
3580 periodicity,
3581 n_periodic_lists,
3582 periodicity_num,
3583 n_periodic_couples,
3584 periodic_couples,
3585 cs_glob_mpi_comm);
3586
3587 }
3588
3589 #endif /* defined(HAVE_MPI) */
3590
3591 if (cs_glob_n_ranks == 1 && (periodicity != NULL && n_periodic_lists > 0)) {
3592
3593 _add_global_equiv_periodic_sp(ifs,
3594 n_elts,
3595 global_num,
3596 periodicity,
3597 n_periodic_lists,
3598 periodicity_num,
3599 n_periodic_couples,
3600 periodic_couples);
3601
3602
3603 }
3604
3605 BFT_FREE(_global_num);
3606
3607 /* Finish preparation of interface set and return */
3608
3609 _order_by_elt_id(ifs);
3610 _match_id_to_send_order(ifs);
3611
3612 return ifs;
3613 }
3614
3615 /*----------------------------------------------------------------------------*/
3616 /*!
3617 * \brief Destruction of an interface set.
3618 *
3619 * \param[in, out] ifs pointer to pointer to structure to destroy
3620 */
3621 /*----------------------------------------------------------------------------*/
3622
3623 void
cs_interface_set_destroy(cs_interface_set_t ** ifs)3624 cs_interface_set_destroy(cs_interface_set_t **ifs)
3625 {
3626 int i;
3627 cs_interface_set_t *itfs = *ifs;
3628
3629 if (itfs != NULL) {
3630 for (i = 0; i < itfs->size; i++) {
3631 _cs_interface_destroy(&(itfs->interfaces[i]));
3632 }
3633 BFT_FREE(itfs->interfaces);
3634 BFT_FREE(itfs);
3635 *ifs = itfs;
3636 }
3637 }
3638
3639 /*----------------------------------------------------------------------------*/
3640 /*!
3641 * \brief Duplicate an interface set, applying an optional constant stride.
3642 *
3643 * \param[in, out] ifs pointer to interface set structure
3644 * \param[in] stride if > 1, each element subdivided in stride elements
3645 *
3646 * \return pointer to new interface set
3647 */
3648 /*----------------------------------------------------------------------------*/
3649
3650 cs_interface_set_t *
cs_interface_set_dup(const cs_interface_set_t * ifs,cs_lnum_t stride)3651 cs_interface_set_dup(const cs_interface_set_t *ifs,
3652 cs_lnum_t stride)
3653 {
3654 cs_interface_set_t *ifs_new = NULL;
3655
3656 if (ifs == NULL)
3657 return ifs_new;
3658
3659 if (stride < 1)
3660 stride = 1;
3661
3662 BFT_MALLOC(ifs_new, 1, cs_interface_set_t);
3663
3664 ifs_new->size = ifs->size;
3665 ifs_new->periodicity = ifs->periodicity;
3666 ifs_new->match_id_rc = 0;
3667
3668 #if defined(HAVE_MPI)
3669 ifs_new->comm = ifs->comm;
3670 #endif
3671
3672 BFT_MALLOC(ifs_new->interfaces,
3673 ifs->size,
3674 cs_interface_t *);
3675
3676 /* Loop on interfaces */
3677
3678 for (int i = 0; i < ifs->size; i++) {
3679
3680 cs_interface_t *o = ifs->interfaces[i];
3681 cs_interface_t *n = _cs_interface_create();
3682
3683 n->rank = o->rank;
3684 n->size = o->size * stride;
3685
3686 n->tr_index_size = o->tr_index_size;
3687 if (o->tr_index != NULL) {
3688 BFT_MALLOC(n->tr_index, n->tr_index_size, cs_lnum_t);
3689 for (int j = 0; j < n->tr_index_size; j++)
3690 n->tr_index[j] = o->tr_index[j] * stride;
3691 }
3692
3693 n->elt_id = _copy_sub_strided(o->size, stride, o->elt_id);
3694 n->send_order = _copy_sub_strided(o->size, stride, o->send_order);
3695
3696 n->match_id = NULL;
3697
3698 ifs_new->interfaces[i] = n;
3699 }
3700
3701 return ifs_new;
3702 }
3703
3704 /*----------------------------------------------------------------------------*/
3705 /*!
3706 * \brief Duplicate an interface set for coupled variable blocks.
3707 *
3708 * \param[in, out] ifs pointer to interface set structure
3709 * \param[in] block_size local block size (number of elements)
3710 * \param[in] n_blocks number of associated blocks
3711 *
3712 * \return pointer to new interface set
3713 */
3714 /*----------------------------------------------------------------------------*/
3715
3716 cs_interface_set_t *
cs_interface_set_dup_blocks(cs_interface_set_t * ifs,cs_lnum_t block_size,cs_lnum_t n_blocks)3717 cs_interface_set_dup_blocks(cs_interface_set_t *ifs,
3718 cs_lnum_t block_size,
3719 cs_lnum_t n_blocks)
3720 {
3721 cs_interface_set_t *ifs_new = NULL;
3722
3723 if (ifs == NULL)
3724 return ifs_new;
3725
3726 if (n_blocks < 1)
3727 n_blocks = 1;
3728
3729 BFT_MALLOC(ifs_new, 1, cs_interface_set_t);
3730 ifs->match_id_rc = 0;
3731
3732 ifs_new->size = ifs->size;
3733 ifs_new->periodicity = ifs->periodicity;
3734
3735 cs_lnum_t *d_block_size;
3736 BFT_MALLOC(d_block_size, ifs->size, cs_lnum_t);
3737
3738 int n_ranks = 1;
3739
3740 #if defined(HAVE_MPI)
3741
3742 ifs_new->comm = ifs->comm;
3743
3744 /* Exchange block sizes */
3745
3746 int request_count = 0, local_rank = -1;
3747 MPI_Request *request = NULL;
3748 MPI_Status *status = NULL;
3749
3750 if (ifs->comm != MPI_COMM_NULL) {
3751 MPI_Comm_rank(ifs->comm, &local_rank);
3752 MPI_Comm_size(ifs->comm, &n_ranks);
3753 }
3754
3755 if (n_ranks > 1) {
3756
3757 cs_lnum_t send_buf[1] = {block_size};
3758
3759 BFT_MALLOC(request, ifs->size*2, MPI_Request);
3760 BFT_MALLOC(status, ifs->size*2, MPI_Status);
3761
3762 for (int i = 0; i < ifs->size; i++) {
3763 cs_interface_t *itf = ifs->interfaces[i];
3764 if (itf->rank != local_rank)
3765 MPI_Irecv(d_block_size + i,
3766 1,
3767 CS_MPI_LNUM,
3768 itf->rank,
3769 itf->rank,
3770 ifs->comm,
3771 &(request[request_count++]));
3772 else
3773 d_block_size[i] = block_size;
3774 }
3775
3776 for (int i = 0; i < ifs->size; i++) {
3777 cs_interface_t *itf = ifs->interfaces[i];
3778 if (itf->rank != local_rank)
3779 MPI_Isend(send_buf,
3780 1,
3781 CS_MPI_LNUM,
3782 itf->rank,
3783 local_rank,
3784 ifs->comm,
3785 &(request[request_count++]));
3786 }
3787
3788 MPI_Waitall(request_count, request, status);
3789
3790 BFT_FREE(request);
3791 BFT_FREE(status);
3792
3793 }
3794
3795 #endif /* defined(HAVE_MPI) */
3796
3797 if (n_ranks <= 1 && ifs->size > 0) {
3798 cs_interface_t *itf = ifs->interfaces[0];
3799
3800 assert(ifs->size <= 1);
3801 assert(itf->rank == 0);
3802
3803 d_block_size[0] = block_size;
3804 }
3805
3806 /* Ensure match ids are available on reference interface */
3807
3808 cs_interface_set_add_match_ids(ifs);
3809
3810 /* Build new interface
3811 ------------------- */
3812
3813 BFT_MALLOC(ifs_new->interfaces,
3814 ifs->size,
3815 cs_interface_t *);
3816
3817 /* Loop on interfaces */
3818
3819 for (int i = 0; i < ifs->size; i++) {
3820 cs_interface_t *o = ifs->interfaces[i];
3821 ifs_new->interfaces[i]
3822 = _copy_sub_blocked(o, block_size, d_block_size[i], n_blocks);
3823 }
3824
3825 /* Free memory */
3826
3827 cs_interface_set_free_match_ids(ifs);
3828
3829 BFT_FREE(d_block_size);
3830
3831 /* Finish preparation of interface set and return */
3832
3833 _match_id_to_send_order(ifs_new);
3834
3835 return ifs_new;
3836 }
3837
3838 /*----------------------------------------------------------------------------*/
3839 /*!
3840 * \brief Return number of interfaces associated with an interface set.
3841 *
3842 * \param[in] ifs pointer to interface set structure
3843 *
3844 * \return number of interfaces in set
3845 */
3846 /*----------------------------------------------------------------------------*/
3847
3848 int
cs_interface_set_size(const cs_interface_set_t * ifs)3849 cs_interface_set_size(const cs_interface_set_t *ifs)
3850 {
3851 int retval = 0;
3852
3853 if (ifs != NULL)
3854 retval = ifs->size;
3855
3856 return retval;
3857 }
3858
3859 /*----------------------------------------------------------------------------*/
3860 /*!
3861 * \brief Return total number of elements in interface set.
3862 *
3863 * This is equal to the sum of cs_interface_size() on the cs_interface_size()
3864 * interfaces of a set.
3865 *
3866 * \param[in] ifs pointer to interface set structure
3867 *
3868 * \return number of interfaces in set
3869 */
3870 /*----------------------------------------------------------------------------*/
3871
3872 cs_lnum_t
cs_interface_set_n_elts(const cs_interface_set_t * ifs)3873 cs_interface_set_n_elts(const cs_interface_set_t *ifs)
3874 {
3875 int i;
3876
3877 cs_lnum_t retval = 0;
3878
3879 for (i = 0; i < ifs->size; i++)
3880 retval += (ifs->interfaces[i])->size;
3881
3882 return retval;
3883 }
3884
3885 /*----------------------------------------------------------------------------*/
3886 /*!
3887 * \brief Return pointer to a given interface in an interface set.
3888 *
3889 * \param[in] ifs <-- pointer to interface set structure
3890 * \param[in] interface_id <-- index of interface in set (0 to n-1)
3891 *
3892 * \return pointer to interface structure
3893 */
3894 /*----------------------------------------------------------------------------*/
3895
3896 const cs_interface_t *
cs_interface_set_get(const cs_interface_set_t * ifs,int interface_id)3897 cs_interface_set_get(const cs_interface_set_t *ifs,
3898 int interface_id)
3899 {
3900 const cs_interface_t *retval = NULL;
3901
3902 if (ifs != NULL) {
3903 if (interface_id > -1 && interface_id < ifs->size)
3904 retval = ifs->interfaces[interface_id];
3905 }
3906
3907 return retval;
3908 }
3909
3910 /*----------------------------------------------------------------------------*/
3911 /*!
3912 * \brief Return pointer to the periocicity structure associated of an
3913 * interface set.
3914 *
3915 * \param[in] ifs pointer to interface set structure
3916 *
3917 * \return pointer to periodicity structure, or NULL
3918 */
3919 /*----------------------------------------------------------------------------*/
3920
3921 const fvm_periodicity_t *
cs_interface_set_periodicity(const cs_interface_set_t * ifs)3922 cs_interface_set_periodicity(const cs_interface_set_t *ifs)
3923 {
3924 const fvm_periodicity_t *retval = NULL;
3925
3926 if (ifs != NULL)
3927 retval = ifs->periodicity;
3928
3929 return retval;
3930 }
3931
3932 /*----------------------------------------------------------------------------*/
3933 /*!
3934 * \brief Apply renumbering of elements referenced by an interface set.
3935 *
3936 * For any given element i, a negative old_to_new[i] value means that that
3937 * element does not appear anymore in the new numbering.
3938 *
3939 * \param[in, out] ifs pointer to interface set structure
3940 * \param[in] old_to_new renumbering array (0 to n-1 numbering)
3941 */
3942 /*----------------------------------------------------------------------------*/
3943
3944 void
cs_interface_set_renumber(cs_interface_set_t * ifs,const cs_lnum_t old_to_new[])3945 cs_interface_set_renumber(cs_interface_set_t *ifs,
3946 const cs_lnum_t old_to_new[])
3947 {
3948 int i;
3949 int n_interfaces = 0;
3950
3951 assert(ifs != NULL);
3952 assert(old_to_new != NULL);
3953
3954 /* Compute new element and match ids */
3955
3956 _set_renumber_update_ids(ifs, old_to_new);
3957
3958 _order_elt_id(ifs);
3959
3960 /* Remove references to elements not appearing anymore */
3961
3962 for (i = 0; i < ifs->size; i++) {
3963
3964 cs_lnum_t j, k;
3965 cs_interface_t *itf = ifs->interfaces[i];
3966
3967 cs_lnum_t *loc_id = itf->elt_id;
3968 cs_lnum_t *dist_id = itf->match_id;
3969
3970 if (itf->tr_index_size == 0) {
3971 for (j = 0, k = 0; j < itf->size; j++) {
3972 if (loc_id[j] > -1 && dist_id[j] > -1) {
3973 loc_id[k] = loc_id[j];
3974 dist_id[k] = dist_id[j];
3975 k += 1;
3976 }
3977 }
3978 }
3979 else {
3980 int tr_id;
3981 cs_lnum_t end_id = itf->tr_index[0];
3982 k = 0;
3983 for (tr_id = 0; tr_id < itf->tr_index_size - 1; tr_id++) {
3984 cs_lnum_t start_id = end_id;
3985 end_id = itf->tr_index[tr_id + 1];
3986 for (j = start_id; j < end_id; j++) {
3987 if (loc_id[j] > -1 && dist_id[j] > -1) {
3988 loc_id[k] = loc_id[j];
3989 dist_id[k] = dist_id[j];
3990 k += 1;
3991 }
3992 }
3993 itf->tr_index[tr_id + 1] = k;
3994 }
3995 }
3996
3997 if (k < itf->size) {
3998 if (k > 0) {
3999 itf->size = k;
4000 BFT_REALLOC(itf->elt_id, k, cs_lnum_t);
4001 BFT_REALLOC(itf->match_id, k, cs_lnum_t);
4002 }
4003 else {
4004 BFT_FREE(itf->elt_id);
4005 BFT_FREE(itf->match_id);
4006 BFT_FREE(ifs->interfaces[i]);
4007 }
4008 }
4009 }
4010
4011 for (i = 0, n_interfaces = 0; i < ifs->size; i++) {
4012 if (ifs->interfaces[i] != NULL)
4013 ifs->interfaces[n_interfaces++] = ifs->interfaces[i];
4014 }
4015
4016 if (n_interfaces < ifs->size) {
4017 BFT_REALLOC(ifs->interfaces,
4018 n_interfaces,
4019 cs_interface_t *);
4020 ifs->size = n_interfaces;
4021 }
4022
4023 _match_id_to_send_order(ifs);
4024 }
4025
4026 /*----------------------------------------------------------------------------*/
4027 /*!
4028 * \brief Copy array from distant or matching interface elements to
4029 * local elements.
4030 *
4031 * Source and destination arrays define values for all elements in the
4032 * interface set (i.e. all elements listed by cs_interface_get_elt_ids()
4033 * when looping over interfaces of a set.
4034 *
4035 * \param[in] ifs pointer to interface set structure
4036 * \param[in] datatype type of data considered
4037 * \param[in] stride number of values per entity (interlaced)
4038 * \param[in] src_on_parent true if source array is defined on the elements
4039 * defined by ifs->elt_ids, false if source array
4040 * defined directly on cs_interface_set_n_elts(ifs)
4041 * \param[in] src source array (size:
4042 * cs_interface_set_n_elts(ifs)*stride
4043 * or parent array size * stride)
4044 * \param[out] dest destination array (size:
4045 * cs_interface_set_n_elts(ifs)*stride)
4046 */
4047 /*----------------------------------------------------------------------------*/
4048
4049 void
cs_interface_set_copy_array(const cs_interface_set_t * ifs,cs_datatype_t datatype,int stride,bool src_on_parent,const void * src,void * dest)4050 cs_interface_set_copy_array(const cs_interface_set_t *ifs,
4051 cs_datatype_t datatype,
4052 int stride,
4053 bool src_on_parent,
4054 const void *src,
4055 void *dest)
4056 {
4057 int i;
4058 cs_lnum_t j;
4059 int local_rank = 0;
4060 cs_lnum_t stride_size = cs_datatype_size[datatype]*stride;
4061 unsigned char *send_buf = NULL;
4062 unsigned char *_dest = dest;
4063 const unsigned char *_src = src;
4064
4065 #if defined(HAVE_MPI)
4066
4067 int n_ranks = 1;
4068 int request_count = 0;
4069 MPI_Datatype mpi_type = cs_datatype_to_mpi[datatype];
4070 MPI_Request *request = NULL;
4071 MPI_Status *status = NULL;
4072
4073 if (ifs->comm != MPI_COMM_NULL) {
4074 MPI_Comm_rank(ifs->comm, &local_rank);
4075 MPI_Comm_size(ifs->comm, &n_ranks);
4076 }
4077
4078 #endif
4079
4080 assert(ifs != NULL);
4081
4082 BFT_MALLOC(send_buf,
4083 cs_interface_set_n_elts(ifs)*stride_size,
4084 unsigned char);
4085
4086 /* Prepare send buffer first (so that src is not used
4087 anymore after this, and may overlap with dest if desired); */
4088
4089 for (i = 0, j = 0; i < ifs->size; i++) {
4090
4091 cs_lnum_t k, l;
4092 cs_interface_t *itf = ifs->interfaces[i];
4093 unsigned char *p = send_buf;
4094
4095 p += j*stride_size;
4096
4097 if (src_on_parent) {
4098 for (k = 0; k < itf->size; k++) {
4099 cs_lnum_t send_id = itf->elt_id[itf->send_order[k]];
4100 for (l = 0; l < stride_size; l++)
4101 p[k*stride_size + l] = _src[send_id*stride_size + l];
4102 }
4103 }
4104 else {
4105 for (k = 0; k < itf->size; k++) {
4106 cs_lnum_t send_id = itf->send_order[k] + j;
4107 for (l = 0; l < stride_size; l++)
4108 p[k*stride_size + l] = _src[send_id*stride_size + l];
4109 }
4110 }
4111
4112 j += itf->size;
4113
4114 }
4115
4116 /* Now exchange data */
4117
4118 #if defined(HAVE_MPI)
4119 if (n_ranks > 1) {
4120 BFT_MALLOC(request, ifs->size*2, MPI_Request);
4121 BFT_MALLOC(status, ifs->size*2, MPI_Status);
4122 }
4123 #endif
4124
4125 for (i = 0, j = 0; i < ifs->size; i++) {
4126
4127 cs_interface_t *itf = ifs->interfaces[i];
4128
4129 if (itf->rank == local_rank)
4130 memcpy(_dest + j*stride_size,
4131 send_buf + j*stride_size,
4132 itf->size*stride_size);
4133
4134 #if defined(HAVE_MPI)
4135
4136 else /* if (itf->rank != local_rank) */
4137 MPI_Irecv(_dest + j*stride_size,
4138 itf->size*stride,
4139 mpi_type,
4140 itf->rank,
4141 itf->rank,
4142 ifs->comm,
4143 &(request[request_count++]));
4144
4145 #endif
4146
4147 j += itf->size;
4148
4149 }
4150
4151 #if defined(HAVE_MPI)
4152
4153 if (n_ranks > 1) {
4154
4155 for (i = 0, j = 0; i < ifs->size; i++) {
4156 cs_interface_t *itf = ifs->interfaces[i];
4157 if (itf->rank != local_rank)
4158 MPI_Isend(send_buf + j*stride_size,
4159 itf->size*stride,
4160 mpi_type,
4161 itf->rank,
4162 local_rank,
4163 ifs->comm,
4164 &(request[request_count++]));
4165 j += itf->size;
4166 }
4167
4168 MPI_Waitall(request_count, request, status);
4169
4170 BFT_FREE(request);
4171 BFT_FREE(status);
4172
4173 }
4174
4175 #endif /* defined(HAVE_MPI) */
4176
4177 BFT_FREE(send_buf);
4178 }
4179
4180 /*----------------------------------------------------------------------------*/
4181 /*!
4182 * \brief Copy indexed array from distant or matching interface elements to
4183 * local elements.
4184 *
4185 * Source and destination arrays define values for all elements in the
4186 * interface set (i.e. all elements listed by cs_interface_get_elt_ids()
4187 * when looping over interfaces of a set.
4188 *
4189 * Note that when copying the same type of data to all matching elements,
4190 * the source and destination index may be the same, if src_on_parent is true.
4191 * To avoid requiring a separate destination index, the dest_index argument
4192 * may be set to NULL, in which case it is assumed that source and destination
4193 * are symmetric, and src_index is sufficient to determine sizes (whether
4194 * src_on_parent is true or not).
4195 *
4196 * In some use cases, for example when copying values only in one direction,
4197 * the copying is not symmetric, so both a source and destination buffer must
4198 * be provided.
4199 *
4200 * \param[in] ifs pointer to interface set structure
4201 * \param[in] datatype type of data considered
4202 * \param[in] src_on_parent true if source array is defined on the elements
4203 * defined by ifs->elt_ids, false if source array
4204 * defined directly on cs_interface_set_n_elts(ifs)
4205 * \param[in] src_index index for source array
4206 * \param[in] dest_index index for destination array, or NULL
4207 * \param[in] src source array (size:
4208 * src_index[cs_interface_set_n_elts(ifs)]
4209 * or parent array size)
4210 * \param[out] dest destination array (size:
4211 * src_index[cs_interface_set_n_elts(ifs)] or
4212 * dest_index[cs_interface_set_n_elts(ifs)])
4213 */
4214 /*----------------------------------------------------------------------------*/
4215
4216 void
cs_interface_set_copy_indexed(const cs_interface_set_t * ifs,cs_datatype_t datatype,bool src_on_parent,const cs_lnum_t src_index[],const cs_lnum_t dest_index[],const void * src,void * dest)4217 cs_interface_set_copy_indexed(const cs_interface_set_t *ifs,
4218 cs_datatype_t datatype,
4219 bool src_on_parent,
4220 const cs_lnum_t src_index[],
4221 const cs_lnum_t dest_index[],
4222 const void *src,
4223 void *dest)
4224 {
4225 int i;
4226 cs_lnum_t j;
4227 int local_rank = 0;
4228 int type_size = cs_datatype_size[datatype];
4229 cs_lnum_t send_size = 0, itf_index_size = 0;
4230 cs_lnum_t *itf_index = NULL, *itf_s_index = NULL, *itf_r_index = NULL;
4231 unsigned char *send_buf = NULL;
4232 unsigned char *_dest = dest;
4233 const unsigned char *_src = src;
4234
4235 #if defined(HAVE_MPI)
4236
4237 int n_ranks = 1;
4238 int request_count = 0;
4239 MPI_Datatype mpi_type = cs_datatype_to_mpi[datatype];
4240 MPI_Request *request = NULL;
4241 MPI_Status *status = NULL;
4242
4243 if (ifs->comm != MPI_COMM_NULL) {
4244 MPI_Comm_rank(ifs->comm, &local_rank);
4245 MPI_Comm_size(ifs->comm, &n_ranks);
4246 }
4247
4248 #endif
4249
4250 assert(ifs != NULL);
4251
4252 /* Count number of elements to send or receive */
4253
4254 itf_index_size = ifs->size + 1;
4255 if (dest_index != NULL)
4256 itf_index_size *= 2;
4257
4258 BFT_MALLOC(itf_index, 2*(ifs->size + 1), cs_lnum_t);
4259 itf_s_index = itf_index;
4260 itf_s_index[0] = 0;
4261
4262 if (src_on_parent) {
4263 for (i = 0, j = 0; i < ifs->size; i++) {
4264 cs_lnum_t k;
4265 cs_interface_t *itf = ifs->interfaces[i];
4266 for (k = 0; k < itf->size; k++) {
4267 cs_lnum_t send_id = itf->elt_id[itf->send_order[k]];
4268 send_size += src_index[send_id+1] - src_index[send_id];
4269 }
4270 itf_s_index[i+1] = send_size;
4271 }
4272 }
4273 else {
4274 for (i = 0, j = 0; i < ifs->size; i++) {
4275 cs_interface_t *itf = ifs->interfaces[i];
4276 j += itf->size;
4277 itf_s_index[i+1] = src_index[j];
4278 }
4279 send_size = itf_s_index[ifs->size];
4280 }
4281
4282 if (dest_index != NULL) {
4283 itf_r_index = itf_index + ifs->size + 1;
4284 itf_r_index[0] = 0;
4285 for (i = 0, j = 0; i < ifs->size; i++) {
4286 cs_interface_t *itf = ifs->interfaces[i];
4287 j += itf->size;
4288 itf_r_index[i+1] = dest_index[j];
4289 }
4290 }
4291 else
4292 itf_r_index = itf_s_index;
4293
4294 BFT_MALLOC(send_buf, send_size*type_size, unsigned char);
4295
4296 /* Prepare send buffer first (so that src is not used
4297 anymore after this, and may overlap with dest if desired); */
4298
4299 for (i = 0, j = 0; i < ifs->size; i++) {
4300
4301 cs_lnum_t k, l, m;
4302 cs_interface_t *itf = ifs->interfaces[i];
4303 unsigned char *p = send_buf + (itf_s_index[i]*type_size);
4304
4305 if (src_on_parent) {
4306 for (k = 0, l = 0; k < itf->size; k++) {
4307 cs_lnum_t send_id = itf->elt_id[itf->send_order[k]];
4308 cs_lnum_t start_id = src_index[send_id]*type_size;
4309 cs_lnum_t end_id = src_index[send_id+1]*type_size;
4310 for (m = start_id; m < end_id; m++)
4311 p[l++] = _src[m];
4312 }
4313 }
4314 else {
4315 for (k = 0, l = 0; k < itf->size; k++) {
4316 cs_lnum_t send_id = itf->send_order[k] + j;
4317 cs_lnum_t start_id = src_index[send_id]*type_size;
4318 cs_lnum_t end_id = src_index[send_id+1]*type_size;
4319 for (m = start_id; m < end_id; m++)
4320 p[l++] = _src[m];
4321 }
4322 j += itf->size;
4323 }
4324 }
4325
4326 /* Now exchange data */
4327
4328 #if defined(HAVE_MPI)
4329 if (n_ranks > 1) {
4330 BFT_MALLOC(request, ifs->size*2, MPI_Request);
4331 BFT_MALLOC(status, ifs->size*2, MPI_Status);
4332 }
4333 #endif
4334
4335 for (i = 0, j = 0; i < ifs->size; i++) {
4336
4337 cs_interface_t *itf = ifs->interfaces[i];
4338 cs_lnum_t r_buf_shift = itf_r_index[i]*type_size;
4339
4340 if (itf->rank == local_rank) {
4341 cs_lnum_t s_buf_shift = itf_s_index[i]*type_size;
4342 int msg_size = (itf_s_index[i+1]-itf_s_index[i])*type_size;
4343 memcpy(_dest + r_buf_shift, send_buf + s_buf_shift, msg_size);
4344 }
4345
4346 #if defined(HAVE_MPI)
4347
4348 else /* if (itf->rank != local_rank) */
4349 MPI_Irecv(_dest + r_buf_shift,
4350 (itf_r_index[i+1]-itf_r_index[i]),
4351 mpi_type,
4352 itf->rank,
4353 itf->rank,
4354 ifs->comm,
4355 &(request[request_count++]));
4356
4357 #endif
4358
4359 j += itf->size;
4360
4361 }
4362
4363 #if defined(HAVE_MPI)
4364
4365 if (n_ranks > 1) {
4366
4367 for (i = 0, j = 0; i < ifs->size; i++) {
4368 cs_interface_t *itf = ifs->interfaces[i];
4369 cs_lnum_t s_buf_shift = itf_s_index[i]*type_size;
4370 if (itf->rank != local_rank)
4371 MPI_Isend(send_buf + s_buf_shift,
4372 (itf_s_index[i+1]-itf_s_index[i]),
4373 mpi_type,
4374 itf->rank,
4375 local_rank,
4376 ifs->comm,
4377 &(request[request_count++]));
4378 }
4379
4380 MPI_Waitall(request_count, request, status);
4381
4382 BFT_FREE(request);
4383 BFT_FREE(status);
4384
4385 }
4386
4387 #endif /* defined(HAVE_MPI) */
4388
4389 BFT_FREE(send_buf);
4390 BFT_FREE(itf_index);
4391 }
4392
4393 /*----------------------------------------------------------------------------*/
4394 /*!
4395 * \brief Update values using the bitwise inclusive or operation for elements
4396 * associated with an interface set.
4397 *
4398 * On input, the variable array should contain local contributions. On output,
4399 * contributions from matching elements on parallel or periodic boundaries
4400 * have been processed.
4401 *
4402 * Only the values of elements belonging to the interfaces are modified.
4403 *
4404 * \param[in] ifs pointer to a fvm_interface_set_t structure
4405 * \param[in] n_elts number of elements in var buffer
4406 * \param[in] stride number of values (non interlaced) by entity
4407 * \param[in] interlace true if variable is interlaced (for stride > 1)
4408 * \param[in] datatype type of data considered
4409 * \param[in, out] var variable buffer
4410 */
4411 /*----------------------------------------------------------------------------*/
4412
4413 void
cs_interface_set_inclusive_or(const cs_interface_set_t * ifs,cs_lnum_t n_elts,cs_lnum_t stride,bool interlace,cs_datatype_t datatype,void * var)4414 cs_interface_set_inclusive_or(const cs_interface_set_t *ifs,
4415 cs_lnum_t n_elts,
4416 cs_lnum_t stride,
4417 bool interlace,
4418 cs_datatype_t datatype,
4419 void *var)
4420 {
4421 int i;
4422 cs_lnum_t j, k, l;
4423 cs_lnum_t stride_size = cs_datatype_size[datatype]*stride;
4424 unsigned char *buf = NULL;
4425
4426 BFT_MALLOC(buf, cs_interface_set_n_elts(ifs)*stride_size, unsigned char);
4427
4428 if (stride < 2 || interlace)
4429 cs_interface_set_copy_array(ifs,
4430 datatype,
4431 stride,
4432 true, /* src_on_parent */
4433 var,
4434 buf);
4435
4436 else
4437 _interface_set_copy_array_ni(ifs,
4438 datatype,
4439 n_elts,
4440 stride,
4441 var,
4442 buf);
4443
4444 /* Now update values */
4445
4446 switch (datatype) {
4447
4448 case CS_CHAR:
4449 for (i = 0, j = 0; i < ifs->size; i++) {
4450 cs_interface_t *itf = ifs->interfaces[i];
4451 char *v = var;
4452 const char *p = (const char *)buf + j*stride;
4453 if (stride < 2 || interlace) {
4454 for (k = 0; k < itf->size; k++) {
4455 cs_lnum_t elt_id = itf->elt_id[k];
4456 for (l = 0; l < stride; l++)
4457 v[elt_id*stride + l] |= p[k*stride + l];
4458 }
4459 }
4460 else {
4461 for (k = 0; k < itf->size; k++) {
4462 cs_lnum_t elt_id = itf->elt_id[k];
4463 for (l = 0; l < stride; l++)
4464 v[elt_id + l*n_elts] |= p[k*stride + l];
4465 }
4466 }
4467 j += itf->size;
4468 }
4469 break;
4470
4471 case CS_INT32:
4472 for (i = 0, j = 0; i < ifs->size; i++) {
4473 cs_interface_t *itf = ifs->interfaces[i];
4474 int32_t *v = var;
4475 const int32_t *p = (const int32_t *)buf + j*stride;
4476 if (stride < 2 || interlace) {
4477 for (k = 0; k < itf->size; k++) {
4478 cs_lnum_t elt_id = itf->elt_id[k];
4479 for (l = 0; l < stride; l++)
4480 v[elt_id*stride + l] |= p[k*stride + l];
4481 }
4482 }
4483 else {
4484 for (k = 0; k < itf->size; k++) {
4485 cs_lnum_t elt_id = itf->elt_id[k];
4486 for (l = 0; l < stride; l++)
4487 v[elt_id + l*n_elts] |= p[k*stride + l];
4488 }
4489 }
4490 j += itf->size;
4491 }
4492 break;
4493
4494 case CS_INT64:
4495 for (i = 0, j = 0; i < ifs->size; i++) {
4496 cs_interface_t *itf = ifs->interfaces[i];
4497 int64_t *v = var;
4498 const int64_t *p = (const int64_t *)buf + j*stride;
4499 if (stride < 2 || interlace) {
4500 for (k = 0; k < itf->size; k++) {
4501 cs_lnum_t elt_id = itf->elt_id[k];
4502 for (l = 0; l < stride; l++)
4503 v[elt_id*stride + l] |= p[k*stride + l];
4504 }
4505 }
4506 else {
4507 for (k = 0; k < itf->size; k++) {
4508 cs_lnum_t elt_id = itf->elt_id[k];
4509 for (l = 0; l < stride; l++)
4510 v[elt_id + l*n_elts] |= p[k*stride + l];
4511 }
4512 }
4513 j += itf->size;
4514 }
4515 break;
4516
4517 case CS_UINT16:
4518 for (i = 0, j = 0; i < ifs->size; i++) {
4519 cs_interface_t *itf = ifs->interfaces[i];
4520 uint16_t *v = var;
4521 const uint16_t *p = (const uint16_t *)buf + j*stride;
4522 if (stride < 2 || interlace) {
4523 for (k = 0; k < itf->size; k++) {
4524 cs_lnum_t elt_id = itf->elt_id[k];
4525 for (l = 0; l < stride; l++)
4526 v[elt_id*stride + l] |= p[k*stride + l];
4527 }
4528 }
4529 else {
4530 for (k = 0; k < itf->size; k++) {
4531 cs_lnum_t elt_id = itf->elt_id[k];
4532 for (l = 0; l < stride; l++)
4533 v[elt_id + l*n_elts] |= p[k*stride + l];
4534 }
4535 }
4536 j += itf->size;
4537 }
4538 break;
4539
4540 case CS_UINT32:
4541 for (i = 0, j = 0; i < ifs->size; i++) {
4542 cs_interface_t *itf = ifs->interfaces[i];
4543 uint32_t *v = var;
4544 const uint32_t *p = (const uint32_t *)buf + j*stride;
4545 if (stride < 2 || interlace) {
4546 for (k = 0; k < itf->size; k++) {
4547 cs_lnum_t elt_id = itf->elt_id[k];
4548 for (l = 0; l < stride; l++)
4549 v[elt_id*stride + l] |= p[k*stride + l];
4550 }
4551 }
4552 else {
4553 for (k = 0; k < itf->size; k++) {
4554 cs_lnum_t elt_id = itf->elt_id[k];
4555 for (l = 0; l < stride; l++)
4556 v[elt_id + l*n_elts] |= p[k*stride + l];
4557 }
4558 }
4559 j += itf->size;
4560 }
4561 break;
4562
4563 case CS_UINT64:
4564 for (i = 0, j = 0; i < ifs->size; i++) {
4565 cs_interface_t *itf = ifs->interfaces[i];
4566 uint64_t *v = var;
4567 const uint64_t *p = (const uint64_t *)buf + j*stride;
4568 if (stride < 2 || interlace) {
4569 for (k = 0; k < itf->size; k++) {
4570 cs_lnum_t elt_id = itf->elt_id[k];
4571 for (l = 0; l < stride; l++)
4572 v[elt_id*stride + l] |= p[k*stride + l];
4573 }
4574 }
4575 else {
4576 for (k = 0; k < itf->size; k++) {
4577 cs_lnum_t elt_id = itf->elt_id[k];
4578 for (l = 0; l < stride; l++)
4579 v[elt_id + l*n_elts] |= p[k*stride + l];
4580 }
4581 }
4582 j += itf->size;
4583 }
4584 break;
4585
4586 default:
4587 bft_error(__FILE__, __LINE__, 0,
4588 _("Called %s with unhandled datatype (%d)."), __func__,
4589 (int)datatype);
4590 }
4591
4592 BFT_FREE(buf);
4593 }
4594
4595 /*----------------------------------------------------------------------------*/
4596 /*!
4597 * \brief Update the sum of values for elements associated with an
4598 * interface set.
4599 *
4600 * On input, the variable array should contain local contributions. On output,
4601 * contributions from matching elements on parallel or periodic boundaries
4602 * have been added.
4603 *
4604 * Only the values of elements belonging to the interfaces are modified.
4605 *
4606 * \param[in] ifs pointer to a fvm_interface_set_t structure
4607 * \param[in] n_elts number of elements in var buffer
4608 * \param[in] stride number of values (non interlaced) by entity
4609 * \param[in] interlace true if variable is interlaced (for stride > 1)
4610 * \param[in] datatype type of data considered
4611 * \param[in, out] var variable buffer
4612 */
4613 /*----------------------------------------------------------------------------*/
4614
4615 void
cs_interface_set_sum(const cs_interface_set_t * ifs,cs_lnum_t n_elts,cs_lnum_t stride,bool interlace,cs_datatype_t datatype,void * var)4616 cs_interface_set_sum(const cs_interface_set_t *ifs,
4617 cs_lnum_t n_elts,
4618 cs_lnum_t stride,
4619 bool interlace,
4620 cs_datatype_t datatype,
4621 void *var)
4622 {
4623 int i;
4624 cs_lnum_t j, k, l;
4625 cs_lnum_t stride_size = cs_datatype_size[datatype]*stride;
4626 unsigned char *buf = NULL;
4627
4628 BFT_MALLOC(buf, cs_interface_set_n_elts(ifs)*stride_size, unsigned char);
4629
4630 if (stride < 2 || interlace)
4631 cs_interface_set_copy_array(ifs,
4632 datatype,
4633 stride,
4634 true, /* src_on_parent */
4635 var,
4636 buf);
4637
4638 else
4639 _interface_set_copy_array_ni(ifs,
4640 datatype,
4641 n_elts,
4642 stride,
4643 var,
4644 buf);
4645
4646 /* Now increment values */
4647
4648 switch (datatype) {
4649
4650 case CS_CHAR:
4651 for (i = 0, j = 0; i < ifs->size; i++) {
4652 cs_interface_t *itf = ifs->interfaces[i];
4653 char *v = var;
4654 const char *p = (const char *)buf + j*stride;
4655 if (stride < 2 || interlace) {
4656 for (k = 0; k < itf->size; k++) {
4657 cs_lnum_t elt_id = itf->elt_id[k];
4658 for (l = 0; l < stride; l++)
4659 v[elt_id*stride + l] += p[k*stride + l];
4660 }
4661 }
4662 else {
4663 for (k = 0; k < itf->size; k++) {
4664 cs_lnum_t elt_id = itf->elt_id[k];
4665 for (l = 0; l < stride; l++)
4666 v[elt_id + l*n_elts] += p[k*stride + l];
4667 }
4668 }
4669 j += itf->size;
4670 }
4671 break;
4672
4673 case CS_FLOAT:
4674 for (i = 0, j = 0; i < ifs->size; i++) {
4675 cs_interface_t *itf = ifs->interfaces[i];
4676 float *v = var;
4677 const float *p = (const float *)buf + j*stride;
4678 if (stride < 2 || interlace) {
4679 for (k = 0; k < itf->size; k++) {
4680 cs_lnum_t elt_id = itf->elt_id[k];
4681 for (l = 0; l < stride; l++)
4682 v[elt_id*stride + l] += p[k*stride + l];
4683 }
4684 }
4685 else {
4686 for (k = 0; k < itf->size; k++) {
4687 cs_lnum_t elt_id = itf->elt_id[k];
4688 for (l = 0; l < stride; l++)
4689 v[elt_id + l*n_elts] += p[k*stride + l];
4690 }
4691 }
4692 j += itf->size;
4693 }
4694 break;
4695
4696 case CS_DOUBLE:
4697 for (i = 0, j = 0; i < ifs->size; i++) {
4698 cs_interface_t *itf = ifs->interfaces[i];
4699 double *v = var;
4700 const double *p = (const double *)buf + j*stride;
4701 if (stride < 2 || interlace) {
4702 for (k = 0; k < itf->size; k++) {
4703 cs_lnum_t elt_id = itf->elt_id[k];
4704 for (l = 0; l < stride; l++)
4705 v[elt_id*stride + l] += p[k*stride + l];
4706 }
4707 }
4708 else {
4709 for (k = 0; k < itf->size; k++) {
4710 cs_lnum_t elt_id = itf->elt_id[k];
4711 for (l = 0; l < stride; l++)
4712 v[elt_id + l*n_elts] += p[k*stride + l];
4713 }
4714 }
4715 j += itf->size;
4716 }
4717 break;
4718
4719 case CS_INT32:
4720 for (i = 0, j = 0; i < ifs->size; i++) {
4721 cs_interface_t *itf = ifs->interfaces[i];
4722 int32_t *v = var;
4723 const int32_t *p = (const int32_t *)buf + j*stride;
4724 if (stride < 2 || interlace) {
4725 for (k = 0; k < itf->size; k++) {
4726 cs_lnum_t elt_id = itf->elt_id[k];
4727 for (l = 0; l < stride; l++)
4728 v[elt_id*stride + l] += p[k*stride + l];
4729 }
4730 }
4731 else {
4732 for (k = 0; k < itf->size; k++) {
4733 cs_lnum_t elt_id = itf->elt_id[k];
4734 for (l = 0; l < stride; l++)
4735 v[elt_id + l*n_elts] += p[k*stride + l];
4736 }
4737 }
4738 j += itf->size;
4739 }
4740 break;
4741
4742 case CS_INT64:
4743 for (i = 0, j = 0; i < ifs->size; i++) {
4744 cs_interface_t *itf = ifs->interfaces[i];
4745 int64_t *v = var;
4746 const int64_t *p = (const int64_t *)buf + j*stride;
4747 if (stride < 2 || interlace) {
4748 for (k = 0; k < itf->size; k++) {
4749 cs_lnum_t elt_id = itf->elt_id[k];
4750 for (l = 0; l < stride; l++)
4751 v[elt_id*stride + l] += p[k*stride + l];
4752 }
4753 }
4754 else {
4755 for (k = 0; k < itf->size; k++) {
4756 cs_lnum_t elt_id = itf->elt_id[k];
4757 for (l = 0; l < stride; l++)
4758 v[elt_id + l*n_elts] += p[k*stride + l];
4759 }
4760 }
4761 j += itf->size;
4762 }
4763 break;
4764
4765 case CS_UINT16:
4766 for (i = 0, j = 0; i < ifs->size; i++) {
4767 cs_interface_t *itf = ifs->interfaces[i];
4768 uint16_t *v = var;
4769 const uint16_t *p = (const uint16_t *)buf + j*stride;
4770 if (stride < 2 || interlace) {
4771 for (k = 0; k < itf->size; k++) {
4772 cs_lnum_t elt_id = itf->elt_id[k];
4773 for (l = 0; l < stride; l++)
4774 v[elt_id*stride + l] += p[k*stride + l];
4775 }
4776 }
4777 else {
4778 for (k = 0; k < itf->size; k++) {
4779 cs_lnum_t elt_id = itf->elt_id[k];
4780 for (l = 0; l < stride; l++)
4781 v[elt_id + l*n_elts] += p[k*stride + l];
4782 }
4783 }
4784 j += itf->size;
4785 }
4786 break;
4787
4788 case CS_UINT32:
4789 for (i = 0, j = 0; i < ifs->size; i++) {
4790 cs_interface_t *itf = ifs->interfaces[i];
4791 uint32_t *v = var;
4792 const uint32_t *p = (const uint32_t *)buf + j*stride;
4793 if (stride < 2 || interlace) {
4794 for (k = 0; k < itf->size; k++) {
4795 cs_lnum_t elt_id = itf->elt_id[k];
4796 for (l = 0; l < stride; l++)
4797 v[elt_id*stride + l] += p[k*stride + l];
4798 }
4799 }
4800 else {
4801 for (k = 0; k < itf->size; k++) {
4802 cs_lnum_t elt_id = itf->elt_id[k];
4803 for (l = 0; l < stride; l++)
4804 v[elt_id + l*n_elts] += p[k*stride + l];
4805 }
4806 }
4807 j += itf->size;
4808 }
4809 break;
4810
4811 case CS_UINT64:
4812 for (i = 0, j = 0; i < ifs->size; i++) {
4813 cs_interface_t *itf = ifs->interfaces[i];
4814 uint64_t *v = var;
4815 const uint64_t *p = (const uint64_t *)buf + j*stride;
4816 if (stride < 2 || interlace) {
4817 for (k = 0; k < itf->size; k++) {
4818 cs_lnum_t elt_id = itf->elt_id[k];
4819 for (l = 0; l < stride; l++)
4820 v[elt_id*stride + l] += p[k*stride + l];
4821 }
4822 }
4823 else {
4824 for (k = 0; k < itf->size; k++) {
4825 cs_lnum_t elt_id = itf->elt_id[k];
4826 for (l = 0; l < stride; l++)
4827 v[elt_id + l*n_elts] += p[k*stride + l];
4828 }
4829 }
4830 j += itf->size;
4831 }
4832 break;
4833
4834 default:
4835 bft_error(__FILE__, __LINE__, 0,
4836 _("Called cs_interface_set_sum with unhandled datatype (%d)."),
4837 (int)datatype);
4838 }
4839
4840 BFT_FREE(buf);
4841 }
4842
4843 /*----------------------------------------------------------------------------*/
4844 /*!
4845 * \brief Update the sum of values for elements associated with an
4846 * interface set, allowing control over periodicity.
4847 *
4848 * On input, the variable array should contain local contributions. On output,
4849 * contributions from matching elements on parallel or periodic boundaries
4850 * have been added.
4851 *
4852 * Only the values of elements belonging to the interfaces are modified.
4853 *
4854 * \param[in] ifs pointer to a fvm_interface_set_t structure
4855 * \param[in] n_elts number of elements in var buffer
4856 * \param[in] stride number of values (non interlaced) by entity
4857 * \param[in] interlace true if variable is interlaced (for stride > 1)
4858 * \param[in] datatype type of data considered
4859 * \param[in] tr_ignore if > 0, ignore periodicity with rotation;
4860 * if > 1, ignore all periodic transforms
4861 * \param[in, out] var variable buffer
4862 */
4863 /*----------------------------------------------------------------------------*/
4864
4865 void
cs_interface_set_sum_tr(const cs_interface_set_t * ifs,cs_lnum_t n_elts,cs_lnum_t stride,bool interlace,cs_datatype_t datatype,int tr_ignore,void * var)4866 cs_interface_set_sum_tr(const cs_interface_set_t *ifs,
4867 cs_lnum_t n_elts,
4868 cs_lnum_t stride,
4869 bool interlace,
4870 cs_datatype_t datatype,
4871 int tr_ignore,
4872 void *var)
4873 {
4874 cs_lnum_t stride_size = cs_datatype_size[datatype]*stride;
4875 unsigned char *buf = NULL;
4876
4877 int n_tr = 0;
4878
4879 if (tr_ignore > 0 && ifs->periodicity != NULL) {
4880 if (tr_ignore < 2) {
4881 int n_tr_max = fvm_periodicity_get_n_transforms(ifs->periodicity);
4882 for (int tr_id = 0; tr_id < n_tr_max; tr_id++) {
4883 if (fvm_periodicity_get_type(ifs->periodicity, tr_id)
4884 < FVM_PERIODICITY_ROTATION)
4885 n_tr = tr_id + 1;
4886 }
4887 }
4888 n_tr += 1; /* add base "identity" transform_id */
4889 }
4890
4891 if (n_tr < 1) {
4892 cs_interface_set_sum(ifs,
4893 n_elts,
4894 stride,
4895 interlace,
4896 datatype,
4897 var);
4898 return;
4899 }
4900
4901 /* We can use a fixed max periodicity type here based on translation,
4902 because when even translation is ignored, n_tr has been set to 1 above,
4903 and the first transform is "no transform", so tests should always
4904 return a correct value */
4905
4906 fvm_periodicity_type_t tr_threshold = FVM_PERIODICITY_ROTATION;
4907
4908 BFT_MALLOC(buf, cs_interface_set_n_elts(ifs)*stride_size, unsigned char);
4909
4910 if (stride < 2 || interlace)
4911 cs_interface_set_copy_array(ifs,
4912 datatype,
4913 stride,
4914 true, /* src_on_parent */
4915 var,
4916 buf);
4917
4918 else
4919 _interface_set_copy_array_ni(ifs,
4920 datatype,
4921 n_elts,
4922 stride,
4923 var,
4924 buf);
4925
4926 /* Now increment values */
4927
4928 cs_lnum_t j = 0;
4929
4930 switch (datatype) {
4931
4932 case CS_FLOAT:
4933 for (int i = 0; i < ifs->size; i++) {
4934 cs_interface_t *itf = ifs->interfaces[i];
4935 for (int tr_id = 0; tr_id < n_tr; tr_id++) {
4936 cs_lnum_t s_id = itf->tr_index[tr_id];
4937 cs_lnum_t e_id = itf->tr_index[tr_id+1];
4938 if (e_id > s_id && tr_id > 0) {
4939 if ( fvm_periodicity_get_type(ifs->periodicity, tr_id-1)
4940 >= tr_threshold)
4941 continue;
4942 }
4943 float *v = var;
4944 const float *p = (const float *)buf + j*stride;
4945 if (stride < 2 || interlace) {
4946 for (cs_lnum_t k = s_id; k < e_id; k++) {
4947 cs_lnum_t elt_id = itf->elt_id[k];
4948 for (cs_lnum_t l = 0; l < stride; l++)
4949 v[elt_id*stride + l] += p[k*stride + l];
4950 }
4951 }
4952 else {
4953 for (cs_lnum_t k = s_id; k < e_id; k++) {
4954 cs_lnum_t elt_id = itf->elt_id[k];
4955 for (cs_lnum_t l = 0; l < stride; l++)
4956 v[elt_id + l*n_elts] += p[k*stride + l];
4957 }
4958 }
4959 }
4960 j += itf->size;
4961 }
4962 break;
4963
4964 case CS_DOUBLE:
4965 for (int i = 0; i < ifs->size; i++) {
4966 cs_interface_t *itf = ifs->interfaces[i];
4967 for (int tr_id = 0; tr_id < n_tr; tr_id++) {
4968 cs_lnum_t s_id = itf->tr_index[tr_id];
4969 cs_lnum_t e_id = itf->tr_index[tr_id+1];
4970 if (e_id > s_id && tr_id > 0) {
4971 if ( fvm_periodicity_get_type(ifs->periodicity, tr_id-1)
4972 >= tr_threshold)
4973 continue;
4974 }
4975 double *v = var;
4976 const double *p = (const double *)buf + j*stride;
4977 if (stride < 2 || interlace) {
4978 for (cs_lnum_t k = s_id; k < e_id; k++) {
4979 cs_lnum_t elt_id = itf->elt_id[k];
4980 for (cs_lnum_t l = 0; l < stride; l++)
4981 v[elt_id*stride + l] += p[k*stride + l];
4982 }
4983 }
4984 else {
4985 for (cs_lnum_t k = s_id; k < e_id; k++) {
4986 cs_lnum_t elt_id = itf->elt_id[k];
4987 for (cs_lnum_t l = 0; l < stride; l++)
4988 v[elt_id + l*n_elts] += p[k*stride + l];
4989 }
4990 }
4991 }
4992 j += itf->size;
4993 }
4994 break;
4995
4996 default:
4997 for (int i = 0; i < ifs->size; i++) {
4998 cs_interface_t *itf = ifs->interfaces[i];
4999 for (int tr_id = 0; tr_id < n_tr; tr_id++) {
5000 cs_lnum_t s_id = itf->tr_index[tr_id];
5001 cs_lnum_t e_id = itf->tr_index[tr_id+1];
5002 if (e_id > s_id && tr_id > 0) {
5003 if ( fvm_periodicity_get_type(ifs->periodicity, tr_id-1)
5004 >= tr_threshold)
5005 continue;
5006 }
5007 char *v = var;
5008 cs_lnum_t _stride = stride * cs_datatype_size[datatype];
5009 const char *p = (const char *)buf + j*_stride;
5010 if (stride < 2 || interlace) {
5011 for (cs_lnum_t k = s_id; k < e_id; k++) {
5012 cs_lnum_t elt_id = itf->elt_id[k];
5013 for (cs_lnum_t l = 0; l < _stride; l++)
5014 v[elt_id*_stride + l] += p[k*_stride + l];
5015 }
5016 }
5017 else {
5018 cs_lnum_t elt_size = cs_datatype_size[datatype];
5019 for (cs_lnum_t k = s_id; k < e_id; k++) {
5020 cs_lnum_t elt_id = itf->elt_id[k];
5021 for (cs_lnum_t l = 0; l < stride; l++) {
5022 for (cs_lnum_t q = 0; q < elt_size; q++)
5023 v[elt_id*elt_size + l*n_elts + q]
5024 += p[k*_stride + l*elt_size + q];
5025 }
5026 }
5027 }
5028 }
5029 j += itf->size;
5030 }
5031
5032 }
5033
5034 BFT_FREE(buf);
5035 }
5036
5037 /*----------------------------------------------------------------------------*/
5038 /*!
5039 * \brief Update to minimum value for elements associated with an
5040 * interface set.
5041 *
5042 * On input, the variable array should contain local contributions. On output,
5043 * contributions from matching elements on parallel or periodic boundaries
5044 * have been added.
5045 *
5046 * Only the values of elements belonging to the interfaces are modified.
5047 *
5048 * \param[in] ifs pointer to a fvm_interface_set_t structure
5049 * \param[in] n_elts number of elements in var buffer
5050 * \param[in] stride number of values (non interlaced) by entity
5051 * \param[in] interlace true if variable is interlaced (for stride > 1)
5052 * \param[in] datatype type of data considered
5053 * \param[in, out] var variable buffer
5054 */
5055 /*----------------------------------------------------------------------------*/
5056
5057 void
cs_interface_set_min(const cs_interface_set_t * ifs,cs_lnum_t n_elts,cs_lnum_t stride,bool interlace,cs_datatype_t datatype,void * var)5058 cs_interface_set_min(const cs_interface_set_t *ifs,
5059 cs_lnum_t n_elts,
5060 cs_lnum_t stride,
5061 bool interlace,
5062 cs_datatype_t datatype,
5063 void *var)
5064 {
5065 int i;
5066 cs_lnum_t j, k, l;
5067 cs_lnum_t stride_size = cs_datatype_size[datatype]*stride;
5068 unsigned char *buf = NULL;
5069
5070 BFT_MALLOC(buf, cs_interface_set_n_elts(ifs)*stride_size, unsigned char);
5071
5072 if (stride < 2 || interlace)
5073 cs_interface_set_copy_array(ifs,
5074 datatype,
5075 stride,
5076 true, /* src_on_parent */
5077 var,
5078 buf);
5079
5080 else
5081 _interface_set_copy_array_ni(ifs,
5082 datatype,
5083 n_elts,
5084 stride,
5085 var,
5086 buf);
5087
5088 /* Now increment values */
5089
5090 switch (datatype) {
5091
5092 case CS_CHAR:
5093 for (i = 0, j = 0; i < ifs->size; i++) {
5094 cs_interface_t *itf = ifs->interfaces[i];
5095 char *v = var;
5096 const char *p = (const char *)buf + j*stride;
5097 if (stride < 2 || interlace) {
5098 for (k = 0; k < itf->size; k++) {
5099 cs_lnum_t elt_id = itf->elt_id[k];
5100 for (l = 0; l < stride; l++)
5101 v[elt_id*stride + l] = CS_MIN(v[elt_id*stride + l],
5102 p[k*stride + l]);
5103 }
5104 }
5105 else {
5106 for (k = 0; k < itf->size; k++) {
5107 cs_lnum_t elt_id = itf->elt_id[k];
5108 for (l = 0; l < stride; l++)
5109 v[elt_id + l*n_elts] = CS_MIN(v[elt_id + l*n_elts],
5110 p[k*stride + l]);
5111 }
5112 }
5113 j += itf->size;
5114 }
5115 break;
5116
5117 case CS_FLOAT:
5118 for (i = 0, j = 0; i < ifs->size; i++) {
5119 cs_interface_t *itf = ifs->interfaces[i];
5120 float *v = var;
5121 const float *p = (const float *)buf + j*stride;
5122 if (stride < 2 || interlace) {
5123 for (k = 0; k < itf->size; k++) {
5124 cs_lnum_t elt_id = itf->elt_id[k];
5125 for (l = 0; l < stride; l++)
5126 v[elt_id*stride + l] = CS_MIN(v[elt_id*stride + l],
5127 p[k*stride + l]);
5128 }
5129 }
5130 else {
5131 for (k = 0; k < itf->size; k++) {
5132 cs_lnum_t elt_id = itf->elt_id[k];
5133 for (l = 0; l < stride; l++)
5134 v[elt_id + l*n_elts] = CS_MIN(p[k*stride + l],
5135 v[elt_id + l*n_elts]);
5136 }
5137 }
5138 j += itf->size;
5139 }
5140 break;
5141
5142 case CS_DOUBLE:
5143 for (i = 0, j = 0; i < ifs->size; i++) {
5144 cs_interface_t *itf = ifs->interfaces[i];
5145 double *v = var;
5146 const double *p = (const double *)buf + j*stride;
5147 if (stride < 2 || interlace) {
5148 for (k = 0; k < itf->size; k++) {
5149 cs_lnum_t elt_id = itf->elt_id[k];
5150 for (l = 0; l < stride; l++)
5151 v[elt_id*stride + l] = CS_MIN(v[elt_id*stride + l],
5152 p[k*stride + l]);
5153 }
5154 }
5155 else {
5156 for (k = 0; k < itf->size; k++) {
5157 cs_lnum_t elt_id = itf->elt_id[k];
5158 for (l = 0; l < stride; l++)
5159 v[elt_id + l*n_elts] = CS_MIN(v[elt_id + l*n_elts],
5160 p[k*stride + l]);
5161 }
5162 }
5163 j += itf->size;
5164 }
5165 break;
5166
5167 case CS_INT32:
5168 for (i = 0, j = 0; i < ifs->size; i++) {
5169 cs_interface_t *itf = ifs->interfaces[i];
5170 int32_t *v = var;
5171 const int32_t *p = (const int32_t *)buf + j*stride;
5172 if (stride < 2 || interlace) {
5173 for (k = 0; k < itf->size; k++) {
5174 cs_lnum_t elt_id = itf->elt_id[k];
5175 for (l = 0; l < stride; l++)
5176 v[elt_id*stride + l] = CS_MIN(v[elt_id*stride + l],
5177 p[k*stride + l]);
5178 }
5179 }
5180 else {
5181 for (k = 0; k < itf->size; k++) {
5182 cs_lnum_t elt_id = itf->elt_id[k];
5183 for (l = 0; l < stride; l++)
5184 v[elt_id + l*n_elts] = CS_MIN(v[elt_id + l*n_elts],
5185 p[k*stride + l]);
5186 }
5187 }
5188 j += itf->size;
5189 }
5190 break;
5191
5192 case CS_INT64:
5193 for (i = 0, j = 0; i < ifs->size; i++) {
5194 cs_interface_t *itf = ifs->interfaces[i];
5195 int64_t *v = var;
5196 const int64_t *p = (const int64_t *)buf + j*stride;
5197 if (stride < 2 || interlace) {
5198 for (k = 0; k < itf->size; k++) {
5199 cs_lnum_t elt_id = itf->elt_id[k];
5200 for (l = 0; l < stride; l++)
5201 v[elt_id*stride + l] = CS_MIN(v[elt_id*stride + l],
5202 p[k*stride + l]);
5203 }
5204 }
5205 else {
5206 for (k = 0; k < itf->size; k++) {
5207 cs_lnum_t elt_id = itf->elt_id[k];
5208 for (l = 0; l < stride; l++)
5209 v[elt_id + l*n_elts] = CS_MIN(v[elt_id + l*n_elts],
5210 p[k*stride + l]);
5211 }
5212 }
5213 j += itf->size;
5214 }
5215 break;
5216
5217 case CS_UINT16:
5218 for (i = 0, j = 0; i < ifs->size; i++) {
5219 cs_interface_t *itf = ifs->interfaces[i];
5220 uint16_t *v = var;
5221 const uint16_t *p = (const uint16_t *)buf + j*stride;
5222 if (stride < 2 || interlace) {
5223 for (k = 0; k < itf->size; k++) {
5224 cs_lnum_t elt_id = itf->elt_id[k];
5225 for (l = 0; l < stride; l++)
5226 v[elt_id*stride + l] = CS_MIN(v[elt_id*stride + l],
5227 p[k*stride + l]);
5228 }
5229 }
5230 else {
5231 for (k = 0; k < itf->size; k++) {
5232 cs_lnum_t elt_id = itf->elt_id[k];
5233 for (l = 0; l < stride; l++)
5234 v[elt_id + l*n_elts] = CS_MIN(v[elt_id + l*n_elts],
5235 p[k*stride + l]);
5236 }
5237 }
5238 j += itf->size;
5239 }
5240 break;
5241
5242 case CS_UINT32:
5243 for (i = 0, j = 0; i < ifs->size; i++) {
5244 cs_interface_t *itf = ifs->interfaces[i];
5245 uint32_t *v = var;
5246 const uint32_t *p = (const uint32_t *)buf + j*stride;
5247 if (stride < 2 || interlace) {
5248 for (k = 0; k < itf->size; k++) {
5249 cs_lnum_t elt_id = itf->elt_id[k];
5250 for (l = 0; l < stride; l++)
5251 v[elt_id*stride + l] = CS_MIN(v[elt_id*stride + l],
5252 p[k*stride + l]);
5253 }
5254 }
5255 else {
5256 for (k = 0; k < itf->size; k++) {
5257 cs_lnum_t elt_id = itf->elt_id[k];
5258 for (l = 0; l < stride; l++)
5259 v[elt_id + l*n_elts] = CS_MIN(v[elt_id + l*n_elts],
5260 p[k*stride + l]);
5261 }
5262 }
5263 j += itf->size;
5264 }
5265 break;
5266
5267 case CS_UINT64:
5268 for (i = 0, j = 0; i < ifs->size; i++) {
5269 cs_interface_t *itf = ifs->interfaces[i];
5270 uint64_t *v = var;
5271 const uint64_t *p = (const uint64_t *)buf + j*stride;
5272 if (stride < 2 || interlace) {
5273 for (k = 0; k < itf->size; k++) {
5274 cs_lnum_t elt_id = itf->elt_id[k];
5275 for (l = 0; l < stride; l++)
5276 v[elt_id*stride + l] = CS_MIN(v[elt_id*stride + l],
5277 p[k*stride + l]);
5278 }
5279 }
5280 else {
5281 for (k = 0; k < itf->size; k++) {
5282 cs_lnum_t elt_id = itf->elt_id[k];
5283 for (l = 0; l < stride; l++)
5284 v[elt_id + l*n_elts] = CS_MIN(v[elt_id + l*n_elts],
5285 p[k*stride + l]);
5286 }
5287 }
5288 j += itf->size;
5289 }
5290 break;
5291
5292 default:
5293 bft_error(__FILE__, __LINE__, 0,
5294 _("Called cs_interface_set_max with unhandled datatype (%d)."),
5295 (int)datatype);
5296 }
5297
5298 BFT_FREE(buf);
5299 }
5300
5301 /*----------------------------------------------------------------------------*/
5302 /*!
5303 * \brief Update to maximum value for elements associated with an
5304 * interface set.
5305 *
5306 * On input, the variable array should contain local contributions. On output,
5307 * contributions from matching elements on parallel or periodic boundaries
5308 * have been added.
5309 *
5310 * Only the values of elements belonging to the interfaces are modified.
5311 *
5312 * \param[in] ifs pointer to a fvm_interface_set_t structure
5313 * \param[in] n_elts number of elements in var buffer
5314 * \param[in] stride number of values (non interlaced) by entity
5315 * \param[in] interlace true if variable is interlaced (for stride > 1)
5316 * \param[in] datatype type of data considered
5317 * \param[in, out] var variable buffer
5318 */
5319 /*----------------------------------------------------------------------------*/
5320
5321 void
cs_interface_set_max(const cs_interface_set_t * ifs,cs_lnum_t n_elts,cs_lnum_t stride,bool interlace,cs_datatype_t datatype,void * var)5322 cs_interface_set_max(const cs_interface_set_t *ifs,
5323 cs_lnum_t n_elts,
5324 cs_lnum_t stride,
5325 bool interlace,
5326 cs_datatype_t datatype,
5327 void *var)
5328 {
5329 int i;
5330 cs_lnum_t j, k, l;
5331 cs_lnum_t stride_size = cs_datatype_size[datatype]*stride;
5332 unsigned char *buf = NULL;
5333
5334 BFT_MALLOC(buf, cs_interface_set_n_elts(ifs)*stride_size, unsigned char);
5335
5336 if (stride < 2 || interlace)
5337 cs_interface_set_copy_array(ifs,
5338 datatype,
5339 stride,
5340 true, /* src_on_parent */
5341 var,
5342 buf);
5343
5344 else
5345 _interface_set_copy_array_ni(ifs,
5346 datatype,
5347 n_elts,
5348 stride,
5349 var,
5350 buf);
5351
5352 /* Now increment values */
5353
5354 switch (datatype) {
5355
5356 case CS_CHAR:
5357 for (i = 0, j = 0; i < ifs->size; i++) {
5358 cs_interface_t *itf = ifs->interfaces[i];
5359 char *v = var;
5360 const char *p = (const char *)buf + j*stride;
5361 if (stride < 2 || interlace) {
5362 for (k = 0; k < itf->size; k++) {
5363 cs_lnum_t elt_id = itf->elt_id[k];
5364 for (l = 0; l < stride; l++)
5365 v[elt_id*stride + l] = CS_MAX(v[elt_id*stride + l],
5366 p[k*stride + l]);
5367 }
5368 }
5369 else {
5370 for (k = 0; k < itf->size; k++) {
5371 cs_lnum_t elt_id = itf->elt_id[k];
5372 for (l = 0; l < stride; l++)
5373 v[elt_id + l*n_elts] = CS_MAX(v[elt_id + l*n_elts],
5374 p[k*stride + l]);
5375 }
5376 }
5377 j += itf->size;
5378 }
5379 break;
5380
5381 case CS_FLOAT:
5382 for (i = 0, j = 0; i < ifs->size; i++) {
5383 cs_interface_t *itf = ifs->interfaces[i];
5384 float *v = var;
5385 const float *p = (const float *)buf + j*stride;
5386 if (stride < 2 || interlace) {
5387 for (k = 0; k < itf->size; k++) {
5388 cs_lnum_t elt_id = itf->elt_id[k];
5389 for (l = 0; l < stride; l++)
5390 v[elt_id*stride + l] = CS_MAX(v[elt_id*stride + l],
5391 p[k*stride + l]);
5392 }
5393 }
5394 else {
5395 for (k = 0; k < itf->size; k++) {
5396 cs_lnum_t elt_id = itf->elt_id[k];
5397 for (l = 0; l < stride; l++)
5398 v[elt_id + l*n_elts] = CS_MAX(p[k*stride + l],
5399 v[elt_id + l*n_elts]);
5400 }
5401 }
5402 j += itf->size;
5403 }
5404 break;
5405
5406 case CS_DOUBLE:
5407 for (i = 0, j = 0; i < ifs->size; i++) {
5408 cs_interface_t *itf = ifs->interfaces[i];
5409 double *v = var;
5410 const double *p = (const double *)buf + j*stride;
5411 if (stride < 2 || interlace) {
5412 for (k = 0; k < itf->size; k++) {
5413 cs_lnum_t elt_id = itf->elt_id[k];
5414 for (l = 0; l < stride; l++)
5415 v[elt_id*stride + l] = CS_MAX(v[elt_id*stride + l],
5416 p[k*stride + l]);
5417 }
5418 }
5419 else {
5420 for (k = 0; k < itf->size; k++) {
5421 cs_lnum_t elt_id = itf->elt_id[k];
5422 for (l = 0; l < stride; l++)
5423 v[elt_id + l*n_elts] = CS_MAX(v[elt_id + l*n_elts],
5424 p[k*stride + l]);
5425 }
5426 }
5427 j += itf->size;
5428 }
5429 break;
5430
5431 case CS_INT32:
5432 for (i = 0, j = 0; i < ifs->size; i++) {
5433 cs_interface_t *itf = ifs->interfaces[i];
5434 int32_t *v = var;
5435 const int32_t *p = (const int32_t *)buf + j*stride;
5436 if (stride < 2 || interlace) {
5437 for (k = 0; k < itf->size; k++) {
5438 cs_lnum_t elt_id = itf->elt_id[k];
5439 for (l = 0; l < stride; l++)
5440 v[elt_id*stride + l] = CS_MAX(v[elt_id*stride + l],
5441 p[k*stride + l]);
5442 }
5443 }
5444 else {
5445 for (k = 0; k < itf->size; k++) {
5446 cs_lnum_t elt_id = itf->elt_id[k];
5447 for (l = 0; l < stride; l++)
5448 v[elt_id + l*n_elts] = CS_MAX(v[elt_id + l*n_elts],
5449 p[k*stride + l]);
5450 }
5451 }
5452 j += itf->size;
5453 }
5454 break;
5455
5456 case CS_INT64:
5457 for (i = 0, j = 0; i < ifs->size; i++) {
5458 cs_interface_t *itf = ifs->interfaces[i];
5459 int64_t *v = var;
5460 const int64_t *p = (const int64_t *)buf + j*stride;
5461 if (stride < 2 || interlace) {
5462 for (k = 0; k < itf->size; k++) {
5463 cs_lnum_t elt_id = itf->elt_id[k];
5464 for (l = 0; l < stride; l++)
5465 v[elt_id*stride + l] = CS_MAX(v[elt_id*stride + l],
5466 p[k*stride + l]);
5467 }
5468 }
5469 else {
5470 for (k = 0; k < itf->size; k++) {
5471 cs_lnum_t elt_id = itf->elt_id[k];
5472 for (l = 0; l < stride; l++)
5473 v[elt_id + l*n_elts] = CS_MAX(v[elt_id + l*n_elts],
5474 p[k*stride + l]);
5475 }
5476 }
5477 j += itf->size;
5478 }
5479 break;
5480
5481 case CS_UINT16:
5482 for (i = 0, j = 0; i < ifs->size; i++) {
5483 cs_interface_t *itf = ifs->interfaces[i];
5484 uint16_t *v = var;
5485 const uint16_t *p = (const uint16_t *)buf + j*stride;
5486 if (stride < 2 || interlace) {
5487 for (k = 0; k < itf->size; k++) {
5488 cs_lnum_t elt_id = itf->elt_id[k];
5489 for (l = 0; l < stride; l++)
5490 v[elt_id*stride + l] = CS_MAX(v[elt_id*stride + l],
5491 p[k*stride + l]);
5492 }
5493 }
5494 else {
5495 for (k = 0; k < itf->size; k++) {
5496 cs_lnum_t elt_id = itf->elt_id[k];
5497 for (l = 0; l < stride; l++)
5498 v[elt_id + l*n_elts] = CS_MAX(v[elt_id + l*n_elts],
5499 p[k*stride + l]);
5500 }
5501 }
5502 j += itf->size;
5503 }
5504 break;
5505
5506 case CS_UINT32:
5507 for (i = 0, j = 0; i < ifs->size; i++) {
5508 cs_interface_t *itf = ifs->interfaces[i];
5509 uint32_t *v = var;
5510 const uint32_t *p = (const uint32_t *)buf + j*stride;
5511 if (stride < 2 || interlace) {
5512 for (k = 0; k < itf->size; k++) {
5513 cs_lnum_t elt_id = itf->elt_id[k];
5514 for (l = 0; l < stride; l++)
5515 v[elt_id*stride + l] = CS_MAX(v[elt_id*stride + l],
5516 p[k*stride + l]);
5517 }
5518 }
5519 else {
5520 for (k = 0; k < itf->size; k++) {
5521 cs_lnum_t elt_id = itf->elt_id[k];
5522 for (l = 0; l < stride; l++)
5523 v[elt_id + l*n_elts] = CS_MAX(v[elt_id + l*n_elts],
5524 p[k*stride + l]);
5525 }
5526 }
5527 j += itf->size;
5528 }
5529 break;
5530
5531 case CS_UINT64:
5532 for (i = 0, j = 0; i < ifs->size; i++) {
5533 cs_interface_t *itf = ifs->interfaces[i];
5534 uint64_t *v = var;
5535 const uint64_t *p = (const uint64_t *)buf + j*stride;
5536 if (stride < 2 || interlace) {
5537 for (k = 0; k < itf->size; k++) {
5538 cs_lnum_t elt_id = itf->elt_id[k];
5539 for (l = 0; l < stride; l++)
5540 v[elt_id*stride + l] = CS_MAX(v[elt_id*stride + l],
5541 p[k*stride + l]);
5542 }
5543 }
5544 else {
5545 for (k = 0; k < itf->size; k++) {
5546 cs_lnum_t elt_id = itf->elt_id[k];
5547 for (l = 0; l < stride; l++)
5548 v[elt_id + l*n_elts] = CS_MAX(v[elt_id + l*n_elts],
5549 p[k*stride + l]);
5550 }
5551 }
5552 j += itf->size;
5553 }
5554 break;
5555
5556 default:
5557 bft_error(__FILE__, __LINE__, 0,
5558 _("Called cs_interface_set_max with unhandled datatype (%d)."),
5559 (int)datatype);
5560 }
5561
5562 BFT_FREE(buf);
5563 }
5564
5565 /*----------------------------------------------------------------------------*/
5566 /*!
5567 * \brief Update the maximum of values for elements associated with an
5568 * interface set, allowing control over periodicity.
5569 *
5570 * On input, the variable array should contain local contributions. On output,
5571 * contributions from matching elements on parallel or periodic boundaries
5572 * have been added.
5573 *
5574 * Only the values of elements belonging to the interfaces are modified.
5575 *
5576 * \param[in] ifs pointer to a fvm_interface_set_t structure
5577 * \param[in] n_elts number of elements in var buffer
5578 * \param[in] stride number of values (non interlaced) by entity
5579 * \param[in] interlace true if variable is interlaced (for stride > 1)
5580 * \param[in] datatype type of data considered
5581 * \param[in] tr_ignore if > 0, ignore periodicity with rotation;
5582 * if > 1, ignore all periodic transforms
5583 * \param[in, out] var variable buffer
5584 */
5585 /*----------------------------------------------------------------------------*/
5586
5587 void
cs_interface_set_max_tr(const cs_interface_set_t * ifs,cs_lnum_t n_elts,cs_lnum_t stride,bool interlace,cs_datatype_t datatype,int tr_ignore,void * var)5588 cs_interface_set_max_tr(const cs_interface_set_t *ifs,
5589 cs_lnum_t n_elts,
5590 cs_lnum_t stride,
5591 bool interlace,
5592 cs_datatype_t datatype,
5593 int tr_ignore,
5594 void *var)
5595 {
5596 cs_lnum_t stride_size = cs_datatype_size[datatype]*stride;
5597 unsigned char *buf = NULL;
5598
5599 int n_tr = 0;
5600
5601 if (tr_ignore > 0 && ifs->periodicity != NULL) {
5602 if (tr_ignore < 2) {
5603 int n_tr_max = fvm_periodicity_get_n_transforms(ifs->periodicity);
5604 for (int tr_id = 0; tr_id < n_tr_max; tr_id++) {
5605 if (fvm_periodicity_get_type(ifs->periodicity, tr_id)
5606 < FVM_PERIODICITY_ROTATION)
5607 n_tr = tr_id + 1;
5608 }
5609 }
5610 n_tr += 1; /* add base "identity" transform_id */
5611 }
5612
5613 if (n_tr < 1) {
5614 cs_interface_set_max(ifs,
5615 n_elts,
5616 stride,
5617 interlace,
5618 datatype,
5619 var);
5620 return;
5621 }
5622
5623 /* We can use a fixed max periodicity type here based on translation,
5624 because when even translation is ignored, n_tr has been set to 1 above,
5625 and the first transform is "no transform", so tests should always
5626 return a correct value */
5627
5628 fvm_periodicity_type_t tr_threshold = FVM_PERIODICITY_ROTATION;
5629
5630 BFT_MALLOC(buf, cs_interface_set_n_elts(ifs)*stride_size, unsigned char);
5631
5632 if (stride < 2 || interlace)
5633 cs_interface_set_copy_array(ifs,
5634 datatype,
5635 stride,
5636 true, /* src_on_parent */
5637 var,
5638 buf);
5639
5640 else
5641 _interface_set_copy_array_ni(ifs,
5642 datatype,
5643 n_elts,
5644 stride,
5645 var,
5646 buf);
5647
5648 /* Now increment values */
5649
5650 cs_lnum_t j = 0;
5651
5652 for (int i = 0; i < ifs->size; i++) {
5653
5654 cs_interface_t *itf = ifs->interfaces[i];
5655
5656 for (int tr_id = 0; tr_id < n_tr; tr_id++) {
5657
5658 cs_lnum_t s_id = itf->tr_index[tr_id];
5659 cs_lnum_t e_id = itf->tr_index[tr_id+1];
5660 if (e_id > s_id && tr_id > 0) {
5661 if ( fvm_periodicity_get_type(ifs->periodicity, tr_id-1)
5662 >= tr_threshold)
5663 continue;
5664 }
5665
5666 switch (datatype) {
5667
5668 case CS_CHAR:
5669 {
5670 char *v = var;
5671 const char *p = (const char *)buf + j*stride;
5672 if (stride < 2 || interlace) {
5673 for (cs_lnum_t k = s_id; k < e_id; k++) {
5674 cs_lnum_t elt_id = itf->elt_id[k];
5675 for (cs_lnum_t l = 0; l < stride; l++)
5676 v[elt_id*stride + l] = CS_MAX(v[elt_id*stride + l],
5677 p[k*stride + l]);
5678 }
5679 }
5680 else {
5681 for (cs_lnum_t k = s_id; k < e_id; k++) {
5682 cs_lnum_t elt_id = itf->elt_id[k];
5683 for (cs_lnum_t l = 0; l < stride; l++)
5684 v[elt_id + l*n_elts] = CS_MAX(v[elt_id + l*n_elts],
5685 p[k*stride + l]);
5686 }
5687 }
5688 }
5689 break;
5690
5691 case CS_FLOAT:
5692 {
5693 float *v = var;
5694 const float *p = (const float *)buf + j*stride;
5695 if (stride < 2 || interlace) {
5696 for (cs_lnum_t k = s_id; k < e_id; k++) {
5697 cs_lnum_t elt_id = itf->elt_id[k];
5698 for (cs_lnum_t l = 0; l < stride; l++)
5699 v[elt_id*stride + l] = CS_MAX(v[elt_id*stride + l],
5700 p[k*stride + l]);
5701 }
5702 }
5703 else {
5704 for (cs_lnum_t k = s_id; k < e_id; k++) {
5705 cs_lnum_t elt_id = itf->elt_id[k];
5706 for (cs_lnum_t l = 0; l < stride; l++)
5707 v[elt_id + l*n_elts] = CS_MAX(v[elt_id + l*n_elts],
5708 p[k*stride + l]);
5709 }
5710 }
5711 }
5712 break;
5713
5714 case CS_DOUBLE:
5715 {
5716 double *v = var;
5717 const double *p = (const double *)buf + j*stride;
5718 if (stride < 2 || interlace) {
5719 for (cs_lnum_t k = s_id; k < e_id; k++) {
5720 cs_lnum_t elt_id = itf->elt_id[k];
5721 for (cs_lnum_t l = 0; l < stride; l++)
5722 v[elt_id*stride + l] = CS_MAX(v[elt_id*stride + l],
5723 p[k*stride + l]);
5724 }
5725 }
5726 else {
5727 for (cs_lnum_t k = s_id; k < e_id; k++) {
5728 cs_lnum_t elt_id = itf->elt_id[k];
5729 for (cs_lnum_t l = 0; l < stride; l++)
5730 v[elt_id + l*n_elts] = CS_MAX(v[elt_id + l*n_elts],
5731 p[k*stride + l]);
5732 }
5733 }
5734 }
5735 break;
5736
5737 case CS_INT32:
5738 {
5739 int32_t *v = var;
5740 const int32_t *p = (const int32_t *)buf + j*stride;
5741 if (stride < 2 || interlace) {
5742 for (cs_lnum_t k = s_id; k < e_id; k++) {
5743 cs_lnum_t elt_id = itf->elt_id[k];
5744 for (cs_lnum_t l = 0; l < stride; l++)
5745 v[elt_id*stride + l] = CS_MAX(v[elt_id*stride + l],
5746 p[k*stride + l]);
5747 }
5748 }
5749 else {
5750 for (cs_lnum_t k = s_id; k < e_id; k++) {
5751 cs_lnum_t elt_id = itf->elt_id[k];
5752 for (cs_lnum_t l = 0; l < stride; l++)
5753 v[elt_id + l*n_elts] = CS_MAX(v[elt_id + l*n_elts],
5754 p[k*stride + l]);
5755 }
5756 }
5757 }
5758 break;
5759
5760 case CS_INT64:
5761 {
5762 int64_t *v = var;
5763 const int64_t *p = (const int64_t *)buf + j*stride;
5764 if (stride < 2 || interlace) {
5765 for (cs_lnum_t k = s_id; k < e_id; k++) {
5766 cs_lnum_t elt_id = itf->elt_id[k];
5767 for (cs_lnum_t l = 0; l < stride; l++)
5768 v[elt_id*stride + l] = CS_MAX(v[elt_id*stride + l],
5769 p[k*stride + l]);
5770 }
5771 }
5772 else {
5773 for (cs_lnum_t k = s_id; k < e_id; k++) {
5774 cs_lnum_t elt_id = itf->elt_id[k];
5775 for (cs_lnum_t l = 0; l < stride; l++)
5776 v[elt_id + l*n_elts] = CS_MAX(v[elt_id + l*n_elts],
5777 p[k*stride + l]);
5778 }
5779 }
5780 }
5781 break;
5782
5783 case CS_UINT16:
5784 {
5785 uint16_t *v = var;
5786 const uint16_t *p = (const uint16_t *)buf + j*stride;
5787 if (stride < 2 || interlace) {
5788 for (cs_lnum_t k = s_id; k < e_id; k++) {
5789 cs_lnum_t elt_id = itf->elt_id[k];
5790 for (cs_lnum_t l = 0; l < stride; l++)
5791 v[elt_id*stride + l] = CS_MAX(v[elt_id*stride + l],
5792 p[k*stride + l]);
5793 }
5794 }
5795 else {
5796 for (cs_lnum_t k = s_id; k < e_id; k++) {
5797 cs_lnum_t elt_id = itf->elt_id[k];
5798 for (cs_lnum_t l = 0; l < stride; l++)
5799 v[elt_id + l*n_elts] = CS_MAX(v[elt_id + l*n_elts],
5800 p[k*stride + l]);
5801 }
5802 }
5803 }
5804 break;
5805
5806 case CS_UINT32:
5807 {
5808 uint32_t *v = var;
5809 const uint32_t *p = (const uint32_t *)buf + j*stride;
5810 if (stride < 2 || interlace) {
5811 for (cs_lnum_t k = s_id; k < e_id; k++) {
5812 cs_lnum_t elt_id = itf->elt_id[k];
5813 for (cs_lnum_t l = 0; l < stride; l++)
5814 v[elt_id*stride + l] = CS_MAX(v[elt_id*stride + l],
5815 p[k*stride + l]);
5816 }
5817 }
5818 else {
5819 for (cs_lnum_t k = s_id; k < e_id; k++) {
5820 cs_lnum_t elt_id = itf->elt_id[k];
5821 for (cs_lnum_t l = 0; l < stride; l++)
5822 v[elt_id + l*n_elts] = CS_MAX(v[elt_id + l*n_elts],
5823 p[k*stride + l]);
5824 }
5825 }
5826 }
5827 break;
5828
5829 case CS_UINT64:
5830 {
5831 uint64_t *v = var;
5832 const uint64_t *p = (const uint64_t *)buf + j*stride;
5833 if (stride < 2 || interlace) {
5834 for (cs_lnum_t k = s_id; k < e_id; k++) {
5835 cs_lnum_t elt_id = itf->elt_id[k];
5836 for (cs_lnum_t l = 0; l < stride; l++)
5837 v[elt_id*stride + l] = CS_MAX(v[elt_id*stride + l],
5838 p[k*stride + l]);
5839 }
5840 }
5841 else {
5842 for (cs_lnum_t k = s_id; k < e_id; k++) {
5843 cs_lnum_t elt_id = itf->elt_id[k];
5844 for (cs_lnum_t l = 0; l < stride; l++)
5845 v[elt_id + l*n_elts] = CS_MAX(v[elt_id + l*n_elts],
5846 p[k*stride + l]);
5847 }
5848 }
5849 }
5850 break;
5851
5852 default:
5853 bft_error(__FILE__, __LINE__, 0,
5854 _("Called %s with unhandled datatype (%d)."),
5855 __func__, (int)datatype);
5856 }
5857
5858 } /* End of loop on transformations */
5859
5860 j += itf->size;
5861
5862 }
5863
5864 BFT_FREE(buf);
5865 }
5866
5867 /*----------------------------------------------------------------------------*/
5868 /*!
5869 * \brief Add matching element id information to an interface set.
5870 *
5871 * This information is required by calls to cs_interface_get_match_ids(),
5872 * and may be freed using cs_interface_set_free_match_ids().
5873 *
5874 * \param[in] ifs pointer to interface set structure
5875 */
5876 /*----------------------------------------------------------------------------*/
5877
5878 void
cs_interface_set_add_match_ids(cs_interface_set_t * ifs)5879 cs_interface_set_add_match_ids(cs_interface_set_t *ifs)
5880 {
5881 ifs->match_id_rc += 1;
5882
5883 if (ifs->match_id_rc > 1)
5884 return;
5885
5886 int i;
5887 cs_lnum_t j;
5888 int local_rank = 0;
5889 cs_lnum_t *send_buf = NULL;
5890
5891 #if defined(HAVE_MPI)
5892
5893 int n_ranks = 1;
5894 int request_count = 0;
5895 MPI_Request *request = NULL;
5896 MPI_Status *status = NULL;
5897
5898 if (ifs->comm != MPI_COMM_NULL) {
5899 MPI_Comm_rank(ifs->comm, &local_rank);
5900 MPI_Comm_size(ifs->comm, &n_ranks);
5901 }
5902
5903 #endif
5904
5905 assert(ifs != NULL);
5906
5907 BFT_MALLOC(send_buf, cs_interface_set_n_elts(ifs), cs_lnum_t);
5908
5909 /* Prepare send buffer first (for same rank, send_order is swapped
5910 with match_id directly) */
5911
5912 for (i = 0, j = 0; i < ifs->size; i++) {
5913
5914 cs_lnum_t k;
5915 cs_interface_t *itf = ifs->interfaces[i];
5916
5917 /* When this function is called, a distant-rank interface should have
5918 a send_order array, but not a match_id array */
5919
5920 assert(itf->match_id == NULL);
5921 BFT_MALLOC(itf->match_id, itf->size, cs_lnum_t);
5922
5923 for (k = 0; k < itf->size; k++)
5924 send_buf[j + k] = itf->elt_id[itf->send_order[k]];
5925
5926 j += itf->size;
5927
5928 }
5929
5930 /* Now exchange data */
5931
5932 #if defined(HAVE_MPI)
5933 if (n_ranks > 1) {
5934 BFT_MALLOC(request, ifs->size*2, MPI_Request);
5935 BFT_MALLOC(status, ifs->size*2, MPI_Status);
5936 }
5937 #endif
5938
5939 for (i = 0, j = 0; i < ifs->size; i++) {
5940
5941 cs_interface_t *itf = ifs->interfaces[i];
5942
5943 if (itf->rank == local_rank)
5944 memcpy(itf->match_id, send_buf + j, itf->size*sizeof(cs_lnum_t));
5945
5946 #if defined(HAVE_MPI)
5947
5948 else /* if (itf->rank != local_rank) */
5949 MPI_Irecv(itf->match_id,
5950 itf->size,
5951 CS_MPI_LNUM,
5952 itf->rank,
5953 itf->rank,
5954 ifs->comm,
5955 &(request[request_count++]));
5956
5957 #endif
5958
5959 j += itf->size;
5960
5961 }
5962
5963 #if defined(HAVE_MPI)
5964
5965 if (n_ranks > 1) {
5966
5967 for (i = 0, j = 0; i < ifs->size; i++) {
5968 cs_interface_t *itf = ifs->interfaces[i];
5969 if (itf->rank != local_rank)
5970 MPI_Isend(send_buf + j,
5971 itf->size,
5972 CS_MPI_LNUM,
5973 itf->rank,
5974 local_rank,
5975 ifs->comm,
5976 &(request[request_count++]));
5977 j += itf->size;
5978 }
5979
5980 MPI_Waitall(request_count, request, status);
5981
5982 BFT_FREE(request);
5983 BFT_FREE(status);
5984
5985 }
5986
5987 #endif /* defined(HAVE_MPI) */
5988
5989 BFT_FREE(send_buf);
5990 }
5991
5992 /*----------------------------------------------------------------------------*/
5993 /*!
5994 * \brief Free matching element id information of an interface set.
5995 *
5996 * This information is used by calls to cs_interface_get_match_ids(),
5997 * and may be defined using cs_interface_set_add_match_ids().
5998 *
5999 * \param[in] ifs pointer to interface set structure
6000 */
6001 /*----------------------------------------------------------------------------*/
6002
6003 void
cs_interface_set_free_match_ids(cs_interface_set_t * ifs)6004 cs_interface_set_free_match_ids(cs_interface_set_t *ifs)
6005 {
6006 assert(ifs != NULL);
6007
6008 if (ifs->match_id_rc > 0)
6009 ifs->match_id_rc -= 1;
6010
6011 if (ifs->match_id_rc > 0)
6012 return;
6013
6014 for (int i = 0; i < ifs->size; i++) {
6015 cs_interface_t *itf = ifs->interfaces[i];
6016 assert(itf->send_order != NULL || itf->size == 0);
6017 BFT_FREE(itf->match_id);
6018 }
6019 }
6020
6021 /*----------------------------------------------------------------------------*/
6022 /*!
6023 * \brief Tag mutiple elements of local interface with a given values.
6024 *
6025 * This is effective only on an interface matching the current rank,
6026 * and when multiple (periodic) instances of a given element appear on that
6027 * rank, al instances except the first are tagged with the chosen value.
6028 *
6029 * \param[in] ifs pointer to interface set structure
6030 * \param[in] periodicity periodicity information (NULL if none)
6031 * \param[in] tr_ignore if > 0, ignore periodicity with rotation;
6032 * if > 1, ignore all periodic transforms
6033 * \param[in] tag_value tag to assign
6034 * \param[in, out] tag global tag array for elements
6035 */
6036 /*----------------------------------------------------------------------------*/
6037
6038 void
cs_interface_tag_local_matches(const cs_interface_t * itf,const fvm_periodicity_t * periodicity,int tr_ignore,cs_gnum_t tag_value,cs_gnum_t * tag)6039 cs_interface_tag_local_matches(const cs_interface_t *itf,
6040 const fvm_periodicity_t *periodicity,
6041 int tr_ignore,
6042 cs_gnum_t tag_value,
6043 cs_gnum_t *tag)
6044 {
6045 int l_rank = CS_MAX(cs_glob_rank_id, 0);
6046
6047 if (itf->rank != l_rank)
6048 return;
6049
6050 /* Build temporary local match id */
6051
6052 cs_lnum_t *match_id;
6053 BFT_MALLOC(match_id, itf->size, cs_lnum_t);
6054
6055 for (cs_lnum_t i = 0; i < itf->size; i++)
6056 match_id[i] = itf->elt_id[itf->send_order[i]];
6057
6058 /* Also filter by transform type if needed */
6059
6060 fvm_periodicity_type_t p_type_max = FVM_PERIODICITY_MIXED;
6061 int n_tr_max = itf->tr_index_size - 2;
6062
6063 assert(n_tr_max == fvm_periodicity_get_n_transforms(periodicity));
6064
6065 if (tr_ignore == 1)
6066 p_type_max = FVM_PERIODICITY_TRANSLATION;
6067 else if (tr_ignore == 2)
6068 p_type_max = FVM_PERIODICITY_NULL;
6069
6070 const cs_lnum_t *tr_index = itf->tr_index;
6071
6072 for (int tr_id = 0; tr_id < n_tr_max; tr_id++) {
6073
6074 if (fvm_periodicity_get_type(periodicity, tr_id) > p_type_max)
6075 continue;
6076
6077 cs_lnum_t s_id = tr_index[tr_id+1];
6078 cs_lnum_t e_id = tr_index[tr_id+2];
6079
6080 for (cs_lnum_t j = s_id; j < e_id; j++) {
6081 cs_lnum_t k = CS_MAX(itf->elt_id[j], match_id[j]);
6082 tag[k] = tag_value;
6083 }
6084
6085 }
6086
6087 BFT_FREE(match_id);
6088 }
6089
6090 /*----------------------------------------------------------------------------*/
6091 /*!
6092 * \brief Dump printout of an interface list.
6093 *
6094 * \param[in] ifs pointer to structure that should be dumped
6095 */
6096 /*----------------------------------------------------------------------------*/
6097
6098 void
cs_interface_set_dump(const cs_interface_set_t * ifs)6099 cs_interface_set_dump(const cs_interface_set_t *ifs)
6100 {
6101 int i;
6102 cs_lnum_t j;
6103
6104 /* Dump local info */
6105
6106 if (ifs == NULL) {
6107 bft_printf(" interface list: nil\n");
6108 return;
6109 }
6110
6111 bft_printf(" interface list: %p\n"
6112 " n interfaces: %d\n",
6113 (const void *)ifs, ifs->size);
6114
6115 for (i = 0, j = 0; i < ifs->size; i++) {
6116 bft_printf("\n interface %d:\n", i);
6117 _cs_interface_dump(ifs->interfaces[i]);
6118 j += (ifs->interfaces[i])->size;
6119 }
6120
6121 if (ifs->periodicity != NULL)
6122 bft_printf("\n periodicity %p:\n", (const void *)(ifs->periodicity));
6123 }
6124
6125 /*----------------------------------------------------------------------------*/
6126
6127 END_C_DECLS
6128