1 /*============================================================================
2 * Determine and update geometrical neighborhood information.
3 *============================================================================*/
4
5 /*
6 This file is part of Code_Saturne, a general-purpose CFD tool.
7
8 Copyright (C) 1998-2021 EDF S.A.
9
10 This program is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free Software
12 Foundation; either version 2 of the License, or (at your option) any later
13 version.
14
15 This program is distributed in the hope that it will be useful, but WITHOUT
16 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
18 details.
19
20 You should have received a copy of the GNU General Public License along with
21 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
22 Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 */
24
25 /*----------------------------------------------------------------------------*/
26
27 #include "cs_defs.h"
28
29 /*----------------------------------------------------------------------------
30 * Standard C library headers
31 *----------------------------------------------------------------------------*/
32
33 #include <assert.h>
34 #include <limits.h>
35 #include <math.h>
36 #include <string.h>
37
38 /*----------------------------------------------------------------------------
39 * Local headers
40 *----------------------------------------------------------------------------*/
41
42 #include "bft_mem.h"
43 #include "bft_printf.h"
44
45 #include "fvm_box.h"
46 #include "fvm_box_tree.h"
47
48 #include "cs_order.h"
49 #include "cs_part_to_block.h"
50 #include "cs_timer.h"
51
52 /*----------------------------------------------------------------------------
53 * Header for the current file
54 *----------------------------------------------------------------------------*/
55
56 #include "fvm_neighborhood.h"
57
58 #include "cs_all_to_all.h"
59
60 /*----------------------------------------------------------------------------*/
61
62 BEGIN_C_DECLS
63
64 /*! \cond DOXYGEN_SHOULD_SKIP_THIS */
65
66 /*=============================================================================
67 * Local Macro and Type definitions
68 *============================================================================*/
69
70 /* Neighborhood statistics (using box tree) */
71 /*------------------------------------------*/
72
73 typedef struct {
74
75 int dim; /* Layout dimension */
76
77 /* The following fields have 3 global values:
78 mean on ranks, minimum on ranks, and maximum on ranks */
79
80 int depth[3]; /* Tree depth */
81 cs_lnum_t n_leaves[3]; /* Number of leaves */
82 cs_lnum_t n_boxes[3]; /* Number of associated boxes */
83 cs_lnum_t n_threshold_leaves[3]; /* Number of leaves over threshold */
84 cs_lnum_t n_leaf_boxes[3]; /* Number of boxes per leaf */
85 size_t mem_used[3]; /* Memory used */
86 size_t mem_required[3]; /* Memory temporarily required */
87
88 } _box_tree_stats_t;
89
90 /* Main neighborhood structure */
91 /*-----------------------------*/
92
93 struct _fvm_neighborhood_t {
94
95 cs_lnum_t n_elts; /* Number of elements */
96
97 cs_gnum_t *elt_num; /* Global numbers associated with
98 elements in local block
99 (size: n_elts) */
100 cs_lnum_t *neighbor_index; /* Start index of neighbors
101 (size: n_elts + 1) */
102 cs_gnum_t *neighbor_num; /* Global element neighbor numbers
103 (size: neighbor_index[n_elts]) */
104
105 #if defined(HAVE_MPI)
106 MPI_Comm comm; /* Associated MPI communicator */
107 #endif
108
109 /* Algorithm-related options */
110
111 int max_tree_depth; /* Maximum search tree depth */
112 int leaf_threshold; /* Maximum number of boxes which can
113 be related to a leaf of the tree if
114 level < max_tree_depth */
115 float max_box_ratio; /* Stop adding levels to tree when
116 ( n_linked_boxes
117 > max_box_ratio*n_initial_boxes) */
118 float max_box_ratio_distrib; /* In parallel, max_box_ratio for
119 initial coarse tree used for
120 distribution */
121
122 _box_tree_stats_t bt_stats; /* Statistics associated with the
123 box-trees used for search */
124
125 /* Timings */
126
127 double cpu_time[2]; /* CPU time for tree construction and query */
128 double wtime[2]; /* Wall clock time for tree construction and query */
129
130 };
131
132 /*=============================================================================
133 * Static global variables
134 *============================================================================*/
135
136 /*============================================================================
137 * Private function definitions
138 *============================================================================*/
139
140 /*----------------------------------------------------------------------------
141 * Initialize box_tree statistics
142 *
143 * parameters:
144 * bts --> pointer to box tree statistics structure
145 *---------------------------------------------------------------------------*/
146
147 static void
_init_bt_statistics(_box_tree_stats_t * bts)148 _init_bt_statistics(_box_tree_stats_t *bts)
149 {
150 size_t i;
151
152 assert(bts != NULL);
153
154 bts->dim = 0;
155
156 for (i = 0; i < 3; i++) {
157 bts->depth[i] = 0;
158 bts->n_leaves[i] = 0;
159 bts->n_boxes[i] = 0;
160 bts->n_threshold_leaves[i] = 0;
161 bts->n_leaf_boxes[i] = 0;
162 bts->mem_used[i] = 0;
163 bts->mem_required[i] = 0;
164 }
165 }
166 /*----------------------------------------------------------------------------
167 * Update box-tree statistics.
168 *
169 * For most fields, we replace previous values with the current ones.
170 *
171 * For memory required, we are interested in the maximum values over time
172 * (i.e. algorthm steps); this is the case even for the minimal memory
173 * required, we is thus the time maximum of the rank minimum.
174 *
175 * parameters:
176 * bts <-> pointer to box tree statistics structure
177 * boxes <-- pointer to box tree structure
178 *---------------------------------------------------------------------------*/
179
180 static void
_update_bt_statistics(_box_tree_stats_t * bts,const fvm_box_tree_t * bt)181 _update_bt_statistics(_box_tree_stats_t *bts,
182 const fvm_box_tree_t *bt)
183 {
184 int dim;
185 size_t i;
186 size_t mem_required[3];
187
188 assert(bts != NULL);
189
190 dim = fvm_box_tree_get_stats(bt,
191 bts->depth,
192 bts->n_leaves,
193 bts->n_boxes,
194 bts->n_threshold_leaves,
195 bts->n_leaf_boxes,
196 bts->mem_used,
197 mem_required);
198
199 bts->dim = dim;
200
201 for (i = 0; i < 3; i++)
202 bts->mem_required[i] = CS_MAX(bts->mem_required[i], mem_required[i]);
203 }
204
205 #if defined(HAVE_MPI)
206
207 /*----------------------------------------------------------------------------
208 * Distribute bounding boxes over the ranks according to a Morton encoding
209 * index. Try to get a well-balanced distribution and spatially coherent.
210 *
211 * parameters:
212 * n <-> pointer to neighborhood management structure
213 * boxes <-> box set to redistribute
214 *---------------------------------------------------------------------------*/
215
216 static void
_redistribute_boxes(fvm_neighborhood_t * n,fvm_box_set_t * boxes)217 _redistribute_boxes(fvm_neighborhood_t *n,
218 fvm_box_set_t *boxes)
219 {
220 fvm_box_tree_t *coarse_tree = NULL;
221 fvm_box_distrib_t *distrib = NULL;
222
223 /* Sanity checks */
224
225 assert(boxes != NULL);
226
227 coarse_tree = fvm_box_tree_create(n->max_tree_depth,
228 n->leaf_threshold,
229 n->max_box_ratio_distrib);
230
231 /* Build a tree and associate boxes */
232
233 fvm_box_tree_set_boxes(coarse_tree,
234 boxes,
235 FVM_BOX_TREE_SYNC_LEVEL);
236
237 _update_bt_statistics(&(n->bt_stats), coarse_tree);
238
239 /*
240 Compute an index based on Morton encoding to ensure a good distribution
241 of bounding boxes among the ranks.
242 */
243
244 distrib = fvm_box_tree_get_distrib(coarse_tree, boxes);
245
246 fvm_box_tree_destroy(&coarse_tree);
247
248 #if 0 && defined(DEBUG) && !defined(NDEBUG)
249 fvm_box_distrib_dump_statistics(distrib, n->comm);
250 #endif
251
252 /* Define a new distribution of boxes according to the Morton
253 encoding index */
254
255 fvm_box_set_redistribute(distrib, boxes);
256
257 /* Delete intermediate structures */
258
259 fvm_box_distrib_destroy(&distrib);
260 }
261
262 #endif /* defined(HAVE_MPI) */
263
264 /*----------------------------------------------------------------------------
265 * Sort an array "a" between its left bound "l" and its right bound "r"
266 * using a shell sort (Knuth algorithm).
267 *
268 * parameters:
269 * l <-- left bound
270 * r <-- right bound
271 * a <-> array to sort
272 *---------------------------------------------------------------------------*/
273
274 static inline void
_gnum_shellsort(cs_lnum_t l,cs_lnum_t r,cs_gnum_t a[])275 _gnum_shellsort(cs_lnum_t l,
276 cs_lnum_t r,
277 cs_gnum_t a[])
278 {
279 cs_lnum_t i, j, h;
280
281 /* Compute stride */
282 for (h = 1; h <= (r-l)/9; h = 3*h+1);
283
284 /* Sort array */
285 for (; h > 0; h /= 3) {
286
287 for (i = l+h; i < r; i++) {
288
289 cs_gnum_t v = a[i];
290
291 j = i;
292 while ((j >= l+h) && (v < a[j-h])) {
293 a[j] = a[j-h];
294 j -= h;
295 }
296 a[j] = v;
297
298 } /* Loop on array elements */
299
300 } /* End of loop on stride */
301 }
302
303 /*----------------------------------------------------------------------------
304 * Remove multiple element neighbor occurences
305 *
306 * parameters:
307 * n <-> pointer to neighborhood management structure
308 *----------------------------------------------------------------------------*/
309
310 static void
_clean_neighbor_nums(fvm_neighborhood_t * n)311 _clean_neighbor_nums(fvm_neighborhood_t *n)
312 {
313 cs_lnum_t i, j, start_id, end_id, saved_id, n_elts, n_neighbors;
314
315 cs_lnum_t n_count = 0;
316
317 assert(n != NULL);
318
319 if (n->n_elts == 0)
320 return;
321
322 n_elts = n->n_elts;
323 n_neighbors = n->neighbor_index[n_elts];
324
325 /* Remove redundant elements */
326
327 saved_id = n->neighbor_index[0];
328
329 for (i = 0; i < n_elts; i++) {
330
331 start_id = saved_id;
332 end_id = n->neighbor_index[i+1];
333
334 if (end_id - start_id > 0) {
335
336 _gnum_shellsort(start_id, end_id, n->neighbor_num);
337
338 n->neighbor_num[n_count++] = n->neighbor_num[start_id];
339
340 for (j = start_id + 1; j < end_id; j++) {
341 if (n->neighbor_num[j] != n->neighbor_num[j-1])
342 n->neighbor_num[n_count++] = n->neighbor_num[j];
343 }
344
345 }
346
347 saved_id = end_id;
348 n->neighbor_index[i+1] = n_count;
349
350 } /* End of loop on elements */
351
352 if (n_count < n_neighbors)
353 BFT_REALLOC(n->neighbor_num, n_count, cs_gnum_t);
354 }
355
356 /*----------------------------------------------------------------------------
357 * Order a neighborhood based on the global numbering of elements.
358 *
359 * parameters:
360 * n <-> pointer to neighborhood management structure
361 *----------------------------------------------------------------------------*/
362
363 static void
_order_neighborhood(fvm_neighborhood_t * n)364 _order_neighborhood(fvm_neighborhood_t *n)
365 {
366 cs_lnum_t i, j, k, order_id, shift, e_count;
367 cs_lnum_t n_elts, n_neighbors, n_elt_neighbors;
368 cs_gnum_t prev_num, cur_num;
369
370 cs_lnum_t *order = NULL, *old_index = NULL;
371 cs_gnum_t *old_e_num = NULL, *old_n_num = NULL;
372
373 assert(n != NULL);
374
375 if (n->n_elts == 0)
376 return;
377
378 n_elts = n->n_elts;
379 n_neighbors = n->neighbor_index[n_elts];
380
381 BFT_MALLOC(order, n_elts, cs_lnum_t);
382 BFT_MALLOC(old_e_num, n_elts, cs_gnum_t);
383 BFT_MALLOC(old_index, n_elts + 1, cs_lnum_t);
384 BFT_MALLOC(old_n_num, n_neighbors, cs_gnum_t);
385
386 memcpy(old_e_num, n->elt_num, n_elts*sizeof(cs_gnum_t));
387 memcpy(old_index, n->neighbor_index, (n_elts + 1)*sizeof(cs_lnum_t));
388 memcpy(old_n_num, n->neighbor_num, n_neighbors*sizeof(cs_gnum_t));
389
390 /* Order elt_num */
391
392 cs_order_gnum_allocated(NULL, old_e_num, order, n_elts);
393
394 /* Reshape according to the new ordering */
395
396 /* Add first element */
397
398 order_id = order[0];
399 shift = 0;
400
401 n->elt_num[0] = old_e_num[order_id];
402 prev_num = n->elt_num[0];
403
404 n->neighbor_index[0] = 0;
405 n->neighbor_index[1] = old_index[order_id+1] - old_index[order_id];
406
407 /* Loop on second-to last elements, merging data if an element has
408 already appeared */
409
410 for (i = 1, e_count = 1; i < n_elts; i++) {
411
412 order_id = order[i];
413
414 n_elt_neighbors = old_index[order_id+1] - old_index[order_id];
415
416 shift = n->neighbor_index[i];
417
418 cur_num = old_e_num[order_id];
419
420 if (cur_num != prev_num) {
421 n->elt_num[e_count] = cur_num;
422 n->neighbor_index[e_count+1] = ( n->neighbor_index[e_count]
423 + n_elt_neighbors);
424 e_count += 1;
425 prev_num = cur_num;
426 }
427 else
428 n->neighbor_index[e_count] += n_elt_neighbors;
429
430 for (j = old_index[order_id], k = 0; k < n_elt_neighbors; j++, k++)
431 n->neighbor_num[shift + k] = old_n_num[j];
432
433 } /* End of loop on elements */
434
435 assert(n->neighbor_index[e_count] == n_neighbors);
436
437 /* Free temporary memory */
438
439 BFT_FREE(order);
440 BFT_FREE(old_e_num);
441 BFT_FREE(old_index);
442 BFT_FREE(old_n_num);
443 }
444
445 #if defined(HAVE_MPI)
446
447 /*----------------------------------------------------------------------------
448 * Synchronize a neighborhood management structure and distribute the
449 * resulting data over ranks by block.
450 *
451 * parameters:
452 * n <-- pointer to neighborhood management structure
453 * n_g_elts <-- global number of elements
454 *----------------------------------------------------------------------------*/
455
456 static void
_sync_by_block(fvm_neighborhood_t * n,cs_gnum_t n_g_elts)457 _sync_by_block(fvm_neighborhood_t *n,
458 cs_gnum_t n_g_elts)
459 {
460 assert(n != NULL);
461
462 if (n_g_elts == 0 || n->comm == MPI_COMM_NULL)
463 return;
464
465 /* Initialization */
466
467 int rank_id, n_ranks;
468
469 MPI_Comm_rank(n->comm, &rank_id);
470 MPI_Comm_size(n->comm, &n_ranks);
471
472 cs_block_dist_info_t bi = cs_block_dist_compute_sizes(rank_id,
473 n_ranks,
474 0,
475 0,
476 n_g_elts);
477
478 cs_all_to_all_t *d
479 = cs_all_to_all_create_from_block(n->n_elts,
480 0, /* flags */
481 n->elt_num,
482 bi,
483 n->comm);
484
485 cs_gnum_t *send_meta;
486 BFT_MALLOC(send_meta, n->n_elts*2, cs_gnum_t);
487
488 for (cs_lnum_t i = 0; i < n->n_elts; i++) {
489 send_meta[i*2] = n->elt_num[i];
490 send_meta[i*2+1] = n->neighbor_index[i+1] - n->neighbor_index[i];
491 }
492
493 cs_gnum_t *recv_meta
494 = cs_all_to_all_copy_array(d,
495 CS_GNUM_TYPE,
496 2,
497 false, /* reverse */
498 send_meta,
499 NULL);
500
501 BFT_FREE(send_meta);
502
503 cs_lnum_t n_recv = cs_all_to_all_n_elts_dest(d);
504
505 cs_gnum_t *recv_gnum;
506 BFT_MALLOC(recv_gnum, n_recv, cs_gnum_t);
507 cs_lnum_t *recv_index;
508 BFT_MALLOC(recv_index, n_recv+1, cs_lnum_t);
509
510 recv_index[0] = 0;
511 for (cs_lnum_t i = 0; i < n_recv; i++) {
512 recv_gnum[i] = recv_meta[i*2];
513 recv_index[i+1] = recv_index[i] + recv_meta[i*2+1];
514 }
515
516 BFT_FREE(recv_meta);
517
518 cs_gnum_t *recv_neighbor
519 = cs_all_to_all_copy_indexed(d,
520 CS_GNUM_TYPE,
521 false, /* reverse */
522 n->neighbor_index,
523 n->neighbor_num,
524 recv_index,
525 NULL);
526
527 /* Build arrays corresponding to block distribution of neighborhood */
528
529 n->n_elts = bi.gnum_range[1] - bi.gnum_range[0];
530
531 BFT_FREE(n->elt_num);
532 BFT_FREE(n->neighbor_index);
533 BFT_FREE(n->neighbor_num);
534
535 BFT_MALLOC(n->elt_num, n->n_elts, cs_gnum_t);
536 BFT_MALLOC(n->neighbor_index, n->n_elts + 1, cs_lnum_t);
537
538 for (cs_lnum_t i = 0; i < n->n_elts; i++) {
539 n->elt_num[i] = bi.gnum_range[0] + i;
540 n->neighbor_index[i] = 0;
541 }
542 n->neighbor_index[n->n_elts] = 0;
543
544 /* Count element neighbors in block distribution */
545
546 for (cs_lnum_t i = 0; i < n_recv; i++) {
547 cs_lnum_t elt_id = recv_gnum[i] - bi.gnum_range[0];
548 n->neighbor_index[elt_id + 1] += recv_index[i+1] - recv_index[i];
549 }
550
551 /* Transform element neighbor count to index */
552
553 n->neighbor_index[0] = 0;
554 for (cs_lnum_t i = 0; i < n->n_elts; i++)
555 n->neighbor_index[i+1] += n->neighbor_index[i];
556
557 BFT_MALLOC(n->neighbor_num, n->neighbor_index[n->n_elts], cs_gnum_t);
558
559 /* Fill element neighbors in block distribution */
560
561 cs_lnum_t *counter;
562 BFT_MALLOC(counter, n->n_elts, cs_lnum_t);
563
564 for (cs_lnum_t i = 0; i < n->n_elts; i++)
565 counter[i] = 0;
566
567 for (cs_lnum_t i = 0; i < n_recv; i++) {
568
569 cs_lnum_t elt_id = recv_gnum[i] - bi.gnum_range[0];
570
571 cs_lnum_t s_id = recv_index[i];
572 cs_lnum_t e_id = recv_index[i+1];
573
574 cs_lnum_t shift = n->neighbor_index[elt_id] + counter[elt_id];
575
576 for (cs_lnum_t j = s_id; j < e_id; j++)
577 n->neighbor_num[shift++] = recv_neighbor[j];
578
579 counter[elt_id] += e_id - s_id;
580
581 } /* End of loop on ranks */
582
583 BFT_FREE(recv_gnum);
584 BFT_FREE(counter);
585 BFT_FREE(recv_neighbor);
586 BFT_FREE(recv_index);
587
588 cs_all_to_all_destroy(&d);
589
590 /* Remove redundant data */
591
592 _clean_neighbor_nums(n);
593 }
594
595 #endif /* defined(HAVE_MPI) */
596
597 /*! (DOXYGEN_SHOULD_SKIP_THIS) \endcond */
598
599 /*============================================================================
600 * Public function definitions
601 *============================================================================*/
602
603 /*----------------------------------------------------------------------------
604 * Create a neighborhood_t structure and initialize it.
605 *
606 * parameters:
607 * comm <-- associated MPI communicator
608 *
609 * returns:
610 * pointer to an empty fvm_box_tree_t structure.
611 *----------------------------------------------------------------------------*/
612
613 #if defined(HAVE_MPI)
614 fvm_neighborhood_t *
fvm_neighborhood_create(MPI_Comm comm)615 fvm_neighborhood_create(MPI_Comm comm)
616 #else
617 fvm_neighborhood_t *
618 fvm_neighborhood_create(void)
619 #endif
620 {
621 double w_start, w_end, cpu_start, cpu_end;
622
623 fvm_neighborhood_t *n = NULL;
624
625 /* Timer start */
626
627 w_start = cs_timer_wtime();
628 cpu_start = cs_timer_cpu_time();
629
630 /* Allocate and initialize */
631
632 BFT_MALLOC(n, 1, fvm_neighborhood_t);
633
634 n->n_elts = 0;
635 n->elt_num = NULL;
636 n->neighbor_index = NULL;
637 n->neighbor_num = NULL;
638
639 #if defined(HAVE_MPI)
640 n->comm = comm;
641 #endif
642
643 /* Algorithm options */
644
645 n->max_tree_depth = 30; /* defaults */
646 n->leaf_threshold = 30;
647 n->max_box_ratio = 10.0;
648 n->max_box_ratio_distrib = 6.0;
649
650 _init_bt_statistics(&(n->bt_stats));
651
652 /* Timer end */
653
654 w_end = cs_timer_wtime();
655 cpu_end = cs_timer_cpu_time();
656
657 n->cpu_time[0] = cpu_end - cpu_start; /* build time */
658 n->wtime[0] = w_end - w_start;
659 n->cpu_time[1] = 0.0; /* query */
660 n->wtime[1] = 0.0;
661
662 /* Return structure */
663
664 return n;
665 }
666
667 /*----------------------------------------------------------------------------
668 * Destroy a neighborhood_t structure.
669 *
670 * parameters:
671 * n <-> pointer to pointer to fvm_neighborhood_t structure to destroy.
672 *----------------------------------------------------------------------------*/
673
674 void
fvm_neighborhood_destroy(fvm_neighborhood_t ** n)675 fvm_neighborhood_destroy(fvm_neighborhood_t **n)
676 {
677 if (n != NULL) {
678 fvm_neighborhood_t *_n = *n;
679 if (_n != NULL) {
680 if (_n->elt_num != NULL)
681 BFT_FREE(_n->elt_num);
682 if (_n->neighbor_index != NULL)
683 BFT_FREE(_n->neighbor_index);
684 if (_n->neighbor_num != NULL)
685 BFT_FREE(_n->neighbor_num);
686 }
687 BFT_FREE(*n);
688 }
689 }
690
691 /*----------------------------------------------------------------------------
692 * Set non-default algorithm parameters for neighborhood management structure.
693 *
694 * parameters:
695 * n <-> pointer to neighborhood management structure
696 * max_tree_depth <-- maximum search tree depth
697 * leaf_threshold <-- maximum number of boxes which can be related to
698 * a leaf of the tree if level < max_tree_depth
699 * max_box_ratio <-- stop adding levels to tree when
700 * (n_linked_boxes > max_box_ratio*n_init_boxes)
701 * max_box_ratio_distrib <-- maximum box ratio when computing coarse
702 * tree prior to parallel distribution
703 *---------------------------------------------------------------------------*/
704
705 void
fvm_neighborhood_set_options(fvm_neighborhood_t * n,int max_tree_depth,int leaf_threshold,float max_box_ratio,float max_box_ratio_distrib)706 fvm_neighborhood_set_options(fvm_neighborhood_t *n,
707 int max_tree_depth,
708 int leaf_threshold,
709 float max_box_ratio,
710 float max_box_ratio_distrib)
711 {
712 if (n == NULL)
713 return;
714
715 n->max_tree_depth = max_tree_depth;
716 n->leaf_threshold = leaf_threshold;
717 n->max_box_ratio = max_box_ratio;
718 n->max_box_ratio_distrib = max_box_ratio_distrib;
719 }
720
721 /*----------------------------------------------------------------------------
722 * Retrieve pointers to of arrays from a neighborhood_t structure.
723 *
724 * Arrays remain the property of the neighborhood_t structure, and must not
725 * be modified by the caller.
726 *
727 * parameters:
728 * n <-> pointer to fvm_neighborhood_t structure.
729 * n_elts --> number of elements with neighbors in block
730 * associated with local rank
731 * elt_num --> global element numbers in local block (size: n_elts)
732 * neighbor_index --> start index of neighbors (size: n_elts + 1)
733 * neighbor_num --> global element neighbor numbers
734 * (size: neighbor_index[n_elts])
735 *----------------------------------------------------------------------------*/
736
737 void
fvm_neighborhood_get_data(const fvm_neighborhood_t * n,cs_lnum_t * n_elts,cs_gnum_t ** const elt_num,cs_lnum_t ** const neighbor_index,cs_gnum_t ** const neighbor_num)738 fvm_neighborhood_get_data(const fvm_neighborhood_t *n,
739 cs_lnum_t *n_elts,
740 cs_gnum_t **const elt_num,
741 cs_lnum_t **const neighbor_index,
742 cs_gnum_t **const neighbor_num)
743 {
744 if (n != NULL) {
745
746 if (n_elts != NULL)
747 *n_elts = n->n_elts;
748
749 if (elt_num != NULL)
750 *elt_num = n->elt_num;
751
752 if (neighbor_index != NULL)
753 *neighbor_index = n->neighbor_index;
754
755 if (neighbor_num != NULL)
756 *neighbor_num = n->neighbor_num;
757 }
758 }
759
760 /*----------------------------------------------------------------------------
761 * Transfer ownership of arrays from a neighborhood_t structure to the
762 * calling program.
763 *
764 * Arrays that are transferred are removed from the structure, so its
765 * use after calling this function is limited to querying timing information
766 * until a new neighborhood is computed.
767 *
768 * parameters:
769 * n <-> pointer to fvm_neighborhood_t structure.
770 * n_elts --> number of elements with neighbors in block
771 * associated with local rank
772 * elt_num --> global element numbers in local block (size: n_elts)
773 * neighbor_index --> start index of neighbors (size: n_elts + 1)
774 * neighbor_num --> global element neighbor numbers
775 * (size: neighbor_index[n_elts])
776 *----------------------------------------------------------------------------*/
777
778 void
fvm_neighborhood_transfer_data(fvm_neighborhood_t * n,cs_lnum_t * n_elts,cs_gnum_t ** elt_num,cs_lnum_t ** neighbor_index,cs_gnum_t ** neighbor_num)779 fvm_neighborhood_transfer_data(fvm_neighborhood_t *n,
780 cs_lnum_t *n_elts,
781 cs_gnum_t **elt_num,
782 cs_lnum_t **neighbor_index,
783 cs_gnum_t **neighbor_num)
784 {
785 if (n != NULL) {
786
787 if (n_elts != NULL)
788 *n_elts = n->n_elts;
789
790 if (elt_num != NULL) {
791 *elt_num = n->elt_num;
792 n->elt_num = NULL;
793 }
794 if (neighbor_index != NULL) {
795 *neighbor_index = n->neighbor_index;
796 n->neighbor_index = NULL;
797 }
798 if (neighbor_num != NULL) {
799 *neighbor_num = n->neighbor_num;
800 n->neighbor_num = NULL;
801 }
802 }
803 }
804
805 /*----------------------------------------------------------------------------
806 * Determine intersecting boxes.
807 *
808 * Box global numbers and extents may be either copied for the structure's
809 * internal use from the caller, or transferred to the neighborhood management
810 * structure: both the box_gnum and extents arguments have an "assigned"
811 * variant, in which case a pointer to a pointer is provided, and the
812 * argument's property is transferred to the neighborhod management structure.
813 * The unused variant of an argument should be set to NULL.
814 *
815 * Boxes may be distributed among processors, so their intersections are
816 * determined using a block distribution, and defined using their
817 * global numbers.
818 *
819 * All global numbers appearing in box_gnum[] will have a matching entry in
820 * the neighborhod structure. To remove global numbers entries with no
821 * neighbors from the structure, the fvm_neighborhood_prune() function
822 * may be used.
823 *
824 * parameters:
825 * n <-> pointer to neighborhood management structure
826 * dim <-- spatial dimension
827 * n_boxes <-- local number of boxes
828 * box_gnum <-- global numbering of boxes
829 * extents <-- coordinate extents (size: n_boxes*dim*2, as
830 * xmin1, ymin1, .. xmax1, ymax1, ..., xmin2, ...)
831 * box_gnum_assigned <-> as box_gnum, ownership transferred (NULL on return)
832 * extents_assigned <-> as extents, ownership transferred (NULL on return)
833 *---------------------------------------------------------------------------*/
834
835 void
fvm_neighborhood_by_boxes(fvm_neighborhood_t * n,int dim,cs_lnum_t n_boxes,const cs_gnum_t * box_gnum,const cs_coord_t * extents,cs_gnum_t ** box_gnum_assigned,cs_coord_t ** extents_assigned)836 fvm_neighborhood_by_boxes(fvm_neighborhood_t *n,
837 int dim,
838 cs_lnum_t n_boxes,
839 const cs_gnum_t *box_gnum,
840 const cs_coord_t *extents,
841 cs_gnum_t **box_gnum_assigned,
842 cs_coord_t **extents_assigned)
843 {
844 double clock_start, clock_end, cpu_start, cpu_end;
845
846 fvm_box_tree_t *bt = NULL;
847 fvm_box_set_t *boxes = NULL;
848
849 const cs_gnum_t *_box_gnum = box_gnum;
850 const cs_coord_t *_extents = extents;
851
852 int n_ranks = 1;
853
854 clock_start = cs_timer_wtime();
855 cpu_start = cs_timer_cpu_time();
856
857 /* Transfer data if necessary */
858
859 if (box_gnum_assigned != NULL)
860 _box_gnum = *box_gnum_assigned;
861 if (extents_assigned != NULL)
862 _extents = *extents_assigned;
863
864 /* Reset structure if necessary */
865
866 n->n_elts = 0;
867 if (n->elt_num != NULL)
868 BFT_FREE(n->elt_num);
869 if (n->neighbor_index != NULL)
870 BFT_FREE(n->neighbor_index);
871 if (n->neighbor_num != NULL)
872 BFT_FREE(n->neighbor_num);
873
874 /* Allocate fvm_box_set_t structure and initialize it */
875
876 #if defined(HAVE_MPI)
877
878 if (n->comm != MPI_COMM_NULL)
879 MPI_Comm_size(n->comm, &n_ranks);
880
881 boxes = fvm_box_set_create(dim,
882 1, /* normalize */
883 1, /* allow_projection */
884 n_boxes,
885 _box_gnum,
886 _extents,
887 n->comm);
888
889 if (n_ranks > 1)
890 _redistribute_boxes(n, boxes);
891
892 #else
893
894 boxes = fvm_box_set_create(dim,
895 1, /* normalize */
896 1, /* allow_projection */
897 n_boxes,
898 _box_gnum,
899 _extents);
900
901 #endif
902
903 /* Free transferred data if applicable */
904
905 if (box_gnum_assigned != NULL) {
906 _box_gnum = NULL;
907 BFT_FREE(*box_gnum_assigned);
908 }
909
910 if (extents_assigned != NULL) {
911 _extents = NULL;
912 BFT_FREE(*extents_assigned);
913 }
914
915 /* Build a tree structure and use it to order bounding boxes */
916
917 /* Create and initialize a box tree structure */
918
919 bt = fvm_box_tree_create(n->max_tree_depth,
920 n->leaf_threshold,
921 n->max_box_ratio);
922
923 /* Build a tree and put bounding boxes */
924
925 fvm_box_tree_set_boxes(bt,
926 boxes,
927 FVM_BOX_TREE_ASYNC_LEVEL);
928
929 _update_bt_statistics((&n->bt_stats), bt);
930
931 /* Update construction times. */
932
933 clock_end = cs_timer_wtime();
934 cpu_end = cs_timer_cpu_time();
935
936 n->cpu_time[0] = cpu_end - cpu_start;
937 n->wtime[0] = clock_end - clock_start;
938
939 clock_start = clock_end;
940 cpu_start = cpu_end;
941
942 /* Allocate structure to store intersections between boxes */
943
944 n->n_elts = fvm_box_set_get_size(boxes);
945
946 BFT_MALLOC(n->elt_num, n->n_elts, cs_gnum_t);
947 if (n->n_elts > 0)
948 memcpy(n->elt_num,
949 fvm_box_set_get_g_num(boxes),
950 n->n_elts*sizeof(cs_gnum_t));
951
952 fvm_box_tree_get_intersects(bt,
953 boxes,
954 &(n->neighbor_index),
955 &(n->neighbor_num));
956
957 #if 0 && defined(DEBUG) && !defined(NDEBUG)
958 fvm_box_tree_dump(bt);
959 fvm_box_set_dump(boxes, 1);
960 #endif
961
962 /* Destroy the associated box tree */
963
964 fvm_box_tree_destroy(&bt);
965
966 /* Compact intersections list, delete redundancies and order intersections */
967
968 _order_neighborhood(n);
969
970 #if defined(HAVE_MPI)
971
972 /* Synchronize list of intersections for each element of the list
973 and distribute it by block over the ranks */
974
975 if (n_ranks > 1)
976 _sync_by_block(n, fvm_box_set_get_global_size(boxes));
977
978 #endif /* HAVE_MPI */
979
980 /* Destroy the box set structures */
981
982 fvm_box_set_destroy(&boxes);
983
984 _clean_neighbor_nums(n);
985
986 /* Update query times. */
987
988 clock_end = cs_timer_wtime();
989 cpu_end = cs_timer_cpu_time();
990
991 n->cpu_time[1] = cpu_end - cpu_start;
992 n->wtime[1] = clock_end - clock_start;
993 }
994
995 /*----------------------------------------------------------------------------
996 * Prune a neighborhood (remove entries with no neighbors).
997 *
998 * parameters:
999 * n <-> pointer to neighborhood management structure
1000 *----------------------------------------------------------------------------*/
1001
1002 void
fvm_neighborhood_prune(fvm_neighborhood_t * n)1003 fvm_neighborhood_prune(fvm_neighborhood_t *n)
1004 {
1005 cs_lnum_t i, start_id, end_id, saved_id, n_elts;
1006
1007 cs_lnum_t e_count = 0;
1008
1009 assert(n != NULL);
1010
1011 if (n->n_elts == 0)
1012 return;
1013
1014 n_elts = n->n_elts;
1015
1016 /* Remove elements with no neighbors */
1017
1018 saved_id = n->neighbor_index[0];
1019
1020 for (i = 0; i < n_elts; i++) {
1021
1022 start_id = saved_id;
1023 end_id = n->neighbor_index[i+1];
1024
1025 if (end_id - start_id > 0) {
1026
1027 n->elt_num[e_count] = n->elt_num[i];
1028
1029 saved_id = end_id;
1030 n->neighbor_index[e_count+1] = end_id;
1031
1032 e_count += 1;
1033
1034 }
1035
1036 }
1037
1038 if (e_count < n_elts) {
1039 n->n_elts = e_count;
1040 BFT_REALLOC(n->elt_num, e_count, cs_gnum_t);
1041 BFT_REALLOC(n->neighbor_index, e_count + 1, cs_lnum_t);
1042 }
1043 }
1044
1045 /*----------------------------------------------------------------------------
1046 * Get global statistics relative to the search structures used
1047 * by fvm_neighborhood_by_boxes().
1048 *
1049 * All fields returned are optional: if their argument is set to NULL,
1050 * the corresponding information will not be returned.
1051 *
1052 * For each field not set to NULL, 3 values are always returned:
1053 * the mean on all ranks (rounded to the closest integer), the minimum,
1054 * and the maximum value respectively.
1055 *
1056 * In serial mode, the mean, minimum, and maximum will be identical for most
1057 * fields, but all 3 values are returned nonetheless.
1058 *
1059 * Note that the final memory use is only relative to the final search
1060 * structure, and the comparison of the total (or average) with the minima
1061 * and maxima may give an indication on load balancing.
1062
1063 * The mem_required field only takes into account the theoretical maximum
1064 * memory footprint of the main search structure during its construction phase,
1065 * and of that of temporary structures before load balancing in parallel mode,
1066 * but does not include minor work arrays or buffers used during the algorithm.
1067 *
1068 * Neither of the 2 memory fields include the footprint of the arrays
1069 * containing the query results.
1070 *
1071 * parameters:
1072 * n <-- pointer to neighborhood management structure
1073 * dim --> layout dimension (3, 2, or 1)
1074 * depth --> tree depth (max level used)
1075 * n_leaves --> number of leaves in the tree
1076 * n_boxes --> number of boxes in the tree
1077 * n_threshold_leaves --> number of leaves where n_boxes > threshold
1078 * n_leaf_boxes --> number of boxes for a leaf
1079 * mem_final --> theoretical memory for final search structure
1080 * mem_required --> theoretical maximum memory for main structures
1081 * used during the algorithm
1082 *
1083 * returns:
1084 * the spatial dimension associated with the box tree layout (3, 2, or 1)
1085 *----------------------------------------------------------------------------*/
1086
1087 int
fvm_neighborhood_get_box_stats(const fvm_neighborhood_t * n,int depth[3],cs_lnum_t n_leaves[3],cs_lnum_t n_boxes[3],cs_lnum_t n_threshold_leaves[3],cs_lnum_t n_leaf_boxes[3],size_t mem_final[3],size_t mem_required[3])1088 fvm_neighborhood_get_box_stats(const fvm_neighborhood_t *n,
1089 int depth[3],
1090 cs_lnum_t n_leaves[3],
1091 cs_lnum_t n_boxes[3],
1092 cs_lnum_t n_threshold_leaves[3],
1093 cs_lnum_t n_leaf_boxes[3],
1094 size_t mem_final[3],
1095 size_t mem_required[3])
1096 {
1097 size_t i;
1098
1099 if (n == NULL)
1100 return 0;
1101
1102 for (i = 0; i < 3; i++) {
1103
1104 if (depth != NULL)
1105 depth[i] = n->bt_stats.depth[i];
1106
1107 if (n_leaves != NULL)
1108 n_leaves[i] = n->bt_stats.n_leaves[i];
1109
1110 if (n_boxes != NULL)
1111 n_boxes[i] = n->bt_stats.n_boxes[i];
1112
1113 if (n_threshold_leaves != NULL)
1114 n_threshold_leaves[i] = n->bt_stats.n_threshold_leaves[i];
1115
1116 if (n_leaf_boxes != NULL)
1117 n_leaf_boxes[i] = n->bt_stats.n_leaf_boxes[i];
1118
1119 if (mem_final != NULL)
1120 mem_final[i] = n->bt_stats.mem_used[i];
1121
1122 if (mem_required != NULL)
1123 mem_required[i] = n->bt_stats.mem_required[i];
1124 }
1125 return n->bt_stats.dim;
1126 }
1127
1128 /*----------------------------------------------------------------------------
1129 * Return timing information.
1130 *
1131 * parameters:
1132 * n <-- pointer to neighborhood management structure
1133 * build_wtime --> initialization Wall-clock time (or NULL)
1134 * build_cpu_time --> initialization CPU time (or NULL)
1135 * query_wtime --> query Wall-clock time (or NULL)
1136 * query_cpu_time --> query CPU time (or NULL)
1137 *----------------------------------------------------------------------------*/
1138
1139 void
fvm_neighborhood_get_times(const fvm_neighborhood_t * n,double * build_wtime,double * build_cpu_time,double * query_wtime,double * query_cpu_time)1140 fvm_neighborhood_get_times(const fvm_neighborhood_t *n,
1141 double *build_wtime,
1142 double *build_cpu_time,
1143 double *query_wtime,
1144 double *query_cpu_time)
1145 {
1146 if (n == NULL)
1147 return;
1148
1149 if (build_wtime != NULL)
1150 *build_wtime = n->wtime[0];
1151 if (build_cpu_time != NULL)
1152 *build_cpu_time = n->cpu_time[0];
1153
1154 if (query_wtime != NULL)
1155 *query_wtime = n->wtime[1];
1156 if (query_cpu_time != NULL)
1157 *query_cpu_time = n->cpu_time[1];
1158 }
1159
1160 /*----------------------------------------------------------------------------
1161 * Dump a neighborhood management structure.
1162 *
1163 * parameters:
1164 * n <-- pointer to neighborhood management structure
1165 *----------------------------------------------------------------------------*/
1166
1167 void
fvm_neighborhood_dump(const fvm_neighborhood_t * n)1168 fvm_neighborhood_dump(const fvm_neighborhood_t *n)
1169 {
1170 cs_lnum_t i, j;
1171
1172 bft_printf("\n"
1173 "Neighborhood information: %p\n\n", (const void *)n);
1174
1175 if (n == NULL)
1176 return;
1177
1178 bft_printf("number of elements: %10d\n"
1179 "list size: %10d\n\n",
1180 (int)(n->n_elts), (int)(n->neighbor_index[n->n_elts]));
1181
1182 bft_printf("max tree depth: %d\n"
1183 "leaf threshold: %d\n"
1184 "max box ratio %f\n\n",
1185 n->max_tree_depth, n->leaf_threshold, n->max_box_ratio);
1186
1187 #if defined(HAVE_MPI)
1188 if (n->comm != MPI_COMM_NULL)
1189 bft_printf("\n"
1190 "Associated MPI communicator: %ld\n",
1191 (long)(n->comm));
1192 #endif
1193
1194 bft_printf("CPU time: %f construction, %f query\n"
1195 "Wall-clock time: %f construction, %f query\n\n",
1196 n->cpu_time[0], n->cpu_time[1],
1197 n->wtime[0], n->wtime[1]);
1198
1199 for (i = 0; i < n->n_elts; i++) {
1200
1201 int n_neighbors = (n->neighbor_index[i+1] - n->neighbor_index[i]);
1202
1203 bft_printf("global num.: %10llu | n_neighbors : %3d |",
1204 (unsigned long long)(n->elt_num[i]), n_neighbors);
1205
1206 for (j = n->neighbor_index[i]; j < n->neighbor_index[i+1]; j++)
1207 bft_printf(" %10llu ", (unsigned long long)(n->neighbor_num[j]));
1208 bft_printf("\n");
1209
1210 }
1211
1212 bft_printf_flush();
1213 }
1214
1215 /*----------------------------------------------------------------------------*/
1216
1217 END_C_DECLS
1218