1 /*============================================================================
2  * Determine and update geometrical neighborhood information.
3  *============================================================================*/
4 
5 /*
6   This file is part of Code_Saturne, a general-purpose CFD tool.
7 
8   Copyright (C) 1998-2021 EDF S.A.
9 
10   This program is free software; you can redistribute it and/or modify it under
11   the terms of the GNU General Public License as published by the Free Software
12   Foundation; either version 2 of the License, or (at your option) any later
13   version.
14 
15   This program is distributed in the hope that it will be useful, but WITHOUT
16   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
18   details.
19 
20   You should have received a copy of the GNU General Public License along with
21   this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
22   Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 */
24 
25 /*----------------------------------------------------------------------------*/
26 
27 #include "cs_defs.h"
28 
29 /*----------------------------------------------------------------------------
30  * Standard C library headers
31  *----------------------------------------------------------------------------*/
32 
33 #include <assert.h>
34 #include <limits.h>
35 #include <math.h>
36 #include <string.h>
37 
38 /*----------------------------------------------------------------------------
39  * Local headers
40  *----------------------------------------------------------------------------*/
41 
42 #include "bft_mem.h"
43 #include "bft_printf.h"
44 
45 #include "fvm_box.h"
46 #include "fvm_box_tree.h"
47 
48 #include "cs_order.h"
49 #include "cs_part_to_block.h"
50 #include "cs_timer.h"
51 
52 /*----------------------------------------------------------------------------
53  * Header for the current file
54  *----------------------------------------------------------------------------*/
55 
56 #include "fvm_neighborhood.h"
57 
58 #include "cs_all_to_all.h"
59 
60 /*----------------------------------------------------------------------------*/
61 
62 BEGIN_C_DECLS
63 
64 /*! \cond DOXYGEN_SHOULD_SKIP_THIS */
65 
66 /*=============================================================================
67  * Local Macro and Type definitions
68  *============================================================================*/
69 
70 /* Neighborhood statistics (using box tree) */
71 /*------------------------------------------*/
72 
73 typedef struct {
74 
75   int         dim;                     /* Layout dimension */
76 
77   /* The following fields have 3 global values:
78      mean on ranks, minimum on ranks, and maximum on ranks */
79 
80   int         depth[3];                /* Tree depth */
81   cs_lnum_t   n_leaves[3];             /* Number of leaves */
82   cs_lnum_t   n_boxes[3];              /* Number of associated boxes */
83   cs_lnum_t   n_threshold_leaves[3];   /* Number of leaves over threshold */
84   cs_lnum_t   n_leaf_boxes[3];         /* Number of boxes per leaf */
85   size_t      mem_used[3];             /* Memory used */
86   size_t      mem_required[3];         /* Memory temporarily required */
87 
88 } _box_tree_stats_t;
89 
90 /* Main neighborhood structure */
91 /*-----------------------------*/
92 
93 struct _fvm_neighborhood_t {
94 
95   cs_lnum_t         n_elts;          /* Number of elements */
96 
97   cs_gnum_t        *elt_num;         /* Global numbers associated with
98                                         elements in local block
99                                         (size: n_elts) */
100   cs_lnum_t        *neighbor_index;  /* Start index of neighbors
101                                         (size: n_elts + 1) */
102   cs_gnum_t        *neighbor_num;    /* Global element neighbor numbers
103                                         (size: neighbor_index[n_elts]) */
104 
105 #if defined(HAVE_MPI)
106   MPI_Comm  comm;                    /* Associated MPI communicator */
107 #endif
108 
109   /* Algorithm-related options */
110 
111   int  max_tree_depth;               /* Maximum search tree depth */
112   int  leaf_threshold;               /* Maximum number of boxes which can
113                                         be related to a leaf of the tree if
114                                         level < max_tree_depth */
115   float  max_box_ratio;              /* Stop adding levels to tree when
116                                         (  n_linked_boxes
117                                          > max_box_ratio*n_initial_boxes) */
118   float  max_box_ratio_distrib;      /* In parallel, max_box_ratio for
119                                         initial coarse tree used for
120                                         distribution */
121 
122   _box_tree_stats_t  bt_stats;       /* Statistics associated with the
123                                         box-trees used for search */
124 
125   /* Timings */
126 
127   double  cpu_time[2];   /* CPU time for tree construction and query */
128   double  wtime[2];      /* Wall clock time for tree construction and query */
129 
130 };
131 
132 /*=============================================================================
133  * Static global variables
134  *============================================================================*/
135 
136 /*============================================================================
137  * Private function definitions
138  *============================================================================*/
139 
140 /*----------------------------------------------------------------------------
141  * Initialize box_tree statistics
142  *
143  * parameters:
144  *   bts --> pointer to box tree statistics structure
145  *---------------------------------------------------------------------------*/
146 
147 static void
_init_bt_statistics(_box_tree_stats_t * bts)148 _init_bt_statistics(_box_tree_stats_t  *bts)
149 {
150   size_t i;
151 
152   assert(bts != NULL);
153 
154   bts->dim = 0;
155 
156   for (i = 0; i < 3; i++) {
157     bts->depth[i] = 0;
158     bts->n_leaves[i] = 0;
159     bts->n_boxes[i] = 0;
160     bts->n_threshold_leaves[i] = 0;
161     bts->n_leaf_boxes[i] = 0;
162     bts->mem_used[i] = 0;
163     bts->mem_required[i] = 0;
164   }
165 }
166 /*----------------------------------------------------------------------------
167  * Update box-tree statistics.
168  *
169  * For most fields, we replace previous values with the current ones.
170  *
171  * For memory required, we are interested in the maximum values over time
172  * (i.e. algorthm steps); this is the case even for the minimal memory
173  * required, we is thus the time maximum of the rank minimum.
174  *
175  * parameters:
176  *   bts   <-> pointer to box tree statistics structure
177  *   boxes <-- pointer to box tree structure
178  *---------------------------------------------------------------------------*/
179 
180 static void
_update_bt_statistics(_box_tree_stats_t * bts,const fvm_box_tree_t * bt)181 _update_bt_statistics(_box_tree_stats_t     *bts,
182                       const fvm_box_tree_t  *bt)
183 {
184   int dim;
185   size_t i;
186   size_t mem_required[3];
187 
188   assert(bts != NULL);
189 
190   dim = fvm_box_tree_get_stats(bt,
191                                bts->depth,
192                                bts->n_leaves,
193                                bts->n_boxes,
194                                bts->n_threshold_leaves,
195                                bts->n_leaf_boxes,
196                                bts->mem_used,
197                                mem_required);
198 
199   bts->dim = dim;
200 
201   for (i = 0; i < 3; i++)
202     bts->mem_required[i] = CS_MAX(bts->mem_required[i], mem_required[i]);
203 }
204 
205 #if defined(HAVE_MPI)
206 
207 /*----------------------------------------------------------------------------
208  * Distribute bounding boxes over the ranks according to a Morton encoding
209  * index. Try to get a well-balanced distribution and spatially coherent.
210  *
211  * parameters:
212  *   n     <-> pointer to neighborhood management structure
213  *   boxes <-> box set to redistribute
214  *---------------------------------------------------------------------------*/
215 
216 static void
_redistribute_boxes(fvm_neighborhood_t * n,fvm_box_set_t * boxes)217 _redistribute_boxes(fvm_neighborhood_t  *n,
218                     fvm_box_set_t       *boxes)
219 {
220   fvm_box_tree_t  *coarse_tree = NULL;
221   fvm_box_distrib_t  *distrib = NULL;
222 
223   /* Sanity checks */
224 
225   assert(boxes != NULL);
226 
227   coarse_tree = fvm_box_tree_create(n->max_tree_depth,
228                                     n->leaf_threshold,
229                                     n->max_box_ratio_distrib);
230 
231   /* Build a tree and associate boxes */
232 
233   fvm_box_tree_set_boxes(coarse_tree,
234                          boxes,
235                          FVM_BOX_TREE_SYNC_LEVEL);
236 
237   _update_bt_statistics(&(n->bt_stats), coarse_tree);
238 
239   /*
240     Compute an index based on Morton encoding to ensure a good distribution
241     of bounding boxes among the ranks.
242   */
243 
244   distrib = fvm_box_tree_get_distrib(coarse_tree, boxes);
245 
246   fvm_box_tree_destroy(&coarse_tree);
247 
248 #if 0 && defined(DEBUG) && !defined(NDEBUG)
249   fvm_box_distrib_dump_statistics(distrib, n->comm);
250 #endif
251 
252   /* Define a new distribution of boxes according to the Morton
253      encoding index */
254 
255   fvm_box_set_redistribute(distrib, boxes);
256 
257   /* Delete intermediate structures */
258 
259   fvm_box_distrib_destroy(&distrib);
260 }
261 
262 #endif /* defined(HAVE_MPI) */
263 
264 /*----------------------------------------------------------------------------
265  * Sort an array "a" between its left bound "l" and its right bound "r"
266  * using a shell sort (Knuth algorithm).
267  *
268  * parameters:
269  *   l <-- left bound
270  *   r <-- right bound
271  *   a <-> array to sort
272  *---------------------------------------------------------------------------*/
273 
274 static inline void
_gnum_shellsort(cs_lnum_t l,cs_lnum_t r,cs_gnum_t a[])275 _gnum_shellsort(cs_lnum_t   l,
276                 cs_lnum_t   r,
277                 cs_gnum_t   a[])
278 {
279   cs_lnum_t i, j, h;
280 
281   /* Compute stride */
282   for (h = 1; h <= (r-l)/9; h = 3*h+1);
283 
284   /* Sort array */
285   for (; h > 0; h /= 3) {
286 
287     for (i = l+h; i < r; i++) {
288 
289       cs_gnum_t   v = a[i];
290 
291       j = i;
292       while ((j >= l+h) && (v < a[j-h])) {
293         a[j] = a[j-h];
294         j -= h;
295       }
296       a[j] = v;
297 
298     } /* Loop on array elements */
299 
300   } /* End of loop on stride */
301 }
302 
303 /*----------------------------------------------------------------------------
304  * Remove multiple element neighbor occurences
305  *
306  * parameters:
307  *   n <-> pointer to neighborhood management structure
308  *----------------------------------------------------------------------------*/
309 
310 static void
_clean_neighbor_nums(fvm_neighborhood_t * n)311 _clean_neighbor_nums(fvm_neighborhood_t  *n)
312 {
313   cs_lnum_t   i, j, start_id, end_id, saved_id, n_elts, n_neighbors;
314 
315   cs_lnum_t   n_count = 0;
316 
317   assert(n != NULL);
318 
319   if (n->n_elts == 0)
320     return;
321 
322   n_elts = n->n_elts;
323   n_neighbors = n->neighbor_index[n_elts];
324 
325   /* Remove redundant elements */
326 
327   saved_id = n->neighbor_index[0];
328 
329   for (i = 0; i < n_elts; i++) {
330 
331     start_id = saved_id;
332     end_id = n->neighbor_index[i+1];
333 
334     if (end_id - start_id > 0) {
335 
336       _gnum_shellsort(start_id, end_id, n->neighbor_num);
337 
338       n->neighbor_num[n_count++] = n->neighbor_num[start_id];
339 
340       for (j = start_id + 1; j < end_id; j++) {
341         if (n->neighbor_num[j] != n->neighbor_num[j-1])
342           n->neighbor_num[n_count++] = n->neighbor_num[j];
343       }
344 
345     }
346 
347     saved_id = end_id;
348     n->neighbor_index[i+1] = n_count;
349 
350   } /* End of loop on elements */
351 
352   if (n_count < n_neighbors)
353     BFT_REALLOC(n->neighbor_num, n_count, cs_gnum_t);
354 }
355 
356 /*----------------------------------------------------------------------------
357  * Order a neighborhood based on the global numbering of elements.
358  *
359  * parameters:
360  *   n <-> pointer to neighborhood management structure
361  *----------------------------------------------------------------------------*/
362 
363 static void
_order_neighborhood(fvm_neighborhood_t * n)364 _order_neighborhood(fvm_neighborhood_t  *n)
365 {
366   cs_lnum_t   i, j, k, order_id, shift, e_count;
367   cs_lnum_t   n_elts, n_neighbors, n_elt_neighbors;
368   cs_gnum_t   prev_num, cur_num;
369 
370   cs_lnum_t   *order = NULL, *old_index = NULL;
371   cs_gnum_t   *old_e_num = NULL, *old_n_num = NULL;
372 
373   assert(n != NULL);
374 
375   if (n->n_elts == 0)
376     return;
377 
378   n_elts = n->n_elts;
379   n_neighbors = n->neighbor_index[n_elts];
380 
381   BFT_MALLOC(order, n_elts, cs_lnum_t);
382   BFT_MALLOC(old_e_num, n_elts, cs_gnum_t);
383   BFT_MALLOC(old_index, n_elts + 1, cs_lnum_t);
384   BFT_MALLOC(old_n_num, n_neighbors, cs_gnum_t);
385 
386   memcpy(old_e_num, n->elt_num, n_elts*sizeof(cs_gnum_t));
387   memcpy(old_index, n->neighbor_index, (n_elts + 1)*sizeof(cs_lnum_t));
388   memcpy(old_n_num, n->neighbor_num, n_neighbors*sizeof(cs_gnum_t));
389 
390   /* Order elt_num */
391 
392   cs_order_gnum_allocated(NULL, old_e_num, order, n_elts);
393 
394   /* Reshape according to the new ordering */
395 
396   /* Add first element */
397 
398   order_id = order[0];
399   shift = 0;
400 
401   n->elt_num[0] = old_e_num[order_id];
402   prev_num = n->elt_num[0];
403 
404   n->neighbor_index[0] = 0;
405   n->neighbor_index[1] = old_index[order_id+1] - old_index[order_id];
406 
407   /* Loop on second-to last elements, merging data if an element has
408      already appeared */
409 
410   for (i = 1, e_count = 1; i < n_elts; i++) {
411 
412     order_id = order[i];
413 
414     n_elt_neighbors = old_index[order_id+1] - old_index[order_id];
415 
416     shift = n->neighbor_index[i];
417 
418     cur_num = old_e_num[order_id];
419 
420     if (cur_num != prev_num) {
421       n->elt_num[e_count] = cur_num;
422       n->neighbor_index[e_count+1] = (  n->neighbor_index[e_count]
423                                       + n_elt_neighbors);
424       e_count += 1;
425       prev_num = cur_num;
426     }
427     else
428       n->neighbor_index[e_count] += n_elt_neighbors;
429 
430     for (j = old_index[order_id], k = 0; k < n_elt_neighbors; j++, k++)
431       n->neighbor_num[shift + k] = old_n_num[j];
432 
433   } /* End of loop on elements */
434 
435   assert(n->neighbor_index[e_count] == n_neighbors);
436 
437   /* Free temporary memory */
438 
439   BFT_FREE(order);
440   BFT_FREE(old_e_num);
441   BFT_FREE(old_index);
442   BFT_FREE(old_n_num);
443 }
444 
445 #if defined(HAVE_MPI)
446 
447 /*----------------------------------------------------------------------------
448  * Synchronize a neighborhood management structure and distribute the
449  * resulting data over ranks by block.
450  *
451  * parameters:
452  *   n        <-- pointer to neighborhood management structure
453  *   n_g_elts <-- global number of elements
454  *----------------------------------------------------------------------------*/
455 
456 static void
_sync_by_block(fvm_neighborhood_t * n,cs_gnum_t n_g_elts)457 _sync_by_block(fvm_neighborhood_t  *n,
458                cs_gnum_t            n_g_elts)
459 {
460   assert(n != NULL);
461 
462   if (n_g_elts == 0 || n->comm == MPI_COMM_NULL)
463     return;
464 
465   /* Initialization */
466 
467   int  rank_id, n_ranks;
468 
469   MPI_Comm_rank(n->comm, &rank_id);
470   MPI_Comm_size(n->comm, &n_ranks);
471 
472   cs_block_dist_info_t bi = cs_block_dist_compute_sizes(rank_id,
473                                                         n_ranks,
474                                                         0,
475                                                         0,
476                                                         n_g_elts);
477 
478   cs_all_to_all_t *d
479     = cs_all_to_all_create_from_block(n->n_elts,
480                                       0, /* flags */
481                                       n->elt_num,
482                                       bi,
483                                       n->comm);
484 
485   cs_gnum_t *send_meta;
486   BFT_MALLOC(send_meta, n->n_elts*2, cs_gnum_t);
487 
488   for (cs_lnum_t i = 0; i < n->n_elts; i++) {
489     send_meta[i*2] = n->elt_num[i];
490     send_meta[i*2+1] = n->neighbor_index[i+1] - n->neighbor_index[i];
491   }
492 
493   cs_gnum_t *recv_meta
494     = cs_all_to_all_copy_array(d,
495                                CS_GNUM_TYPE,
496                                2,
497                                false, /* reverse */
498                                send_meta,
499                                NULL);
500 
501   BFT_FREE(send_meta);
502 
503   cs_lnum_t n_recv = cs_all_to_all_n_elts_dest(d);
504 
505   cs_gnum_t *recv_gnum;
506   BFT_MALLOC(recv_gnum, n_recv, cs_gnum_t);
507   cs_lnum_t *recv_index;
508   BFT_MALLOC(recv_index, n_recv+1, cs_lnum_t);
509 
510   recv_index[0] = 0;
511   for (cs_lnum_t i = 0; i < n_recv; i++) {
512     recv_gnum[i] = recv_meta[i*2];
513     recv_index[i+1] = recv_index[i] + recv_meta[i*2+1];
514   }
515 
516   BFT_FREE(recv_meta);
517 
518   cs_gnum_t *recv_neighbor
519     = cs_all_to_all_copy_indexed(d,
520                                  CS_GNUM_TYPE,
521                                  false, /* reverse */
522                                  n->neighbor_index,
523                                  n->neighbor_num,
524                                  recv_index,
525                                  NULL);
526 
527   /* Build arrays corresponding to block distribution of neighborhood */
528 
529   n->n_elts = bi.gnum_range[1] - bi.gnum_range[0];
530 
531   BFT_FREE(n->elt_num);
532   BFT_FREE(n->neighbor_index);
533   BFT_FREE(n->neighbor_num);
534 
535   BFT_MALLOC(n->elt_num, n->n_elts, cs_gnum_t);
536   BFT_MALLOC(n->neighbor_index, n->n_elts + 1, cs_lnum_t);
537 
538   for (cs_lnum_t i = 0; i < n->n_elts; i++) {
539     n->elt_num[i] = bi.gnum_range[0] + i;
540     n->neighbor_index[i] = 0;
541   }
542   n->neighbor_index[n->n_elts] = 0;
543 
544   /* Count element neighbors in block distribution */
545 
546   for (cs_lnum_t i = 0; i < n_recv; i++) {
547     cs_lnum_t elt_id = recv_gnum[i] - bi.gnum_range[0];
548     n->neighbor_index[elt_id + 1] += recv_index[i+1] - recv_index[i];
549   }
550 
551   /* Transform element neighbor count to index */
552 
553   n->neighbor_index[0] = 0;
554   for (cs_lnum_t i = 0; i < n->n_elts; i++)
555     n->neighbor_index[i+1] += n->neighbor_index[i];
556 
557   BFT_MALLOC(n->neighbor_num, n->neighbor_index[n->n_elts], cs_gnum_t);
558 
559   /* Fill element neighbors in block distribution */
560 
561   cs_lnum_t *counter;
562   BFT_MALLOC(counter, n->n_elts, cs_lnum_t);
563 
564   for (cs_lnum_t i = 0; i < n->n_elts; i++)
565     counter[i] = 0;
566 
567   for (cs_lnum_t i = 0; i < n_recv; i++) {
568 
569     cs_lnum_t elt_id = recv_gnum[i] - bi.gnum_range[0];
570 
571     cs_lnum_t s_id = recv_index[i];
572     cs_lnum_t e_id = recv_index[i+1];
573 
574     cs_lnum_t shift = n->neighbor_index[elt_id] + counter[elt_id];
575 
576     for (cs_lnum_t j = s_id; j < e_id; j++)
577       n->neighbor_num[shift++] = recv_neighbor[j];
578 
579     counter[elt_id] += e_id - s_id;
580 
581   } /* End of loop on ranks */
582 
583   BFT_FREE(recv_gnum);
584   BFT_FREE(counter);
585   BFT_FREE(recv_neighbor);
586   BFT_FREE(recv_index);
587 
588   cs_all_to_all_destroy(&d);
589 
590   /* Remove redundant data */
591 
592   _clean_neighbor_nums(n);
593 }
594 
595 #endif /* defined(HAVE_MPI) */
596 
597 /*! (DOXYGEN_SHOULD_SKIP_THIS) \endcond */
598 
599 /*============================================================================
600  * Public function definitions
601  *============================================================================*/
602 
603 /*----------------------------------------------------------------------------
604  * Create a neighborhood_t structure and initialize it.
605  *
606  * parameters:
607  *   comm  <-- associated MPI communicator
608  *
609  * returns:
610  *   pointer to an empty fvm_box_tree_t structure.
611  *----------------------------------------------------------------------------*/
612 
613 #if defined(HAVE_MPI)
614 fvm_neighborhood_t *
fvm_neighborhood_create(MPI_Comm comm)615 fvm_neighborhood_create(MPI_Comm  comm)
616 #else
617 fvm_neighborhood_t *
618 fvm_neighborhood_create(void)
619 #endif
620 {
621   double  w_start, w_end, cpu_start, cpu_end;
622 
623   fvm_neighborhood_t *n = NULL;
624 
625   /* Timer start */
626 
627   w_start = cs_timer_wtime();
628   cpu_start = cs_timer_cpu_time();
629 
630   /* Allocate and initialize */
631 
632   BFT_MALLOC(n, 1, fvm_neighborhood_t);
633 
634   n->n_elts = 0;
635   n->elt_num = NULL;
636   n->neighbor_index = NULL;
637   n->neighbor_num = NULL;
638 
639 #if defined(HAVE_MPI)
640   n->comm = comm;
641 #endif
642 
643   /* Algorithm options */
644 
645   n->max_tree_depth = 30; /* defaults */
646   n->leaf_threshold = 30;
647   n->max_box_ratio = 10.0;
648   n->max_box_ratio_distrib = 6.0;
649 
650   _init_bt_statistics(&(n->bt_stats));
651 
652   /* Timer end */
653 
654   w_end = cs_timer_wtime();
655   cpu_end = cs_timer_cpu_time();
656 
657   n->cpu_time[0] = cpu_end - cpu_start;  /* build time */
658   n->wtime[0] = w_end - w_start;
659   n->cpu_time[1] = 0.0;                  /* query */
660   n->wtime[1] = 0.0;
661 
662   /* Return structure */
663 
664   return n;
665 }
666 
667 /*----------------------------------------------------------------------------
668  * Destroy a neighborhood_t structure.
669  *
670  * parameters:
671  *   n <-> pointer to pointer to fvm_neighborhood_t structure to destroy.
672  *----------------------------------------------------------------------------*/
673 
674 void
fvm_neighborhood_destroy(fvm_neighborhood_t ** n)675 fvm_neighborhood_destroy(fvm_neighborhood_t  **n)
676 {
677   if (n != NULL) {
678     fvm_neighborhood_t *_n = *n;
679     if (_n != NULL) {
680       if (_n->elt_num != NULL)
681         BFT_FREE(_n->elt_num);
682       if (_n->neighbor_index != NULL)
683         BFT_FREE(_n->neighbor_index);
684       if (_n->neighbor_num != NULL)
685         BFT_FREE(_n->neighbor_num);
686     }
687     BFT_FREE(*n);
688   }
689 }
690 
691 /*----------------------------------------------------------------------------
692  * Set non-default algorithm parameters for neighborhood management structure.
693  *
694  * parameters:
695  *   n                     <-> pointer to neighborhood management structure
696  *   max_tree_depth        <-- maximum search tree depth
697  *   leaf_threshold        <-- maximum number of boxes which can be related to
698  *                             a leaf of the tree if level < max_tree_depth
699  *   max_box_ratio         <-- stop adding levels to tree when
700  *                             (n_linked_boxes > max_box_ratio*n_init_boxes)
701  *   max_box_ratio_distrib <-- maximum box ratio when computing coarse
702  *                             tree prior to parallel distribution
703  *---------------------------------------------------------------------------*/
704 
705 void
fvm_neighborhood_set_options(fvm_neighborhood_t * n,int max_tree_depth,int leaf_threshold,float max_box_ratio,float max_box_ratio_distrib)706 fvm_neighborhood_set_options(fvm_neighborhood_t  *n,
707                              int                  max_tree_depth,
708                              int                  leaf_threshold,
709                              float                max_box_ratio,
710                              float                max_box_ratio_distrib)
711 {
712   if (n == NULL)
713     return;
714 
715   n->max_tree_depth = max_tree_depth;
716   n->leaf_threshold = leaf_threshold;
717   n->max_box_ratio = max_box_ratio;
718   n->max_box_ratio_distrib = max_box_ratio_distrib;
719 }
720 
721 /*----------------------------------------------------------------------------
722  * Retrieve pointers to of arrays from a neighborhood_t structure.
723  *
724  * Arrays remain the property of the neighborhood_t structure, and must not
725  * be modified by the caller.
726  *
727  * parameters:
728  *   n              <-> pointer to fvm_neighborhood_t structure.
729  *   n_elts         --> number of elements with neighbors in block
730  *                      associated with local rank
731  *   elt_num        --> global element numbers in local block (size: n_elts)
732  *   neighbor_index --> start index of neighbors (size: n_elts + 1)
733  *   neighbor_num   --> global element neighbor numbers
734  *                      (size: neighbor_index[n_elts])
735  *----------------------------------------------------------------------------*/
736 
737 void
fvm_neighborhood_get_data(const fvm_neighborhood_t * n,cs_lnum_t * n_elts,cs_gnum_t ** const elt_num,cs_lnum_t ** const neighbor_index,cs_gnum_t ** const neighbor_num)738 fvm_neighborhood_get_data(const fvm_neighborhood_t         *n,
739                           cs_lnum_t                        *n_elts,
740                           cs_gnum_t                 **const elt_num,
741                           cs_lnum_t                 **const neighbor_index,
742                           cs_gnum_t                 **const neighbor_num)
743 {
744   if (n != NULL) {
745 
746     if (n_elts != NULL)
747       *n_elts = n->n_elts;
748 
749     if (elt_num != NULL)
750       *elt_num = n->elt_num;
751 
752     if (neighbor_index != NULL)
753       *neighbor_index = n->neighbor_index;
754 
755     if (neighbor_num != NULL)
756       *neighbor_num = n->neighbor_num;
757   }
758 }
759 
760 /*----------------------------------------------------------------------------
761  * Transfer ownership of arrays from a neighborhood_t structure to the
762  * calling program.
763  *
764  * Arrays that are transferred are removed from the structure, so its
765  * use after calling this function is limited to querying timing information
766  * until a new neighborhood is computed.
767  *
768  * parameters:
769  *   n              <-> pointer to fvm_neighborhood_t structure.
770  *   n_elts         --> number of elements with neighbors in block
771  *                      associated with local rank
772  *   elt_num        --> global element numbers in local block (size: n_elts)
773  *   neighbor_index --> start index of neighbors (size: n_elts + 1)
774  *   neighbor_num   --> global element neighbor numbers
775  *                      (size: neighbor_index[n_elts])
776  *----------------------------------------------------------------------------*/
777 
778 void
fvm_neighborhood_transfer_data(fvm_neighborhood_t * n,cs_lnum_t * n_elts,cs_gnum_t ** elt_num,cs_lnum_t ** neighbor_index,cs_gnum_t ** neighbor_num)779 fvm_neighborhood_transfer_data(fvm_neighborhood_t   *n,
780                                cs_lnum_t            *n_elts,
781                                cs_gnum_t           **elt_num,
782                                cs_lnum_t           **neighbor_index,
783                                cs_gnum_t           **neighbor_num)
784 {
785   if (n != NULL) {
786 
787     if (n_elts != NULL)
788       *n_elts = n->n_elts;
789 
790     if (elt_num != NULL) {
791       *elt_num = n->elt_num;
792       n->elt_num = NULL;
793     }
794     if (neighbor_index != NULL) {
795       *neighbor_index = n->neighbor_index;
796       n->neighbor_index = NULL;
797     }
798     if (neighbor_num != NULL) {
799       *neighbor_num = n->neighbor_num;
800       n->neighbor_num = NULL;
801     }
802   }
803 }
804 
805 /*----------------------------------------------------------------------------
806  * Determine intersecting boxes.
807  *
808  * Box global numbers and extents may be either copied for the structure's
809  * internal use from the caller, or transferred to the neighborhood management
810  * structure: both the box_gnum and extents arguments have an "assigned"
811  * variant, in which case a pointer to a pointer is provided, and the
812  * argument's property is transferred to the neighborhod management structure.
813  * The unused variant of an argument should be set to NULL.
814  *
815  * Boxes may be distributed among processors, so their intersections are
816  * determined using a block distribution, and defined using their
817  * global numbers.
818  *
819  * All global numbers appearing in box_gnum[] will have a matching entry in
820  * the neighborhod structure. To remove global numbers entries with no
821  * neighbors from the structure, the fvm_neighborhood_prune() function
822  * may be used.
823  *
824  * parameters:
825  *   n                 <-> pointer to neighborhood management structure
826  *   dim               <-- spatial dimension
827  *   n_boxes           <-- local number of boxes
828  *   box_gnum          <-- global numbering of boxes
829  *   extents           <-- coordinate extents (size: n_boxes*dim*2, as
830  *                         xmin1, ymin1, .. xmax1, ymax1, ..., xmin2, ...)
831  *   box_gnum_assigned <-> as box_gnum, ownership transferred (NULL on return)
832  *   extents_assigned  <-> as extents, ownership transferred (NULL on return)
833  *---------------------------------------------------------------------------*/
834 
835 void
fvm_neighborhood_by_boxes(fvm_neighborhood_t * n,int dim,cs_lnum_t n_boxes,const cs_gnum_t * box_gnum,const cs_coord_t * extents,cs_gnum_t ** box_gnum_assigned,cs_coord_t ** extents_assigned)836 fvm_neighborhood_by_boxes(fvm_neighborhood_t  *n,
837                           int                  dim,
838                           cs_lnum_t            n_boxes,
839                           const cs_gnum_t     *box_gnum,
840                           const cs_coord_t    *extents,
841                           cs_gnum_t          **box_gnum_assigned,
842                           cs_coord_t         **extents_assigned)
843 {
844   double  clock_start, clock_end, cpu_start, cpu_end;
845 
846   fvm_box_tree_t  *bt = NULL;
847   fvm_box_set_t  *boxes = NULL;
848 
849   const cs_gnum_t   *_box_gnum = box_gnum;
850   const cs_coord_t  *_extents = extents;
851 
852   int  n_ranks = 1;
853 
854   clock_start = cs_timer_wtime();
855   cpu_start = cs_timer_cpu_time();
856 
857   /* Transfer data if necessary */
858 
859   if (box_gnum_assigned != NULL)
860     _box_gnum = *box_gnum_assigned;
861   if (extents_assigned != NULL)
862     _extents = *extents_assigned;
863 
864   /* Reset structure if necessary */
865 
866   n->n_elts = 0;
867   if (n->elt_num != NULL)
868     BFT_FREE(n->elt_num);
869   if (n->neighbor_index != NULL)
870     BFT_FREE(n->neighbor_index);
871   if (n->neighbor_num != NULL)
872     BFT_FREE(n->neighbor_num);
873 
874   /* Allocate fvm_box_set_t structure and initialize it */
875 
876 #if defined(HAVE_MPI)
877 
878   if (n->comm != MPI_COMM_NULL)
879     MPI_Comm_size(n->comm, &n_ranks);
880 
881   boxes = fvm_box_set_create(dim,
882                              1,  /* normalize */
883                              1,  /* allow_projection */
884                              n_boxes,
885                              _box_gnum,
886                              _extents,
887                              n->comm);
888 
889   if (n_ranks > 1)
890     _redistribute_boxes(n, boxes);
891 
892 #else
893 
894   boxes = fvm_box_set_create(dim,
895                              1,  /* normalize */
896                              1,  /* allow_projection */
897                              n_boxes,
898                              _box_gnum,
899                              _extents);
900 
901 #endif
902 
903   /* Free transferred data if applicable */
904 
905   if (box_gnum_assigned != NULL) {
906     _box_gnum = NULL;
907     BFT_FREE(*box_gnum_assigned);
908   }
909 
910   if (extents_assigned != NULL) {
911     _extents = NULL;
912     BFT_FREE(*extents_assigned);
913   }
914 
915   /* Build a tree structure and use it to order bounding boxes */
916 
917   /* Create and initialize a box tree structure */
918 
919   bt = fvm_box_tree_create(n->max_tree_depth,
920                            n->leaf_threshold,
921                            n->max_box_ratio);
922 
923   /* Build a tree and put bounding boxes */
924 
925   fvm_box_tree_set_boxes(bt,
926                          boxes,
927                          FVM_BOX_TREE_ASYNC_LEVEL);
928 
929   _update_bt_statistics((&n->bt_stats), bt);
930 
931   /* Update construction times. */
932 
933   clock_end = cs_timer_wtime();
934   cpu_end = cs_timer_cpu_time();
935 
936   n->cpu_time[0] = cpu_end - cpu_start;
937   n->wtime[0] = clock_end - clock_start;
938 
939   clock_start = clock_end;
940   cpu_start = cpu_end;
941 
942   /* Allocate structure to store intersections between boxes */
943 
944   n->n_elts = fvm_box_set_get_size(boxes);
945 
946   BFT_MALLOC(n->elt_num, n->n_elts, cs_gnum_t);
947   if (n->n_elts > 0)
948     memcpy(n->elt_num,
949            fvm_box_set_get_g_num(boxes),
950            n->n_elts*sizeof(cs_gnum_t));
951 
952   fvm_box_tree_get_intersects(bt,
953                               boxes,
954                               &(n->neighbor_index),
955                               &(n->neighbor_num));
956 
957 #if 0 && defined(DEBUG) && !defined(NDEBUG)
958   fvm_box_tree_dump(bt);
959   fvm_box_set_dump(boxes, 1);
960 #endif
961 
962   /* Destroy the associated box tree */
963 
964   fvm_box_tree_destroy(&bt);
965 
966   /* Compact intersections list, delete redundancies and order intersections */
967 
968   _order_neighborhood(n);
969 
970 #if defined(HAVE_MPI)
971 
972   /* Synchronize list of intersections for each element of the list
973      and distribute it by block over the ranks */
974 
975   if (n_ranks > 1)
976     _sync_by_block(n, fvm_box_set_get_global_size(boxes));
977 
978 #endif /* HAVE_MPI */
979 
980   /* Destroy the box set structures */
981 
982   fvm_box_set_destroy(&boxes);
983 
984   _clean_neighbor_nums(n);
985 
986   /* Update query times. */
987 
988   clock_end = cs_timer_wtime();
989   cpu_end = cs_timer_cpu_time();
990 
991   n->cpu_time[1] = cpu_end - cpu_start;
992   n->wtime[1] = clock_end - clock_start;
993 }
994 
995 /*----------------------------------------------------------------------------
996  * Prune a neighborhood (remove entries with no neighbors).
997  *
998  * parameters:
999  *   n <-> pointer to neighborhood management structure
1000  *----------------------------------------------------------------------------*/
1001 
1002 void
fvm_neighborhood_prune(fvm_neighborhood_t * n)1003 fvm_neighborhood_prune(fvm_neighborhood_t  *n)
1004 {
1005   cs_lnum_t   i, start_id, end_id, saved_id, n_elts;
1006 
1007   cs_lnum_t   e_count = 0;
1008 
1009   assert(n != NULL);
1010 
1011   if (n->n_elts == 0)
1012     return;
1013 
1014   n_elts = n->n_elts;
1015 
1016   /* Remove elements with no neighbors */
1017 
1018   saved_id = n->neighbor_index[0];
1019 
1020   for (i = 0; i < n_elts; i++) {
1021 
1022     start_id = saved_id;
1023     end_id = n->neighbor_index[i+1];
1024 
1025     if (end_id - start_id > 0) {
1026 
1027       n->elt_num[e_count] = n->elt_num[i];
1028 
1029       saved_id = end_id;
1030       n->neighbor_index[e_count+1] = end_id;
1031 
1032       e_count += 1;
1033 
1034     }
1035 
1036   }
1037 
1038   if (e_count < n_elts) {
1039     n->n_elts = e_count;
1040     BFT_REALLOC(n->elt_num, e_count, cs_gnum_t);
1041     BFT_REALLOC(n->neighbor_index, e_count + 1, cs_lnum_t);
1042   }
1043 }
1044 
1045 /*----------------------------------------------------------------------------
1046  * Get global statistics relative to the search structures used
1047  * by fvm_neighborhood_by_boxes().
1048  *
1049  * All fields returned are optional: if their argument is set to NULL,
1050  * the corresponding information will not be returned.
1051  *
1052  * For each field not set to NULL, 3 values are always returned:
1053  * the mean on all ranks (rounded to the closest integer), the minimum,
1054  * and the maximum value respectively.
1055  *
1056  * In serial mode, the mean, minimum, and maximum will be identical for most
1057  * fields, but all 3 values are returned nonetheless.
1058  *
1059  * Note that the final memory use is only relative to the final search
1060  * structure, and the comparison of the total (or average) with the minima
1061  * and maxima may give an indication on load balancing.
1062 
1063  * The mem_required field only takes into account the theoretical maximum
1064  * memory footprint of the main search structure during its construction phase,
1065  * and of that of temporary structures before load balancing in parallel mode,
1066  * but does not include minor work arrays or buffers used during the algorithm.
1067  *
1068  * Neither of the 2 memory fields include the footprint of the arrays
1069  * containing the query results.
1070  *
1071  * parameters:
1072  *   n                  <-- pointer to neighborhood management structure
1073  *   dim                --> layout dimension (3, 2, or 1)
1074  *   depth              --> tree depth (max level used)
1075  *   n_leaves           --> number of leaves in the tree
1076  *   n_boxes            --> number of boxes in the tree
1077  *   n_threshold_leaves --> number of leaves where n_boxes > threshold
1078  *   n_leaf_boxes       --> number of boxes for a leaf
1079  *   mem_final          --> theoretical memory for final search structure
1080  *   mem_required       --> theoretical maximum memory for main structures
1081  *                          used during the algorithm
1082  *
1083  * returns:
1084  *   the spatial dimension associated with the box tree layout (3, 2, or 1)
1085  *----------------------------------------------------------------------------*/
1086 
1087 int
fvm_neighborhood_get_box_stats(const fvm_neighborhood_t * n,int depth[3],cs_lnum_t n_leaves[3],cs_lnum_t n_boxes[3],cs_lnum_t n_threshold_leaves[3],cs_lnum_t n_leaf_boxes[3],size_t mem_final[3],size_t mem_required[3])1088 fvm_neighborhood_get_box_stats(const fvm_neighborhood_t  *n,
1089                                int                        depth[3],
1090                                cs_lnum_t                  n_leaves[3],
1091                                cs_lnum_t                  n_boxes[3],
1092                                cs_lnum_t                  n_threshold_leaves[3],
1093                                cs_lnum_t                  n_leaf_boxes[3],
1094                                size_t                     mem_final[3],
1095                                size_t                     mem_required[3])
1096 {
1097   size_t i;
1098 
1099   if (n == NULL)
1100     return 0;
1101 
1102   for (i = 0; i < 3; i++) {
1103 
1104     if (depth != NULL)
1105       depth[i] = n->bt_stats.depth[i];
1106 
1107     if (n_leaves != NULL)
1108       n_leaves[i] = n->bt_stats.n_leaves[i];
1109 
1110     if (n_boxes != NULL)
1111       n_boxes[i] = n->bt_stats.n_boxes[i];
1112 
1113     if (n_threshold_leaves != NULL)
1114       n_threshold_leaves[i] = n->bt_stats.n_threshold_leaves[i];
1115 
1116     if (n_leaf_boxes != NULL)
1117       n_leaf_boxes[i] = n->bt_stats.n_leaf_boxes[i];
1118 
1119     if (mem_final != NULL)
1120       mem_final[i] = n->bt_stats.mem_used[i];
1121 
1122     if (mem_required != NULL)
1123       mem_required[i] = n->bt_stats.mem_required[i];
1124   }
1125   return n->bt_stats.dim;
1126 }
1127 
1128 /*----------------------------------------------------------------------------
1129  * Return timing information.
1130  *
1131  * parameters:
1132  *   n              <-- pointer to neighborhood management structure
1133  *   build_wtime    --> initialization Wall-clock time (or NULL)
1134  *   build_cpu_time --> initialization CPU time (or NULL)
1135  *   query_wtime    --> query Wall-clock time (or NULL)
1136  *   query_cpu_time --> query CPU time (or NULL)
1137  *----------------------------------------------------------------------------*/
1138 
1139 void
fvm_neighborhood_get_times(const fvm_neighborhood_t * n,double * build_wtime,double * build_cpu_time,double * query_wtime,double * query_cpu_time)1140 fvm_neighborhood_get_times(const fvm_neighborhood_t  *n,
1141                            double                    *build_wtime,
1142                            double                    *build_cpu_time,
1143                            double                    *query_wtime,
1144                            double                    *query_cpu_time)
1145 {
1146   if (n == NULL)
1147     return;
1148 
1149   if (build_wtime != NULL)
1150     *build_wtime = n->wtime[0];
1151   if (build_cpu_time != NULL)
1152     *build_cpu_time = n->cpu_time[0];
1153 
1154   if (query_wtime != NULL)
1155     *query_wtime = n->wtime[1];
1156   if (query_cpu_time != NULL)
1157     *query_cpu_time = n->cpu_time[1];
1158 }
1159 
1160 /*----------------------------------------------------------------------------
1161  * Dump a neighborhood management structure.
1162  *
1163  * parameters:
1164  *   n <-- pointer to neighborhood management structure
1165  *----------------------------------------------------------------------------*/
1166 
1167 void
fvm_neighborhood_dump(const fvm_neighborhood_t * n)1168 fvm_neighborhood_dump(const fvm_neighborhood_t  *n)
1169 {
1170   cs_lnum_t   i, j;
1171 
1172   bft_printf("\n"
1173              "Neighborhood information: %p\n\n", (const void *)n);
1174 
1175   if (n == NULL)
1176     return;
1177 
1178   bft_printf("number of elements: %10d\n"
1179              "list size:          %10d\n\n",
1180              (int)(n->n_elts), (int)(n->neighbor_index[n->n_elts]));
1181 
1182   bft_printf("max tree depth:     %d\n"
1183              "leaf threshold:     %d\n"
1184              "max box ratio       %f\n\n",
1185              n->max_tree_depth, n->leaf_threshold, n->max_box_ratio);
1186 
1187 #if defined(HAVE_MPI)
1188   if (n->comm != MPI_COMM_NULL)
1189     bft_printf("\n"
1190                "Associated MPI communicator: %ld\n",
1191                (long)(n->comm));
1192 #endif
1193 
1194   bft_printf("CPU time:           %f construction, %f query\n"
1195              "Wall-clock time:    %f construction, %f query\n\n",
1196              n->cpu_time[0], n->cpu_time[1],
1197              n->wtime[0], n->wtime[1]);
1198 
1199   for (i = 0; i < n->n_elts; i++) {
1200 
1201     int n_neighbors = (n->neighbor_index[i+1] - n->neighbor_index[i]);
1202 
1203     bft_printf("global num.: %10llu | n_neighbors : %3d |",
1204                (unsigned long long)(n->elt_num[i]), n_neighbors);
1205 
1206     for (j = n->neighbor_index[i]; j < n->neighbor_index[i+1]; j++)
1207       bft_printf("  %10llu ", (unsigned long long)(n->neighbor_num[j]));
1208     bft_printf("\n");
1209 
1210   }
1211 
1212   bft_printf_flush();
1213 }
1214 
1215 /*----------------------------------------------------------------------------*/
1216 
1217 END_C_DECLS
1218