1 /*============================================================================
2  * Locate points in a representation associated with a mesh
3  *============================================================================*/
4 
5 /*
6   This file is part of the "Parallel Location and Exchange" library,
7   intended to provide mesh or particle-based code coupling services.
8 
9   Copyright (C) 2005-2021  EDF S.A.
10 
11   This library is free software; you can redistribute it and/or
12   modify it under the terms of the GNU Lesser General Public
13   License as published by the Free Software Foundation; either
14   version 2.1 of the License, or (at your option) any later version.
15 
16   This library is distributed in the hope that it will be useful,
17   but WITHOUT ANY WARRANTY; without even the implied warranty of
18   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19   Lesser General Public License for more details.
20 
21   You should have received a copy of the GNU Lesser General Public
22   License along with this library; if not, write to the Free Software
23   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
24 */
25 
26 /*!
27  * \file ple_locator.c
28  *
29  * \brief Locate points in a representation associated with a mesh.
30  */
31 
32 /*----------------------------------------------------------------------------*/
33 
34 #include "ple_config.h"
35 
36 /*----------------------------------------------------------------------------
37  * Standard C library headers
38  *----------------------------------------------------------------------------*/
39 
40 #include <assert.h>
41 #include <math.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <string.h>
45 
46 #if defined(PLE_HAVE_MPI)
47 #include <mpi.h>
48 #if !defined(MPI_VERSION) /* Defined in up-to-date MPI versions */
49 #  define MPI_VERSION 1
50 #endif
51 #endif
52 
53 /*----------------------------------------------------------------------------
54  *  Local headers
55  *----------------------------------------------------------------------------*/
56 
57 #include "ple_config_defs.h"
58 #include "ple_defs.h"
59 
60 /*----------------------------------------------------------------------------
61  *  Header for the current file
62  *----------------------------------------------------------------------------*/
63 
64 #include "ple_locator.h"
65 
66 /*----------------------------------------------------------------------------*/
67 
68 #ifdef __cplusplus
69 extern "C" {
70 #if 0
71 } /* Fake brace to force back Emacs auto-indentation back to column 0 */
72 #endif
73 #endif /* __cplusplus */
74 
75 /*============================================================================
76  * Local macro definitions
77  *============================================================================*/
78 
79 /* In case <math.h> does not define HUGE_VAL, use a "safe" value */
80 #if !defined(HUGE_VAL)
81 #define HUGE_VAL 1.0e+30
82 #endif
83 
84 /* Geometric operation macros*/
85 
86 #define _MODULE(vect) \
87   sqrt(vect[0] * vect[0] + vect[1] * vect[1] + vect[2] * vect[2])
88 
89 /*
90  * Algorithm ID's
91  */
92 
93 #define _LOCATE_BB_SENDRECV          100  /* bounding boxes + send-receive */
94 #define _LOCATE_BB_SENDRECV_ORDERED  200  /* bounding boxes + send-receive
95                                              with communication ordering */
96 
97 #define _EXCHANGE_SENDRECV       100  /* Sendrecv */
98 #define _EXCHANGE_ISEND_IRECV    200  /* Isend/Irecv/Waitall */
99 
100 /*============================================================================
101  * Type definitions
102  *============================================================================*/
103 
104 typedef struct {
105 
106   int       n;       /* Number of intersecting distant ranks */
107   int      *rank;    /* List of intersecting distant ranks */
108   double   *extents; /* List of intersecting distant extents */
109 
110 } _rank_intersects_t;
111 
112 /*----------------------------------------------------------------------------
113  * Structure defining a locator
114  *----------------------------------------------------------------------------*/
115 
116 struct _ple_locator_t {
117 
118   /* Basic information */
119   /*-------------------*/
120 
121   int       dim;                 /* Spatial dimension */
122   int       have_tags;           /* Do we use tags */
123 
124   int       locate_algorithm;    /* Location algorithm id */
125   int       exchange_algorithm;  /* Exchange algorithm id */
126 
127 #if defined(PLE_HAVE_MPI)
128   MPI_Comm  comm;                /* Associated MPI communicator */
129 #endif
130 
131   int       n_ranks;             /* Number of MPI ranks of distant location */
132   int       start_rank;          /* First MPI rank of distant location */
133 
134   int       n_intersects;        /* Number of intersecting distant ranks */
135   int      *intersect_rank;      /* List of intersecting distant ranks */
136   int      *comm_order;          /* Optional communication ordering */
137 
138   ple_lnum_t    point_id_base;   /* base numbering for (external) point ids */
139 
140   ple_lnum_t   *local_points_idx;   /* Start index of local points per rank
141                                        (size: n_intersects + 1)*/
142   ple_lnum_t   *distant_points_idx; /* Start index of distant points per rank
143                                        (size: n_intersects + 1)*/
144 
145   ple_lnum_t   *local_point_ids;        /* Local point index for data received
146                                            (with blocs starting at
147                                            local_points_idx[] indexes,
148                                            0 to n-1 numbering) */
149 
150   ple_lnum_t   *distant_point_location; /* Location of distant points by parent
151                                            element number (with blocs starting
152                                            at distant_points_idx[] indexes) */
153   ple_coord_t  *distant_point_coords;   /* Coordinates of distant points
154                                            (with blocs starting at
155                                            distant_points_idx[]*dim indexes) */
156 
157   ple_lnum_t    n_interior;         /* Number of local points located */
158   ple_lnum_t   *interior_list;      /* List of points located */
159   ple_lnum_t    n_exterior;         /* Number of local points not located */
160   ple_lnum_t   *exterior_list;      /* List of points not located */
161 
162   /* Timing information (2 fields/time; 0: total; 1: communication) */
163 
164   double  location_wtime[2];       /* Location Wall-clock time */
165   double  location_cpu_time[2];    /* Location CPU time */
166   double  exchange_wtime[2];       /* Variable exchange Wall-clock time */
167   double  exchange_cpu_time[2];    /* Variable exchange CPU time */
168 };
169 
170 /*============================================================================
171  * Local function pointer type documentation
172  *============================================================================*/
173 
174 #ifdef DOXYGEN_ONLY
175 
176 /*!
177  * \brief Query number of extents and compute extents of a mesh representation.
178  *
179  * For future optimizations, computation of extents should not be limited
180  * to mesh extents, but to 1 to n extents, allowing different extent
181  * refinements, from global mesh to individual element extents.
182  *
183  * The minimum required functionality for this function is to compute
184  * whole mesh extents, but it could also return extents of individual
185  * elements, or intermediate extents of mesh subdivisions or coarsened
186  * elements. As such, it takes an argument indicating the maximum
187  * local number of extents it should compute (based on the size of
188  * the extents array argument), but returns the number of extents
189  * really computed, which may be lower (usually 1 for mesh extents,
190  * possibly even 0 if the local mesh is empty). If n_max_extents = 1,
191  * the whole mesh extents should be computed.
192  *
193  * If n_max_extents is set to a negative value (-1), no extents are computed,
194  * but the function returns the maximum number of extents it may compute.
195  * This query mode allows for the caller to allocate the correct amount
196  * of memory for a subsequent call.
197  *
198  * \param[in] mesh          pointer to mesh representation structure
199  * \param[in] n_max_extents maximum number of sub-extents (such as element
200  *                          extents) to compute, or -1 to query
201  * \param[in] tolerance     addition to local extents of each element:
202  *                          extent = base_extent * (1 + tolerance)
203  * \param[in] extents       extents associated with the mesh or elements (or
204  *                          even aggregated elements in case of coarser
205  *                          representation):
206  *                          x_min_0, y_min_0, ..., x_max_i, y_max_i, ...
207  *                          (size: 2*dim*n_max_extents), ignored in query mode
208  *
209  * \return the number of extents computed
210  */
211 
212 typedef ple_lnum_t
213 (ple_mesh_extents_t) (const void  *mesh,
214                       ple_lnum_t   n_max_extents,
215                       double       tolerance,
216                       double       extents[]);
217 
218 /*!
219  * \brief Find elements in a given local mesh containing points: updates the
220  * location[] and distance[] arrays associated with a set of points
221  * for points that are in an element of this mesh, or closer to one
222  * than to previously encountered elements.
223  *
224  * \param[in]      this_nodal          pointer to nodal mesh representation
225  *                                     structure
226  * \param[in]      tolerance_base      associated fixed tolerance
227  * \param[in]      tolerance_fraction  associated fraction of element bounding
228  *                                     boxes added to tolerance
229  * \param[in]      n_points            number of points to locate
230  * \param[in]      point_coords        point coordinates
231  * \param[in, out] location            number of element containing or closest
232  *                                     to each point (size: n_points)
233  * \param[in, out] distance            distance from point to element indicated
234  *                                     by location[]: < 0 if unlocated, >= 0
235  *                                     if inside; the choice of distance metric
236  *                                     is left to the calling code
237  *                                     (size: n_points)
238  */
239 
240 typedef void
241 (ple_mesh_elements_locate_t) (const void         *mesh,
242                               float               tolerance_base,
243                               float               tolerance_fraction,
244                               ple_lnum_t          n_points,
245                               const ple_coord_t   point_coords[],
246                               ple_lnum_t          location[],
247                               float               distance[]);
248 
249 /*!
250  * \brief Function pointer type for user definable logging/profiling
251  * type functions
252  */
253 
254 typedef int
255 (ple_locator_log_t) (int         event,
256                      int         data,
257                      const char *string);
258 
259 #endif /* DOXYGEN_ONLY */
260 
261 /*============================================================================
262  * Static global variables
263  *============================================================================*/
264 
265 #if defined(PLE_HAVE_MPI)
266 
267 /* maximum number of exchanging ranks for which we use asynchronous
268    MPI sends and receives instead of MPI_SendRecv */
269 
270 static int _ple_locator_async_threshold = 128;
271 
272 /* global logging function */
273 
274 static ple_locator_log_t   *_ple_locator_log_func = NULL;
275 
276 /* global variables associated with communication logging */
277 
278 static int  _ple_locator_log_start_p_comm = 0;
279 static int  _ple_locator_log_end_p_comm = 0;
280 static int  _ple_locator_log_start_g_comm = 0;
281 static int  _ple_locator_log_end_g_comm = 0;
282 
283 #endif
284 
285 /*============================================================================
286  * Private function definitions
287  *============================================================================*/
288 
289 #if defined(PLE_HAVE_MPI)
290 
291 /*----------------------------------------------------------------------------
292  * Log communication start.
293  *
294  * parameters:
295  *   start_p_comm <-- event number for the start of the "send/recv"
296  *                    or "global send/recv" state
297  *   timing       <-> 0: wall-clock total; 1: CPU total;
298  *                    2: wall-clock timer start; 3: CPU timer start
299  *----------------------------------------------------------------------------*/
300 
301 inline static void
_locator_trace_start_comm(int start_p_comm,double timing[4])302 _locator_trace_start_comm(int      start_p_comm,
303                           double   timing[4])
304 {
305   timing[2] = ple_timer_wtime();
306   timing[3] = ple_timer_cpu_time();
307 
308   if(_ple_locator_log_func != NULL)
309     _ple_locator_log_func(start_p_comm, 0, NULL);
310 }
311 
312 /*----------------------------------------------------------------------------
313  * Log communication end.
314  *
315  * parameters:
316  *   end_p_comm  <-- event number for the end of the "send/recv" or
317  *                   "global send/recv" state
318  *   timing      <-> 0: wall-clock total; 1 CPU total;
319  *                   2: wall-clock timer start; 3: CPU timer start
320  *----------------------------------------------------------------------------*/
321 
322 inline static void
_locator_trace_end_comm(int end_p_comm,double timing[4])323 _locator_trace_end_comm(int      end_p_comm,
324                         double   timing[4])
325 {
326   if(_ple_locator_log_func != NULL)
327     _ple_locator_log_func(end_p_comm, 0, NULL);
328 
329   timing[0] += ple_timer_wtime() - timing[2];
330   timing[1] += ple_timer_cpu_time() - timing[3];
331 }
332 
333 /*----------------------------------------------------------------------------
334  * Descend binary tree for the ordering of an integer array.
335  *
336  * parameters:
337  *   number  <-- pointer to numbers of entities that should be ordered.
338  *               (if NULL, a default 1 to n numbering is considered)
339  *   level   <-- level of the binary tree to descend
340  *   n       <-- number of entities in the binary tree to descend
341  *   order   <-> ordering array
342  *----------------------------------------------------------------------------*/
343 
344 inline static void
_order_int_descend_tree(const int number[],size_t level,const size_t n,int order[])345 _order_int_descend_tree(const int     number[],
346                         size_t        level,
347                         const size_t  n,
348                         int           order[])
349 {
350   size_t i_save, i1, i2, lv_cur;
351 
352   i_save = (size_t)(order[level]);
353 
354   while (level <= (n/2)) {
355 
356     lv_cur = (2*level) + 1;
357 
358     if (lv_cur < n - 1) {
359 
360       i1 = (size_t)(order[lv_cur+1]);
361       i2 = (size_t)(order[lv_cur]);
362 
363       if (number[i1] > number[i2]) lv_cur++;
364     }
365 
366     if (lv_cur >= n) break;
367 
368     i1 = i_save;
369     i2 = (size_t)(order[lv_cur]);
370 
371     if (number[i1] >= number[i2]) break;
372 
373     order[level] = order[lv_cur];
374     level = lv_cur;
375 
376   }
377 
378   order[level] = i_save;
379 }
380 
381 /*----------------------------------------------------------------------------
382  * Order an array of local numbers.
383  *
384  * parameters:
385  *   number  <-- array of entity numbers (if NULL, a default 1 to n
386  *               numbering is considered)
387  *   order   <-- pre-allocated ordering table
388  *   n       <-- number of entities considered
389  *----------------------------------------------------------------------------*/
390 
391 static void
_order_int(const int number[],int order[],const size_t n)392 _order_int(const int     number[],
393            int           order[],
394            const size_t  n)
395 {
396   size_t i;
397   int o_save;
398 
399   /* Initialize ordering array */
400 
401   for (i = 0; i < n; i++)
402     order[i] = i;
403 
404   if (n < 2)
405     return;
406 
407   /* Create binary tree */
408 
409   i = (n / 2);
410   do {
411     i--;
412     _order_int_descend_tree(number, i, n, order);
413   } while (i > 0);
414 
415   /* Sort binary tree */
416 
417   for (i = n - 1; i > 0; i--) {
418     o_save   = order[0];
419     order[0] = order[i];
420     order[i] = o_save;
421     _order_int_descend_tree(number, 0, i, order);
422   }
423 }
424 
425 /*----------------------------------------------------------------------------
426  * Order communicating ranks to reduce serialization
427  *
428  * parameters:
429  *   rank_id       <--  local rank id
430  *   n_ranks       <--  communicator size
431  *   n_comm_ranks  <--  number of communicating ranks
432  *   comm_rank     <--  communicating ranks
433  *   order         -->  communication order
434  *----------------------------------------------------------------------------*/
435 
436 static void
_order_comm_ranks(int rank_id,int n_ranks,int n_comm_ranks,const int comm_rank[],int order[])437 _order_comm_ranks(int        rank_id,
438                   int        n_ranks,
439                   int        n_comm_ranks,
440                   const int  comm_rank[],
441                   int        order[])
442 {
443   int *sub_rank, *rank_dist;
444 
445   PLE_MALLOC(sub_rank, n_comm_ranks, int);
446   PLE_MALLOC(rank_dist, n_comm_ranks, int);
447 
448   /* Compute ordering ("distance") key */
449 
450   for (int i = 0; i < n_comm_ranks; i++) {
451     sub_rank[i] = comm_rank[i];
452     rank_dist[i] = 0;
453   }
454 
455   int l_r_id = rank_id;
456   int m_r_id = (n_ranks+1) / 2;
457 
458   while (true) {
459 
460     if (l_r_id < m_r_id) {
461       for (int i = 0; i < n_comm_ranks; i++) {
462         if (sub_rank[i] >= m_r_id) {
463           rank_dist[i] = rank_dist[i]*2 + 1;
464           sub_rank[i] -= m_r_id;
465         }
466         else
467           rank_dist[i] = rank_dist[i]*2;
468       }
469     }
470     else {
471       for (int i = 0; i < n_comm_ranks; i++) {
472         if (sub_rank[i] >= m_r_id) {
473           rank_dist[i] = rank_dist[i]*2;
474           sub_rank[i] -= m_r_id;
475         }
476         else
477           rank_dist[i] = rank_dist[i]*2 + 1;
478       }
479       l_r_id -= m_r_id;
480     }
481 
482     if (m_r_id < 2)
483       break;
484 
485     m_r_id = (m_r_id+1) / 2;
486 
487   }
488 
489   /* Order results */
490 
491   _order_int(rank_dist, order, n_comm_ranks);
492 
493   PLE_FREE(sub_rank);
494   PLE_FREE(rank_dist);
495 }
496 
497 #endif /* defined(PLE_HAVE_MPI) */
498 
499 /*----------------------------------------------------------------------------
500  * Test if two extents intersect
501  *
502  * parameters:
503  *   dim             <-- spatial (coordinates) dimension
504  *   extents_1       <-- extents: x_min, y_min, ..., x_max, y_max, ...
505  *                       size: dim*2
506  *   extents_2       <-- extents: x_min, y_min, ..., x_max, y_max, ...
507  *                       size: dim*2
508  *
509  * returns:
510  *   true if extents intersect, false otherwise
511  *----------------------------------------------------------------------------*/
512 
513 inline static _Bool
_intersect_extents(int dim,const double extents_1[],const double extents_2[])514 _intersect_extents(int           dim,
515                    const double  extents_1[],
516                    const double  extents_2[])
517 {
518   int i;
519   _Bool retval = true;
520 
521   for (i = 0; i < dim; i++) {
522     if (   (extents_1[i] > extents_2[i + dim])
523         || (extents_2[i] > extents_1[i + dim])) {
524       retval = false;
525       break;
526     }
527   }
528 
529   return retval;
530 }
531 
532 /*----------------------------------------------------------------------------
533  * Test if a point is within given extents
534  *
535  * parameters:
536  *   dim             <-- spatial (coordinates) dimension
537  *   coords          <-- coordinates: x, y, ...
538  *                       size: dim
539  *   extents         <-- extents: x_min, y_min, ..., x_max, y_max, ...
540  *                       size: dim*2
541  *
542  * returns:
543  *   true if point lies within extents, false otherwise
544  *----------------------------------------------------------------------------*/
545 
546 inline static _Bool
_within_extents(int dim,const ple_coord_t coords[],const double extents[])547 _within_extents(int                dim,
548                 const ple_coord_t  coords[],
549                 const double       extents[])
550 {
551   int i;
552   _Bool retval = true;
553 
554   for (i = 0; i < dim; i++) {
555     if (   (coords[i] < extents[i])
556         || (coords[i] > extents[i + dim])) {
557       retval = false;
558       break;
559     }
560   }
561 
562   return retval;
563 }
564 
565 /*----------------------------------------------------------------------------
566  * Compute extents of a point set
567  *
568  * parameters:
569  *   dim             <-- space dimension of points to locate
570  *   point_list_base <-- base numbering for point list
571  *   n_points        <-- number of points to locate
572  *   point_list      <-- optional indirection array to point_coords
573  *   point_coords    <-- coordinates of points to locate
574  *                       (dimension: dim * n_points)
575  *   location        <-- existing location_information, or NULL
576  *   extents         --> extents associated with mesh:
577  *                       x_min, y_min, ..., x_max, y_max, ... (size: 2*dim)
578  *----------------------------------------------------------------------------*/
579 
580 static void
_point_extents(int dim,ple_lnum_t point_list_base,ple_lnum_t n_points,const ple_lnum_t point_list[],const ple_coord_t point_coords[],const ple_lnum_t location[],double extents[])581 _point_extents(int                  dim,
582                ple_lnum_t           point_list_base,
583                ple_lnum_t           n_points,
584                const ple_lnum_t     point_list[],
585                const ple_coord_t    point_coords[],
586                const ple_lnum_t     location[],
587                double               extents[])
588 {
589   int i;
590   ple_lnum_t j, coord_idx;
591 
592   /* initialize extents in case mesh is empty or dim < 3 */
593   for (i = 0; i < dim; i++) {
594     extents[i]       =  HUGE_VAL;
595     extents[i + dim] = -HUGE_VAL;
596   }
597 
598   /* Compute extents */
599 
600   if (location != NULL) {
601 
602     if (point_list != NULL) {
603 
604       for (j = 0; j < n_points; j++) {
605         coord_idx = point_list[j] - point_list_base;
606         for (i = 0; i < dim; i++) {
607           if (extents[i]       > point_coords[(coord_idx * dim) + i])
608             extents[i]       = point_coords[(coord_idx * dim) + i];
609           if (extents[i + dim] < point_coords[(coord_idx * dim) + i])
610             extents[i + dim] = point_coords[(coord_idx * dim) + i];
611         }
612       }
613     }
614 
615     else {
616 
617       for (coord_idx = 0; coord_idx < n_points; coord_idx++) {
618         for (i = 0; i < dim; i++) {
619           if (extents[i]       > point_coords[(coord_idx * dim) + i])
620             extents[i]       = point_coords[(coord_idx * dim) + i];
621           if (extents[i + dim] < point_coords[(coord_idx * dim) + i])
622             extents[i + dim] = point_coords[(coord_idx * dim) + i];
623         }
624       }
625     }
626 
627   }
628   else {
629 
630     if (point_list != NULL) {
631 
632       for (j = 0; j < n_points; j++) {
633         coord_idx = point_list[j] - point_list_base;
634         for (i = 0; i < dim; i++) {
635           if (extents[i]       > point_coords[(coord_idx * dim) + i])
636             extents[i]       = point_coords[(coord_idx * dim) + i];
637           if (extents[i + dim] < point_coords[(coord_idx * dim) + i])
638             extents[i + dim] = point_coords[(coord_idx * dim) + i];
639         }
640       }
641     }
642 
643     else {
644 
645       for (j = 0; j < n_points; j++) {
646         for (i = 0; i < dim; i++) {
647           if (extents[i]       > point_coords[(j * dim) + i])
648             extents[i]       = point_coords[(j * dim) + i];
649           if (extents[i + dim] < point_coords[(j * dim) + i])
650             extents[i + dim] = point_coords[(j * dim) + i];
651         }
652       }
653     }
654 
655   }
656 
657 }
658 
659 /*----------------------------------------------------------------------------
660  * Clear previous location information.
661  *
662  * parameters:
663  *   this_locator      <-> pointer to locator structure
664  *----------------------------------------------------------------------------*/
665 
666 static void
_clear_location_info(ple_locator_t * this_locator)667 _clear_location_info(ple_locator_t  *this_locator)
668 {
669   this_locator->n_intersects = 0;
670 
671   PLE_FREE(this_locator->intersect_rank);
672 
673   PLE_FREE(this_locator->local_points_idx);
674   PLE_FREE(this_locator->distant_points_idx);
675 
676   PLE_FREE(this_locator->local_point_ids);
677 
678   PLE_FREE(this_locator->distant_point_location);
679   PLE_FREE(this_locator->distant_point_coords);
680 
681   PLE_FREE(this_locator->interior_list);
682   PLE_FREE(this_locator->exterior_list);
683 }
684 
685 #if defined(PLE_HAVE_MPI)
686 
687 /*----------------------------------------------------------------------------
688  * Update intersection rank information once location is done.
689  *
690  * parameters:
691  *   this_locator      <-> pointer to locator structure
692  *   n_points          <-- number of points to locate
693  *   location_rank_id  <-> rank on which a point is located in, id
694  *                         in updated locator intersection rank info out
695  *----------------------------------------------------------------------------*/
696 
697 static void
_location_ranks(ple_locator_t * this_locator,ple_lnum_t n_points,ple_lnum_t location_rank_id[])698 _location_ranks(ple_locator_t  *this_locator,
699                 ple_lnum_t      n_points,
700                 ple_lnum_t      location_rank_id[])
701 {
702   int i, k;
703   ple_lnum_t j;
704 
705   int loc_vals[2], max_vals[2];
706   int comm_size;
707   int *send_flag = NULL, *recv_flag = NULL, *intersect_rank_id = NULL;
708 
709   double comm_timing[4] = {0., 0., 0., 0.};
710 
711   /* Initialization */
712 
713   MPI_Comm_size(this_locator->comm, &comm_size);
714 
715   PLE_MALLOC(send_flag, comm_size, int);
716   PLE_MALLOC(recv_flag, comm_size, int);
717 
718   for (i = 0; i < comm_size; i++)
719     send_flag[i] = 0;
720 
721   for (j = 0; j < n_points; j++) {
722     if (location_rank_id[j] > -1)
723       send_flag[location_rank_id[j]] = 1;
724   }
725 
726   /* As exchange detection is asymetric, synchronize it */
727 
728   _locator_trace_start_comm(_ple_locator_log_start_g_comm, comm_timing);
729 
730   MPI_Alltoall(send_flag, 1, MPI_INT, recv_flag, 1, MPI_INT,
731                this_locator->comm);
732 
733   _locator_trace_end_comm(_ple_locator_log_end_g_comm, comm_timing);
734 
735   /* Update number of "intersects" and matching rank info */
736 
737   this_locator->n_intersects = 0;
738 
739   for (i = 0; i < this_locator->n_ranks; i++) {
740     j = i + this_locator->start_rank;
741     if (send_flag[j] == 1 || recv_flag[j] == 1)
742       this_locator->n_intersects++;
743   }
744 
745   PLE_REALLOC(this_locator->intersect_rank,
746               this_locator->n_intersects,
747               int);
748 
749   for (i = 0, k = 0; i < this_locator->n_ranks; i++) {
750     j = i + this_locator->start_rank;
751     if (send_flag[j] == 1 || recv_flag[j] == 1)
752       this_locator->intersect_rank[k++] = j;
753   }
754 
755   if (this_locator->locate_algorithm == _LOCATE_BB_SENDRECV_ORDERED) {
756     int rank_id;
757     MPI_Comm_rank(this_locator->comm, &rank_id);
758     PLE_REALLOC(this_locator->comm_order, this_locator->n_intersects, int);
759     _order_comm_ranks(rank_id,
760                       comm_size,
761                       this_locator->n_intersects,
762                       this_locator->intersect_rank,
763                       this_locator->comm_order);
764   }
765 
766   PLE_FREE(send_flag);
767   PLE_FREE(recv_flag);
768 
769   /* Now convert location rank id to intersect rank */
770 
771   PLE_MALLOC(intersect_rank_id, this_locator->n_ranks, int);
772 
773   for (i = 0; i < this_locator->n_ranks; i++)
774     intersect_rank_id[i] = -1;
775 
776   for (i = 0; i < this_locator->n_intersects; i++) {
777     intersect_rank_id[  this_locator->intersect_rank[i]
778                       - this_locator->start_rank] = i;
779   }
780 
781   for (j = 0; j < n_points; j++) {
782     k = location_rank_id[j] - this_locator->start_rank;
783     if (k > -1)
784       location_rank_id[j] = intersect_rank_id[k];
785   }
786 
787   PLE_FREE(intersect_rank_id);
788 
789   _locator_trace_start_comm(_ple_locator_log_start_g_comm, comm_timing);
790 
791   loc_vals[0] = this_locator->n_intersects;
792   loc_vals[1] = _ple_locator_async_threshold;
793 
794   MPI_Allreduce(loc_vals, max_vals, 2, MPI_INT, MPI_MAX,
795                 this_locator->comm);
796 
797   if (max_vals[0] <= max_vals[1])
798     this_locator->exchange_algorithm = _EXCHANGE_ISEND_IRECV;
799   else
800     this_locator->exchange_algorithm = _EXCHANGE_SENDRECV;
801 
802   _locator_trace_end_comm(_ple_locator_log_end_g_comm, comm_timing);
803 
804   this_locator->location_wtime[1] += comm_timing[0];
805   this_locator->location_cpu_time[1] += comm_timing[1];
806 }
807 
808 /*----------------------------------------------------------------------------
809  * Determine or update possibly intersecting ranks for unlocated elements,
810  * in parallel.
811  *
812  * parameters:
813  *   this_locator       <-- pointer to locator structure
814  *   mesh               <-- pointer to mesh representation structure
815  *   tolerance_base     <-- associated fixed tolerance
816  *   tolerance_fraction <-- associated fraction of element bounding
817  *                          boxes added to tolerance
818  *   n_points           <-- number of points to locate
819  *   point_list         <-- optional indirection array to point_coords
820  *   point_coords       <-- coordinates of points to locate
821  *                          (dimension: dim * n_points)
822  *   location           <-> number of distant element containing or closest
823  *                          to each point, or -1 (size: n_points)
824  *   mesh_extents_f     <-- pointer to function computing mesh or mesh
825  *                          subset or element extents
826  *
827  * returns:
828  *   local rank intersection info
829  *----------------------------------------------------------------------------*/
830 
831 static _rank_intersects_t
_intersects_distant(ple_locator_t * this_locator,const void * mesh,float tolerance_base,float tolerance_fraction,ple_lnum_t n_points,const ple_lnum_t point_list[],const ple_coord_t point_coords[],const ple_lnum_t location[],ple_mesh_extents_t * mesh_extents_f)832 _intersects_distant(ple_locator_t       *this_locator,
833                     const void          *mesh,
834                     float                tolerance_base,
835                     float                tolerance_fraction,
836                     ple_lnum_t           n_points,
837                     const ple_lnum_t     point_list[],
838                     const ple_coord_t    point_coords[],
839                     const ple_lnum_t     location[],
840                     ple_mesh_extents_t  *mesh_extents_f)
841 {
842   int stride2;
843   double extents[12];
844 
845   int j;
846   int stride4;
847   int comm_rank, comm_size;
848   int n_intersects;
849   int  *intersect_rank;
850   double *recvbuf;
851 
852   double comm_timing[4] = {0., 0., 0., 0.};
853   const int dim = this_locator->dim;
854 
855   _rank_intersects_t intersects;
856 
857   /* Update intersects */
858 
859   intersects.n = 0;
860 
861   /* initialize mesh extents in case mesh is empty or dim < 3 */
862   for (int i = 0; i < dim; i++) {
863     extents[i]       =  HUGE_VAL;
864     extents[i + dim] = -HUGE_VAL;
865   }
866 
867   mesh_extents_f(mesh,
868                  1,
869                  tolerance_fraction,
870                  extents);
871 
872   _point_extents(dim,
873                  this_locator->point_id_base,
874                  n_points,
875                  point_list,
876                  point_coords,
877                  location,
878                  extents + 2*dim);
879 
880   for (int i = 0; i < dim; i++) {
881 
882     if (extents[i] > -HUGE_VAL + tolerance_base)
883       extents[i]         -= tolerance_base;
884     else
885       extents[i] = -HUGE_VAL;
886     if (extents[i] < HUGE_VAL - tolerance_base)
887       extents[i +   dim] += tolerance_base;
888     else
889       extents[i +   dim] = HUGE_VAL;
890 
891     if (extents[i + 2*dim] > -HUGE_VAL + tolerance_base)
892       extents[i + 2*dim] -= tolerance_base;
893     else
894       extents[i + 2*dim] = -HUGE_VAL;
895     if (extents[i + 3*dim] < HUGE_VAL - tolerance_base)
896       extents[i + 3*dim] += tolerance_base;
897     else
898       extents[i + 3*dim] = HUGE_VAL;
899 
900   }
901 
902   /* Exchange extent information */
903 
904   MPI_Comm_rank(this_locator->comm, &comm_rank);
905   MPI_Comm_size(this_locator->comm, &comm_size);
906 
907   stride2 = dim * 2; /* Stride for one type of extent */
908   stride4 = dim * 4; /* Stride for element and vertex
909                         extents, end-to-end */
910 
911   PLE_MALLOC(recvbuf, stride4*comm_size, double);
912 
913   _locator_trace_start_comm(_ple_locator_log_start_g_comm, comm_timing);
914 
915   MPI_Allgather(extents, stride4, MPI_DOUBLE, recvbuf, stride4, MPI_DOUBLE,
916                 this_locator->comm);
917 
918   _locator_trace_end_comm(_ple_locator_log_end_g_comm, comm_timing);
919 
920   /* Count and mark possible overlaps */
921 
922   n_intersects = 0;
923   PLE_MALLOC(intersect_rank, this_locator->n_ranks, int);
924 
925   for (int i = 0; i < this_locator->n_ranks; i++) {
926     j = this_locator->start_rank + i;
927     if (  (_intersect_extents(dim,
928                               extents + (2*dim),
929                               recvbuf + (j*stride4)) == true)
930         || (_intersect_extents(dim,
931                                extents,
932                                recvbuf + (j*stride4) + (2*dim)) == true)) {
933       intersect_rank[n_intersects] = j;
934       n_intersects += 1;
935     }
936   }
937 
938   intersects.n = n_intersects;
939   PLE_MALLOC(intersects.rank, intersects.n, int);
940   PLE_MALLOC(intersects.extents, intersects.n * stride2, double);
941 
942   for (int i = 0; i < intersects.n; i++) {
943 
944     intersects.rank[i] = intersect_rank[i];
945 
946     /* Copy only distant element (and not point) extents */
947 
948     for (j = 0; j < stride2; j++)
949       intersects.extents[i*stride2 + j]
950         = recvbuf[intersect_rank[i]*stride4 + j];
951 
952   }
953 
954   /* Free temporary memory */
955 
956   PLE_FREE(intersect_rank);
957   PLE_FREE(recvbuf);
958 
959   /* Finalize timing */
960 
961   this_locator->location_wtime[1] += comm_timing[0];
962   this_locator->location_cpu_time[1] += comm_timing[1];
963 
964   return intersects;
965 }
966 
967 /*----------------------------------------------------------------------------
968  * Initialize location information from previous locator info,
969  * in parallel mode.
970  *
971  * parameters:
972  *   this_locator     <-> pointer to locator structure
973  *   n_points         <-- number of points to locate
974  *   location         --> number of distant element containing or closest
975  *                        to each point, or -1 (size: n_points)
976  *   location_rank_id --> rank id for distant element containing or closest
977  *                        to each point, or -1
978  *----------------------------------------------------------------------------*/
979 
980 static void
_transfer_location_distant(ple_locator_t * this_locator,ple_lnum_t n_points,ple_lnum_t location[],ple_lnum_t location_rank_id[])981 _transfer_location_distant(ple_locator_t  *this_locator,
982                            ple_lnum_t      n_points,
983                            ple_lnum_t      location[],
984                            ple_lnum_t      location_rank_id[])
985 {
986   int dist_rank;
987   ple_lnum_t j, k, n_points_loc, n_points_dist, dist_v_idx;
988 
989   double comm_timing[4] = {0., 0., 0., 0.};
990 
991   const ple_lnum_t idb = this_locator->point_id_base;
992   const ple_lnum_t *_interior_list = this_locator->interior_list;
993 
994   /* Initialize locations */
995 
996   for (j = 0; j < n_points; j++) {
997     location[j] = -1;
998     location_rank_id[j] = -1;
999   }
1000 
1001   /* Update with existing location info *
1002      exact location info is not necessary, so not determined;
1003      all that is necessary is to mark located points as such */
1004 
1005   for (int li = 0; li < this_locator->n_intersects; li++) {
1006 
1007     int i = (this_locator->comm_order != NULL) ?
1008       this_locator->comm_order[li] : li;
1009 
1010     MPI_Status status;
1011     ple_lnum_t *loc_v_buf, *dist_v_ptr;
1012     const ple_lnum_t *_local_point_ids
1013       = this_locator->local_point_ids + this_locator->local_points_idx[i];
1014 
1015     dist_rank = this_locator->intersect_rank[i];
1016 
1017     n_points_loc =    this_locator->local_points_idx[i+1]
1018                     - this_locator->local_points_idx[i];
1019 
1020     n_points_dist =   this_locator->distant_points_idx[i+1]
1021                     - this_locator->distant_points_idx[i];
1022 
1023     dist_v_idx = this_locator->distant_points_idx[i];
1024 
1025     PLE_MALLOC(loc_v_buf, n_points_loc, ple_lnum_t);
1026 
1027     /* Exchange information */
1028 
1029     dist_v_ptr = this_locator->distant_point_location + dist_v_idx;
1030 
1031     _locator_trace_start_comm(_ple_locator_log_start_p_comm, comm_timing);
1032 
1033     MPI_Sendrecv(dist_v_ptr, n_points_dist, PLE_MPI_LNUM, dist_rank, PLE_MPI_TAG,
1034                  loc_v_buf, n_points_loc, PLE_MPI_LNUM, dist_rank, PLE_MPI_TAG,
1035                  this_locator->comm, &status);
1036 
1037     _locator_trace_end_comm(_ple_locator_log_end_p_comm, comm_timing);
1038 
1039     for (k = 0; k < n_points_loc; k++) {
1040       ple_lnum_t pt_id = _interior_list[_local_point_ids[k]] - idb;
1041       location[pt_id] = loc_v_buf[k];
1042       location_rank_id[pt_id] = dist_rank;
1043     }
1044 
1045     PLE_FREE(loc_v_buf);
1046   } /* End of loop on MPI ranks */
1047 
1048   this_locator->n_intersects = 0;
1049   PLE_FREE(this_locator->intersect_rank);
1050   PLE_FREE(this_locator->comm_order);
1051   PLE_FREE(this_locator->local_points_idx);
1052   PLE_FREE(this_locator->distant_points_idx);
1053   PLE_FREE(this_locator->local_point_ids);
1054   PLE_FREE(this_locator->distant_point_location);
1055   PLE_FREE(this_locator->distant_point_coords);
1056 
1057   this_locator->n_interior = 0;
1058   this_locator->n_exterior = 0;
1059   PLE_FREE(this_locator->interior_list);
1060   PLE_FREE(this_locator->exterior_list);
1061 }
1062 
1063 /*----------------------------------------------------------------------------
1064  * Location of points not yet located on the closest elements.
1065  *
1066  * parameters:
1067  *   this_locator       <-> pointer to locator structure
1068  *   mesh               <-- pointer to mesh representation structure
1069  *   tolerance_base     <-- associated fixed tolerance
1070  *   tolerance_fraction <-- associated fraction of element bounding
1071  *                          boxes added to tolerance
1072  *   n_points           <-- number of points to locate
1073  *   point_list         <-- optional indirection array to point_coords
1074  *   point_tag          <-- optional point tag (size: n_points)
1075  *   point_coords       <-- coordinates of points to locate
1076  *                          (dimension: dim * n_points)
1077  *   location           <-> number of distant element containing or closest
1078  *                          to each point, or -1 (size: n_points)
1079  *   location_rank_id   <-> rank id for distant element containing or closest
1080  *                          to each point, or -1
1081  *   distance           <-> distance from point to element indicated by
1082  *                          location[]: < 0 if unlocated, 0 - 1 if inside,
1083  *                          > 1 if outside (size: n_points)
1084  *   mesh_extents_f     <-- function computing mesh or mesh subset extents
1085  *   mesh_locate_f      <-- function locating the points on local elements
1086  *----------------------------------------------------------------------------*/
1087 
1088 static void
_locate_distant(ple_locator_t * this_locator,const void * mesh,float tolerance_base,float tolerance_fraction,ple_lnum_t n_points,const ple_lnum_t point_list[],const int point_tag[],const ple_coord_t point_coords[],ple_lnum_t location[],ple_lnum_t location_rank_id[],float distance[],ple_mesh_extents_t * mesh_extents_f,ple_mesh_elements_locate_t * mesh_locate_f)1089 _locate_distant(ple_locator_t               *this_locator,
1090                 const void                  *mesh,
1091                 float                        tolerance_base,
1092                 float                        tolerance_fraction,
1093                 ple_lnum_t                   n_points,
1094                 const ple_lnum_t             point_list[],
1095                 const int                    point_tag[],
1096                 const ple_coord_t            point_coords[],
1097                 ple_lnum_t                   location[],
1098                 ple_lnum_t                   location_rank_id[],
1099                 float                        distance[],
1100                 ple_mesh_extents_t          *mesh_extents_f,
1101                 ple_mesh_elements_locate_t  *mesh_locate_f)
1102 {
1103   int k;
1104   int dist_rank, dist_index;
1105   ple_lnum_t j;
1106   ple_lnum_t n_coords_loc, n_coords_dist;
1107   ple_lnum_t *location_loc, *location_dist;
1108   int *tag_dist, *send_tag;
1109   ple_coord_t *coords_dist, *send_coords;
1110   float *distance_dist, *distance_loc;
1111 
1112   _rank_intersects_t intersects;
1113   MPI_Status status;
1114 
1115   ple_lnum_t _n_points = 0;
1116   double extents[6];
1117 
1118   double comm_timing[4] = {0., 0., 0., 0.};
1119 
1120   ple_lnum_t *_point_list = NULL, *_point_id = NULL, *send_id = NULL;
1121   const ple_lnum_t *_point_list_p = NULL;
1122 
1123   const int dim = this_locator->dim;
1124   const int stride = dim * 2;
1125   const int have_tags = this_locator->have_tags;
1126   const ple_lnum_t idb = this_locator->point_id_base;
1127 
1128   /* Filter non-located points */
1129 
1130   _n_points = 0;
1131   _point_list_p = point_list;
1132 
1133   for (j = 0; j < n_points; j++) {
1134     if (location[j] < 0)
1135       _n_points++;
1136   }
1137 
1138   if (_n_points < n_points) {
1139 
1140     PLE_MALLOC(_point_list, _n_points, ple_lnum_t);
1141     _point_list_p = _point_list;
1142 
1143     _n_points = 0;
1144     if (point_list == NULL) {
1145       _point_id = _point_list;
1146       for (j = 0; j < n_points; j++) {
1147         if (location[j] < 0)
1148           _point_list[_n_points++] = j + idb;
1149       }
1150     }
1151     else {
1152       PLE_MALLOC(_point_id, _n_points, ple_lnum_t);
1153       for (j = 0; j < n_points; j++) {
1154         if (location[j] < 0) {
1155           _point_list[_n_points] = point_list[j];
1156           _point_id[_n_points] = j + idb;
1157           _n_points++;
1158         }
1159       }
1160     }
1161 
1162   }
1163 
1164   /* Update intersect for current point list */
1165 
1166   intersects = _intersects_distant(this_locator,
1167                                    mesh,
1168                                    tolerance_base,
1169                                    tolerance_fraction,
1170                                    _n_points,
1171                                    _point_list_p,
1172                                    point_coords,
1173                                    location,
1174                                    mesh_extents_f);
1175 
1176   /* Allocate buffers */
1177 
1178   PLE_MALLOC(send_coords, _n_points * dim, ple_coord_t);
1179   PLE_MALLOC(send_id, _n_points, ple_lnum_t);
1180 
1181   if (have_tags)
1182     PLE_MALLOC(send_tag, _n_points, int);
1183   else
1184     send_tag = NULL;
1185 
1186   int *comm_order = NULL;
1187   if (this_locator->locate_algorithm == _LOCATE_BB_SENDRECV_ORDERED) {
1188     int comm_size, rank_id;
1189     MPI_Comm_size(this_locator->comm, &comm_size);
1190     MPI_Comm_rank(this_locator->comm, &rank_id);
1191     PLE_MALLOC(comm_order, intersects.n, int);
1192     _order_comm_ranks(rank_id,
1193                       comm_size,
1194                       intersects.n,
1195                       intersects.rank,
1196                       comm_order);
1197   }
1198 
1199   /* First loop on possibly intersecting distant ranks */
1200   /*---------------------------------------------------*/
1201 
1202   for (int li = 0; li < intersects.n; li++) {
1203 
1204     int i = (comm_order != NULL) ?
1205       comm_order[li] : li;
1206 
1207     dist_index = i; /* Ordering (communication schema) not yet optimized */
1208     dist_rank  = intersects.rank[dist_index];
1209 
1210     /* Prepare and send coords that should fit in each send buffer */
1211     /* Reset buffers for current intersect rank */
1212 
1213     n_coords_loc = 0;
1214 
1215     for (k = 0; k < stride; k++)
1216       extents[k] = intersects.extents[dist_index*stride + k];
1217 
1218     /* Build partial buffer */
1219 
1220     for (j = 0; j < _n_points; j++) {
1221 
1222       ple_lnum_t coord_idx;
1223 
1224       if (_point_list_p != NULL)
1225         coord_idx = _point_list_p[j] - idb;
1226       else
1227         coord_idx = j;
1228 
1229       if (_within_extents(dim,
1230                           &(point_coords[dim*coord_idx]),
1231                           extents) == true) {
1232 
1233         if (_point_id != NULL)
1234           send_id[n_coords_loc] = _point_id[j] -idb;
1235         else
1236           send_id[n_coords_loc] = j;
1237 
1238         for (k = 0; k < dim; k++)
1239           send_coords[n_coords_loc*dim + k] = point_coords[dim*coord_idx + k];
1240 
1241         if (have_tags)
1242           send_tag[n_coords_loc] = point_tag[j];
1243 
1244         n_coords_loc += 1;
1245       }
1246 
1247     }
1248 
1249     /* Send then receive partial buffer */
1250 
1251     dist_rank = intersects.rank[dist_index];
1252 
1253     _locator_trace_start_comm(_ple_locator_log_start_p_comm, comm_timing);
1254 
1255     MPI_Sendrecv(&n_coords_loc, 1, PLE_MPI_LNUM, dist_rank, PLE_MPI_TAG,
1256                  &n_coords_dist, 1, PLE_MPI_LNUM, dist_rank,
1257                  PLE_MPI_TAG, this_locator->comm, &status);
1258 
1259     _locator_trace_end_comm(_ple_locator_log_end_p_comm, comm_timing);
1260 
1261     PLE_MALLOC(coords_dist, n_coords_dist*dim, ple_coord_t);
1262     if (have_tags)
1263       PLE_MALLOC(tag_dist, n_coords_dist, int);
1264     else
1265       tag_dist = NULL;
1266 
1267     _locator_trace_start_comm(_ple_locator_log_start_p_comm, comm_timing);
1268 
1269     MPI_Sendrecv(send_coords, (int)(n_coords_loc*dim),
1270                  PLE_MPI_COORD, dist_rank, PLE_MPI_TAG,
1271                  coords_dist, (int)(n_coords_dist*dim),
1272                  PLE_MPI_COORD, dist_rank, PLE_MPI_TAG,
1273                  this_locator->comm, &status);
1274 
1275     if (have_tags)
1276       MPI_Sendrecv(send_tag, (int)(n_coords_loc),
1277                    MPI_INT, dist_rank, PLE_MPI_TAG,
1278                    tag_dist, (int)(n_coords_dist),
1279                    MPI_INT, dist_rank, PLE_MPI_TAG,
1280                    this_locator->comm, &status);
1281 
1282     _locator_trace_end_comm(_ple_locator_log_end_p_comm, comm_timing);
1283 
1284     /* Now locate received coords on local rank */
1285 
1286     PLE_MALLOC(location_dist, n_coords_dist, ple_lnum_t);
1287     PLE_MALLOC(distance_dist, n_coords_dist, float);
1288 
1289     for (j = 0; j < n_coords_dist; j++) {
1290       location_dist[j] = -1;
1291       distance_dist[j] = -1.0;
1292     }
1293 
1294     mesh_locate_f(mesh,
1295                   tolerance_base,
1296                   tolerance_fraction,
1297                   n_coords_dist,
1298                   coords_dist,
1299                   tag_dist,
1300                   location_dist,
1301                   distance_dist);
1302 
1303     PLE_FREE(tag_dist);
1304     PLE_FREE(coords_dist);
1305 
1306     /* Exchange location return information with distant rank */
1307 
1308     PLE_MALLOC(location_loc, n_coords_loc, ple_lnum_t);
1309     PLE_MALLOC(distance_loc, n_coords_loc, float);
1310 
1311     _locator_trace_start_comm(_ple_locator_log_start_p_comm, comm_timing);
1312 
1313     MPI_Sendrecv(location_dist, (int)n_coords_dist,
1314                  PLE_MPI_LNUM, dist_rank, PLE_MPI_TAG,
1315                  location_loc, (int)n_coords_loc,
1316                  PLE_MPI_LNUM, dist_rank, PLE_MPI_TAG,
1317                  this_locator->comm, &status);
1318 
1319     MPI_Sendrecv(distance_dist, (int)n_coords_dist,
1320                  MPI_FLOAT, dist_rank, PLE_MPI_TAG,
1321                  distance_loc, (int)n_coords_loc,
1322                  MPI_FLOAT, dist_rank, PLE_MPI_TAG,
1323                  this_locator->comm, &status);
1324 
1325     _locator_trace_end_comm(_ple_locator_log_end_p_comm, comm_timing);
1326 
1327     PLE_FREE(location_dist);
1328     PLE_FREE(distance_dist);
1329 
1330     /* Now update location information */
1331 
1332     for (j = 0; j < n_coords_loc; j++) {
1333 
1334       ple_lnum_t l = send_id[j];
1335 
1336       if (   (distance_loc[j] > -0.1)
1337           && (distance_loc[j] < distance[l] || location[l] < 0)) {
1338         location_rank_id[l] = dist_rank;
1339         location[l] = location_loc[j];
1340         distance[l] = distance_loc[j];
1341       }
1342 
1343     }
1344 
1345     PLE_FREE(location_loc);
1346     PLE_FREE(distance_loc);
1347 
1348   }
1349 
1350   PLE_FREE(comm_order);
1351 
1352   /* Free temporary arrays */
1353 
1354   PLE_FREE(send_id);
1355   PLE_FREE(send_tag);
1356   PLE_FREE(send_coords);
1357 
1358   if (_point_list != point_list) {
1359     if (_point_id != _point_list)
1360       PLE_FREE(_point_id);
1361     PLE_FREE(_point_list);
1362   }
1363 
1364   PLE_FREE(intersects.rank);
1365   PLE_FREE(intersects.extents);
1366 
1367   /* Finalize timing */
1368 
1369   this_locator->location_wtime[1] += comm_timing[0];
1370   this_locator->location_cpu_time[1] += comm_timing[1];
1371 }
1372 
1373 /*----------------------------------------------------------------------------
1374  * Prepare locator for use with a given mesh representation and point set.
1375  *
1376  * parameters:
1377  *   this_locator      <-> pointer to locator structure
1378  *   mesh              <-- pointer to mesh representation structure
1379  *   dim               <-- spatial dimension
1380  *   n_points          <-- number of points to locate
1381  *   point_list        <-- optional indirection array to point_coords
1382  *   point_tag         <-- optional point tag (size: n_points)
1383  *   point_coords      <-- coordinates of points to locate
1384  *                         (dimension: dim * n_points)
1385  *   location          <-> number of distant element containing or closest
1386  *                          to each point, or -1 (size: n_points)
1387  *   location_rank_id  <-> rank id for distant element containing or closest
1388  *                          to each point, or -1
1389  *   distance          --> optional distance from point to matching element:
1390  *                         < 0 if unlocated; 0 - 1 if inside and > 1 if
1391  *                         outside a volume element, or absolute distance
1392  *                         to a surface element (size: n_points)
1393  *   mesh_extents_f    <-- pointer to function computing mesh or mesh
1394  *                         subset or element extents
1395  *   mesh_locate_f     <-- function locating points in or on elements
1396  *----------------------------------------------------------------------------*/
1397 
1398 static void
_locate_all_distant(ple_locator_t * this_locator,const void * mesh,float tolerance_base,float tolerance_fraction,ple_lnum_t n_points,const ple_lnum_t point_list[],const int point_tag[],const ple_coord_t point_coords[],ple_lnum_t location[],ple_lnum_t location_rank_id[],float distance[],ple_mesh_extents_t * mesh_extents_f,ple_mesh_elements_locate_t * mesh_locate_f)1399 _locate_all_distant(ple_locator_t               *this_locator,
1400                     const void                  *mesh,
1401                     float                        tolerance_base,
1402                     float                        tolerance_fraction,
1403                     ple_lnum_t                   n_points,
1404                     const ple_lnum_t             point_list[],
1405                     const int                    point_tag[],
1406                     const ple_coord_t            point_coords[],
1407                     ple_lnum_t                   location[],
1408                     ple_lnum_t                   location_rank_id[],
1409                     float                        distance[],
1410                     ple_mesh_extents_t          *mesh_extents_f,
1411                     ple_mesh_elements_locate_t  *mesh_locate_f)
1412 {
1413   int k;
1414   int dist_rank;
1415   ple_lnum_t j;
1416   ple_lnum_t n_coords_loc, n_coords_dist, n_interior, n_exterior;
1417   ple_lnum_t coord_idx, start_idx;
1418   ple_lnum_t *send_id, *send_location;
1419   ple_lnum_t *location_count, *location_shift;
1420   float *_distance = distance;
1421 
1422   MPI_Status status;
1423 
1424   double comm_timing[4] = {0., 0., 0., 0.};
1425   const int dim = this_locator->dim;
1426   const ple_lnum_t idb = this_locator->point_id_base;
1427 
1428   if (distance == NULL) {
1429     PLE_MALLOC(_distance, n_points, float);
1430     for (j = 0; j < n_points; j++)
1431       _distance[j] = -1.0;
1432   }
1433 
1434   /* Now update locations */
1435 
1436   _locate_distant(this_locator,
1437                   mesh,
1438                   tolerance_base,
1439                   tolerance_fraction,
1440                   n_points,
1441                   point_list,
1442                   point_tag,
1443                   point_coords,
1444                   location,
1445                   location_rank_id,
1446                   _distance,
1447                   mesh_extents_f,
1448                   mesh_locate_f);
1449 
1450   /* Update info on communicating ranks and matching ids */
1451   /*----------------------------------------------------*/
1452 
1453   _location_ranks(this_locator,
1454                   n_points,
1455                   location_rank_id);
1456 
1457   /* Reorganize location information */
1458   /*---------------------------------*/
1459 
1460   /* Now that location is done, the location[] array contains
1461      either -1 if a point was not located, or a local index
1462      (associated with the corresponding rank); the distance[] array
1463      is not needed anymore now that all comparisons have been done */
1464 
1465   if (_distance != distance)
1466     PLE_FREE(_distance);
1467 
1468   PLE_MALLOC(location_shift, this_locator->n_intersects, ple_lnum_t);
1469   PLE_MALLOC(location_count, this_locator->n_intersects, ple_lnum_t);
1470 
1471   for (int i = 0; i < this_locator->n_intersects; i++)
1472     location_count[i] = 0;
1473 
1474   n_exterior = 0;
1475   for (j = 0; j < n_points; j++) {
1476     if (location_rank_id[j] > -1)
1477       location_count[location_rank_id[j]] += 1;
1478     else
1479       n_exterior += 1;
1480   }
1481 
1482   this_locator->n_interior = n_points - n_exterior;
1483   PLE_MALLOC(this_locator->interior_list, this_locator->n_interior, ple_lnum_t);
1484 
1485   this_locator->n_exterior = n_exterior;
1486   PLE_MALLOC(this_locator->exterior_list, this_locator->n_exterior, ple_lnum_t);
1487 
1488   if (this_locator->n_intersects > 0)
1489     location_shift[0] = 0;
1490   for (int i = 1; i < this_locator->n_intersects; i++)
1491     location_shift[i] = location_shift[i-1] + location_count[i-1];
1492 
1493   for (int i = 0; i < this_locator->n_intersects; i++)
1494     location_count[i] = 0;
1495 
1496   PLE_MALLOC(send_id, n_points, ple_lnum_t);
1497   PLE_MALLOC(send_location, n_points, ple_lnum_t);
1498 
1499   /* send_id[] will now contain information for all blocks */
1500   for (j = 0; j < n_points; j++)
1501     send_id[j] = -1;
1502 
1503   n_interior = 0;
1504   n_exterior = 0;
1505   for (j = 0; j < n_points; j++) {
1506     const int l_rank = location_rank_id[j];
1507     if (l_rank > -1) {
1508       send_id[location_shift[l_rank] + location_count[l_rank]] = j;
1509       location_count[l_rank] += 1;
1510       this_locator->interior_list[n_interior] = j + idb;
1511       n_interior += 1;
1512     }
1513     else {
1514       this_locator->exterior_list[n_exterior] = j + idb;
1515       n_exterior += 1;
1516     }
1517   }
1518 
1519   /* Second loop on possibly intersecting distant ranks */
1520   /*----------------------------------------------------*/
1521 
1522   /* Count and organize total number of local and distant points */
1523 
1524   PLE_REALLOC(this_locator->local_points_idx,
1525               this_locator->n_intersects + 1,
1526               ple_lnum_t);
1527 
1528   PLE_REALLOC(this_locator->distant_points_idx,
1529               this_locator->n_intersects + 1,
1530               ple_lnum_t);
1531 
1532   this_locator->local_points_idx[0] = 0;
1533   this_locator->distant_points_idx[0] = 0;
1534 
1535   for (int li = 0; li < this_locator->n_intersects; li++) {
1536 
1537     int i = (this_locator->comm_order != NULL) ?
1538       this_locator->comm_order[li] : li;
1539 
1540     dist_rank = this_locator->intersect_rank[i];
1541 
1542     n_coords_loc = location_count[i];
1543 
1544     this_locator->local_points_idx[i+1] = n_coords_loc;
1545 
1546     _locator_trace_start_comm(_ple_locator_log_start_p_comm, comm_timing);
1547 
1548     MPI_Sendrecv(&n_coords_loc, 1, PLE_MPI_LNUM, dist_rank, PLE_MPI_TAG,
1549                  &n_coords_dist, 1, PLE_MPI_LNUM, dist_rank,
1550                  PLE_MPI_TAG, this_locator->comm, &status);
1551 
1552     _locator_trace_end_comm(_ple_locator_log_end_p_comm, comm_timing);
1553 
1554     this_locator->distant_points_idx[i+1] = n_coords_dist;
1555 
1556   }
1557 
1558   /* Counts to index */
1559 
1560   this_locator->local_points_idx[0] = 0;
1561   this_locator->distant_points_idx[0] = 0;
1562 
1563   for (int i = 0; i < this_locator->n_intersects; i++) {
1564     this_locator->local_points_idx[i+1] += this_locator->local_points_idx[i];
1565     this_locator->distant_points_idx[i+1] += this_locator->distant_points_idx[i];
1566   }
1567 
1568   /* Third loop on possibly intersecting distant ranks */
1569   /*----------------------------------------------------*/
1570 
1571   PLE_REALLOC(this_locator->local_point_ids,
1572               this_locator->local_points_idx[this_locator->n_intersects],
1573               ple_lnum_t);
1574 
1575   PLE_REALLOC(this_locator->distant_point_location,
1576               this_locator->distant_points_idx[this_locator->n_intersects],
1577               ple_lnum_t);
1578 
1579   PLE_REALLOC(this_locator->distant_point_coords,
1580               this_locator->distant_points_idx[this_locator->n_intersects] * dim,
1581               ple_coord_t);
1582 
1583   for (int li = 0; li < this_locator->n_intersects; li++) {
1584 
1585     ple_coord_t *send_coords;
1586 
1587     int i = (this_locator->comm_order != NULL) ?
1588       this_locator->comm_order[li] : li;
1589     dist_rank = this_locator->intersect_rank[i];
1590 
1591     n_coords_loc =    this_locator->local_points_idx[i+1]
1592                     - this_locator->local_points_idx[i];
1593 
1594     n_coords_dist =   this_locator->distant_points_idx[i+1]
1595                     - this_locator->distant_points_idx[i];
1596 
1597     start_idx = this_locator->local_points_idx[i];
1598 
1599     PLE_MALLOC(send_coords, n_coords_loc * dim, ple_coord_t);
1600 
1601     for (j = 0; j < n_coords_loc; j++) {
1602 
1603       coord_idx = send_id[location_shift[i] + j];
1604       assert(coord_idx > -1);
1605       this_locator->local_point_ids[start_idx + j] = coord_idx;
1606       send_location[j] = location[coord_idx];
1607       if (point_list != NULL) {
1608         for (k = 0; k < dim; k++)
1609           send_coords[j*dim + k]
1610             = point_coords[dim*(point_list[coord_idx] - idb) + k];
1611       }
1612       else {
1613         for (k = 0; k < dim; k++)
1614           send_coords[j*dim + k]
1615             = point_coords[dim*coord_idx + k];
1616       }
1617     }
1618 
1619     _locator_trace_start_comm(_ple_locator_log_start_p_comm, comm_timing);
1620 
1621     MPI_Sendrecv(send_location, (int)n_coords_loc,
1622                  PLE_MPI_LNUM, dist_rank, PLE_MPI_TAG,
1623                  (this_locator->distant_point_location
1624                   + this_locator->distant_points_idx[i]), (int)n_coords_dist,
1625                  PLE_MPI_LNUM, dist_rank, PLE_MPI_TAG,
1626                  this_locator->comm, &status);
1627 
1628     MPI_Sendrecv(send_coords, (int)(n_coords_loc*dim),
1629                  PLE_MPI_COORD, dist_rank, PLE_MPI_TAG,
1630                  (this_locator->distant_point_coords
1631                   + (this_locator->distant_points_idx[i]*dim)),
1632                  (int)(n_coords_dist*dim),
1633                  PLE_MPI_COORD, dist_rank, PLE_MPI_TAG,
1634                  this_locator->comm, &status);
1635 
1636     _locator_trace_end_comm(_ple_locator_log_end_p_comm, comm_timing);
1637 
1638     PLE_FREE(send_coords);
1639 
1640   }
1641 
1642   PLE_FREE(location_count);
1643   PLE_FREE(location_shift);
1644 
1645   PLE_FREE(send_location);
1646   PLE_FREE(send_id);
1647 
1648   PLE_FREE(location);
1649 
1650   this_locator->location_wtime[1] += comm_timing[0];
1651   this_locator->location_cpu_time[1] += comm_timing[1];
1652 }
1653 
1654 #endif /* defined(PLE_HAVE_MPI) */
1655 
1656 /*----------------------------------------------------------------------------
1657  * Initialize location information from previous locator info,
1658  * in serial mode.
1659  *
1660  * parameters:
1661  *   this_locator      <-> pointer to locator structure
1662  *   n_points          <-- number of points to locate
1663  *   location          --> number of distant element containing or closest
1664  *                         to each point, or -1 (size: n_points)
1665  *----------------------------------------------------------------------------*/
1666 
1667 static void
_transfer_location_local(ple_locator_t * this_locator,ple_lnum_t n_points,ple_lnum_t location[])1668 _transfer_location_local(ple_locator_t  *this_locator,
1669                          ple_lnum_t      n_points,
1670                          ple_lnum_t      location[])
1671 {
1672   ple_lnum_t j;
1673 
1674   /* Initialize locations */
1675 
1676   for (j = 0; j < n_points; j++)
1677     location[j] = -1;
1678 
1679   /* Update with existing location info *
1680      exact location info is not necessary, so not determined;
1681      all that is necessary is to mark located points as such */
1682 
1683   if (this_locator->n_intersects == 1) {
1684 
1685     const ple_lnum_t _n_points =    this_locator->local_points_idx[1]
1686                                   - this_locator->local_points_idx[0];
1687 
1688     const ple_lnum_t idb = this_locator->point_id_base;
1689     const ple_lnum_t *_interior_list = this_locator->interior_list;
1690 
1691     const ple_lnum_t *_local_point_ids = this_locator->local_point_ids;
1692     const ple_lnum_t *dist_v_ptr = this_locator->distant_point_location;
1693 
1694     for (j = 0; j < _n_points; j++) {
1695       ple_lnum_t k = _interior_list[_local_point_ids[j] - idb];
1696       location[k] = dist_v_ptr[j];
1697     }
1698 
1699   }
1700 
1701   this_locator->n_intersects = 0;
1702   PLE_FREE(this_locator->intersect_rank);
1703   PLE_FREE(this_locator->comm_order);
1704   PLE_FREE(this_locator->local_points_idx);
1705   PLE_FREE(this_locator->distant_points_idx);
1706   PLE_FREE(this_locator->local_point_ids);
1707   PLE_FREE(this_locator->distant_point_location);
1708   PLE_FREE(this_locator->distant_point_coords);
1709 
1710   this_locator->n_interior = 0;
1711   this_locator->n_exterior = 0;
1712   PLE_FREE(this_locator->interior_list);
1713   PLE_FREE(this_locator->exterior_list);
1714 }
1715 
1716 /*----------------------------------------------------------------------------
1717  * Determine or update possibly intersecting ranks for unlocated elements,
1718  * in parallel.
1719  *
1720  * parameters:
1721  *   this_locator       <-- pointer to locator structure
1722  *   mesh               <-- pointer to mesh representation structure
1723  *   tolerance_base     <-- associated fixed tolerance
1724  *   tolerance_fraction <-- associated fraction of element bounding
1725  *                          boxes added to tolerance
1726  *   n_points           <-- number of points to locate
1727  *   point_list         <-- optional indirection array to point_coords
1728  *   point_coords       <-- coordinates of points to locate
1729  *                          (dimension: dim * n_points)
1730  *   location           <-> number of distant element containing or closest
1731  *                          to each point, or -1 (size: n_points)
1732  *   mesh_extents_f     <-- pointer to function computing mesh or mesh
1733  *                          subset or element extents
1734  *
1735  * returns:
1736  *   local rank intersection info
1737  *----------------------------------------------------------------------------*/
1738 
1739 static _rank_intersects_t
_intersects_local(ple_locator_t * this_locator,const void * mesh,float tolerance_base,float tolerance_fraction,ple_lnum_t n_points,const ple_lnum_t point_list[],const ple_coord_t point_coords[],const ple_lnum_t location[],ple_mesh_extents_t * mesh_extents_f)1740 _intersects_local(ple_locator_t       *this_locator,
1741                   const void          *mesh,
1742                   float                tolerance_base,
1743                   float                tolerance_fraction,
1744                   ple_lnum_t           n_points,
1745                   const ple_lnum_t     point_list[],
1746                   const ple_coord_t    point_coords[],
1747                   const ple_lnum_t     location[],
1748                   ple_mesh_extents_t  *mesh_extents_f)
1749 {
1750   int i;
1751   int stride2;
1752   double extents[12];
1753 
1754   int j;
1755   int n_intersects;
1756 
1757   const int dim = this_locator->dim;
1758 
1759   _rank_intersects_t intersects;
1760 
1761   /* Update intersects */
1762 
1763   intersects.n = 0;
1764 
1765   mesh_extents_f(mesh,
1766                  1,
1767                  tolerance_fraction,
1768                  extents);
1769 
1770   _point_extents(dim,
1771                  this_locator->point_id_base,
1772                  n_points,
1773                  point_list,
1774                  point_coords,
1775                  location,
1776                  extents + 2*dim);
1777 
1778   for (i = 0; i < dim; i++) {
1779 
1780     if (extents[i] > -HUGE_VAL + tolerance_base)
1781       extents[i]         -= tolerance_base;
1782     else
1783       extents[i] = -HUGE_VAL;
1784     if (extents[i] < HUGE_VAL - tolerance_base)
1785       extents[i +   dim] += tolerance_base;
1786     else
1787       extents[i +   dim] = HUGE_VAL;
1788 
1789     if (extents[i + 2*dim] > -HUGE_VAL + tolerance_base)
1790       extents[i + 2*dim] -= tolerance_base;
1791     else
1792       extents[i + 2*dim] = -HUGE_VAL;
1793     if (extents[i + 3*dim] < HUGE_VAL - tolerance_base)
1794       extents[i + 3*dim] += tolerance_base;
1795     else
1796       extents[i + 3*dim] = HUGE_VAL;
1797 
1798   }
1799 
1800   /* Determine possible overlap */
1801 
1802   stride2 = dim * 2; /* Stride for one type of extent */
1803 
1804   n_intersects = 0;
1805 
1806   if (_intersect_extents(dim, extents, extents + (2*dim)) == true)
1807     n_intersects += 1;
1808 
1809   intersects.n = n_intersects;
1810   PLE_MALLOC(intersects.extents, intersects.n * stride2, double);
1811 
1812   /* Copy only element (and not point) extents */
1813 
1814   for (j = 0; j < stride2; j++)
1815     intersects.extents[j] = extents[j];
1816 
1817   return intersects;
1818 }
1819 
1820 /*----------------------------------------------------------------------------
1821  * Prepare locator for use with a given mesh representation and point set.
1822  *
1823  * parameters:
1824  *   this_locator       <-> pointer to locator structure
1825  *   mesh               <-- pointer to mesh representation structure
1826  *   tolerance_base     <-- associated fixed tolerance
1827  *   tolerance_fraction <-- associated fraction of element bounding
1828  *                          boxes added to tolerance
1829  *   n_points           <-- number of points to locate
1830  *   point_list         <-- optional indirection array to point_coords
1831  *   point_tag          <-- optional point tag (size: n_points)
1832  *   point_coords       <-- coordinates of points to locate
1833  *                          (dimension: dim * n_points)
1834  *   distance           --> optional distance from point to matching element:
1835  *                          < 0 if unlocated; 0 - 1 if inside and > 1 if
1836  *                          outside a volume element, or absolute distance
1837  *                          to a surface element (size: n_points)
1838  *   mesh_extents_f     <-- function computing mesh or mesh subset extents
1839  *   mesh_locate_f      <-- function locating the closest local elements
1840  *----------------------------------------------------------------------------*/
1841 
1842 static void
_locate_all_local(ple_locator_t * this_locator,const void * mesh,float tolerance_base,float tolerance_fraction,ple_lnum_t n_points,const ple_lnum_t point_list[],const int point_tag[],const ple_coord_t point_coords[],ple_lnum_t location[],float distance[],ple_mesh_extents_t * mesh_extents_f,ple_mesh_elements_locate_t * mesh_locate_f)1843 _locate_all_local(ple_locator_t               *this_locator,
1844                   const void                  *mesh,
1845                   float                        tolerance_base,
1846                   float                        tolerance_fraction,
1847                   ple_lnum_t                   n_points,
1848                   const ple_lnum_t             point_list[],
1849                   const int                    point_tag[],
1850                   const ple_coord_t            point_coords[],
1851                   ple_lnum_t                   location[],
1852                   float                        distance[],
1853                   ple_mesh_extents_t          *mesh_extents_f,
1854                   ple_mesh_elements_locate_t  *mesh_locate_f)
1855 {
1856   int l;
1857   ple_lnum_t j, k;
1858   ple_lnum_t n_coords, n_interior, n_exterior, coord_idx;
1859   _rank_intersects_t intersects;
1860 
1861   const int dim = this_locator->dim;
1862   const int have_tags = this_locator->have_tags;
1863   const ple_lnum_t idb = this_locator->point_id_base;
1864 
1865   /* Update intersect for current point list */
1866 
1867   intersects = _intersects_local(this_locator,
1868                                  mesh,
1869                                  tolerance_base,
1870                                  tolerance_fraction,
1871                                  n_points,
1872                                  point_list,
1873                                  point_coords,
1874                                  location,
1875                                  mesh_extents_f);
1876 
1877   n_coords = 0;
1878 
1879   this_locator->n_intersects = intersects.n;
1880 
1881   /* Build partial buffer */
1882 
1883   if (intersects.n > 0) {
1884 
1885     ple_lnum_t *_location = location;
1886     float *_distance = distance;
1887 
1888     ple_lnum_t *id;
1889     int *tag;
1890     ple_coord_t *coords;
1891 
1892     PLE_MALLOC(coords, n_points * dim, ple_coord_t);
1893     PLE_MALLOC(id, n_points, ple_lnum_t);
1894 
1895     if (have_tags)
1896       PLE_MALLOC(tag, n_points, int);
1897     else
1898       tag = NULL;
1899 
1900     for (j = 0; j < n_points; j++) {
1901 
1902       if (location[j] > -1) {
1903         if (distance == NULL)
1904           continue;
1905         else if (distance[j] < 1)
1906           continue;
1907       }
1908 
1909       if (point_list != NULL)
1910         coord_idx = point_list[j] - idb;
1911       else
1912         coord_idx = j;
1913 
1914       if (_within_extents(dim,
1915                           &(point_coords[dim*coord_idx]),
1916                           intersects.extents) == true) {
1917 
1918         for (k = 0; k < dim; k++)
1919           coords[n_coords*dim + k]
1920             = point_coords[dim*coord_idx + k];
1921 
1922         id[n_coords] = j;
1923 
1924         if (have_tags)
1925           tag[n_coords] = point_tag[j];
1926 
1927         n_coords += 1;
1928       }
1929 
1930     }
1931 
1932     PLE_REALLOC(coords, n_coords * dim, ple_coord_t);
1933     PLE_REALLOC(id, n_coords, ple_lnum_t);
1934     if (have_tags)
1935       PLE_REALLOC(tag, n_coords, int);
1936 
1937     if (n_coords < n_points) {
1938       PLE_MALLOC(_location, n_coords, ple_lnum_t);
1939       for (j = 0; j < n_coords; j++)
1940         _location[j] = location[id[j]];
1941     }
1942     if (distance == NULL || n_coords < n_points) {
1943       PLE_MALLOC(_distance, n_coords, float);
1944       if (distance == NULL) {
1945         for (j = 0; j < n_coords; j++)
1946           _distance[j] = -1.0;
1947       }
1948       else {
1949         for (j = 0; j < n_coords; j++)
1950           _distance[j] = distance[id[j]];
1951       }
1952     }
1953 
1954     mesh_locate_f(mesh,
1955                   tolerance_base,
1956                   tolerance_fraction,
1957                   n_coords,
1958                   coords,
1959                   tag,
1960                   _location,
1961                   _distance);
1962 
1963     PLE_FREE(coords);
1964 
1965     if (n_coords < n_points) {
1966       for (j = 0; j < n_coords; j++) {
1967         if (_location[j] > -1) {
1968           k = id[j];
1969           if (distance != NULL) {
1970             if (distance[k] <= _distance[j])
1971               continue;
1972             else
1973               distance[k] = _distance[j];
1974           }
1975           location[k] = _location[j];
1976         }
1977       }
1978     }
1979 
1980     if (_location != location)
1981       PLE_FREE(_location);
1982 
1983     if (_distance != distance)
1984       PLE_FREE(_distance);
1985 
1986     PLE_FREE(tag);
1987     PLE_FREE(id);
1988 
1989   }
1990 
1991   PLE_FREE(intersects.extents);
1992 
1993   /* Reorganize location information */
1994   /*---------------------------------*/
1995 
1996   /* Now that location is done, the location[] array contains
1997      either -1 if a point was not located, or a local index;
1998      the distance[] array is not needed anymore now that all comparisons have
1999      been done */
2000 
2001   this_locator->n_interior = 0;
2002   this_locator->n_exterior = 0;
2003   for (j = 0; j < n_points; j++) {
2004     if (location[j] > -1)
2005       this_locator->n_interior += 1;
2006     else
2007       this_locator->n_exterior += 1;
2008   }
2009 
2010   PLE_MALLOC(this_locator->interior_list, this_locator->n_interior, ple_lnum_t);
2011   PLE_MALLOC(this_locator->exterior_list, this_locator->n_exterior, ple_lnum_t);
2012 
2013   /* Organize total number of "local" and "distant" points */
2014 
2015   PLE_REALLOC(this_locator->local_points_idx, 2, ple_lnum_t);
2016   PLE_REALLOC(this_locator->distant_points_idx, 2, ple_lnum_t);
2017 
2018   this_locator->local_points_idx[0] = 0;
2019   this_locator->local_points_idx[1] = this_locator->n_interior;
2020 
2021   this_locator->distant_points_idx[0] = 0;
2022   this_locator->distant_points_idx[1] = this_locator->n_interior;
2023 
2024   this_locator->local_point_ids = NULL; /* Not needed for single-process */
2025 
2026   PLE_REALLOC(this_locator->distant_point_location,
2027               this_locator->n_interior,
2028               ple_lnum_t);
2029   PLE_REALLOC(this_locator->distant_point_coords,
2030               this_locator->n_interior * dim,
2031               ple_coord_t);
2032 
2033   n_interior = 0;
2034   n_exterior = 0;
2035 
2036   for (j = 0; j < n_points; j++) {
2037 
2038     if (point_list != NULL)
2039       coord_idx = point_list[j] - idb;
2040     else
2041       coord_idx = j;
2042 
2043     if (location[j] > -1) {
2044       this_locator->distant_point_location[n_interior] = location[j];
2045       for (l = 0; l < dim; l++) {
2046         this_locator->distant_point_coords[n_interior*dim + l]
2047           = point_coords[coord_idx*dim + l];
2048       }
2049       this_locator->interior_list[n_interior] = j + idb;
2050       n_interior += 1;
2051     }
2052     else {
2053       this_locator->exterior_list[n_exterior] = j + idb;
2054       n_exterior += 1;
2055     }
2056 
2057   }
2058 
2059 }
2060 
2061 #if defined(PLE_HAVE_MPI)
2062 
2063 /*----------------------------------------------------------------------------
2064  * Distribute variable defined on distant points to processes owning
2065  * the original points (i.e. distant processes).
2066  *
2067  * The exchange is symmetric if both variables are defined, receive
2068  * only if distant_var is NULL, or send only if local_var is NULL.
2069  *
2070  * parameters:
2071  *   this_locator  <-- pointer to locator structure
2072  *   distant_var   <-> variable defined on distant points (ready to send)
2073  *   local_var     <-> variable defined on local points (received)
2074  *   local_list    <-- optional indirection list for local_var
2075  *   datatype      <-- variable type
2076  *   stride        <-- dimension (1 for scalar, 3 for interlaced vector)
2077  *   reverse       <-- if true, exchange is reversed
2078  *                     (receive values associated with distant points
2079  *                     from the processes owning the original points)
2080  *----------------------------------------------------------------------------*/
2081 
2082 static void
_exchange_point_var_distant(ple_locator_t * this_locator,void * distant_var,void * local_var,const ple_lnum_t * local_list,MPI_Datatype datatype,size_t stride,_Bool reverse)2083 _exchange_point_var_distant(ple_locator_t     *this_locator,
2084                             void              *distant_var,
2085                             void              *local_var,
2086                             const ple_lnum_t  *local_list,
2087                             MPI_Datatype       datatype,
2088                             size_t             stride,
2089                             _Bool              reverse)
2090 {
2091   int dist_v_count, loc_v_count, size;
2092   int dist_rank;
2093   int dist_v_flag, loc_v_flag;
2094   ple_lnum_t n_points_loc, n_points_loc_max, n_points_dist;
2095   size_t dist_v_idx;
2096   void *dist_v_ptr;
2097   void *loc_v_buf;
2098 
2099   double comm_timing[4] = {0., 0., 0., 0.};
2100 
2101   MPI_Aint lb, extent;
2102   MPI_Status status;
2103 
2104   /* Check extent of datatype */
2105 
2106 #if (MPI_VERSION >= 2)
2107   MPI_Type_get_extent(datatype, &lb, &extent);
2108 #else
2109   MPI_Type_extent(datatype, &extent);
2110 #endif
2111   MPI_Type_size(datatype, &size);
2112 
2113   if (extent != size)
2114     ple_error(__FILE__, __LINE__, 0,
2115               _("_exchange_point_var() is not implemented for use with\n"
2116                 "MPI datatypes associated with structures using padding\n"
2117                 "(for which size != extent)."));
2118 
2119   /* Initialization */
2120 
2121   n_points_loc_max = 0;
2122 
2123   for (int i = 0; i < this_locator->n_intersects; i++) {
2124     n_points_loc =    this_locator->local_points_idx[i+1]
2125                     - this_locator->local_points_idx[i];
2126     if (n_points_loc > n_points_loc_max)
2127       n_points_loc_max = n_points_loc;
2128   }
2129 
2130   PLE_MALLOC(loc_v_buf, n_points_loc_max*size*stride, char);
2131 
2132   /* Loop on MPI ranks */
2133   /*-------------------*/
2134 
2135   for (int li = 0; li < this_locator->n_intersects; li++) {
2136 
2137     int i = (this_locator->comm_order != NULL) ?
2138       this_locator->comm_order[li] : li;
2139 
2140     const ple_lnum_t *_local_point_ids
2141       = this_locator->local_point_ids + this_locator->local_points_idx[i];
2142 
2143     dist_rank = this_locator->intersect_rank[i];
2144 
2145     n_points_loc =    this_locator->local_points_idx[i+1]
2146                     - this_locator->local_points_idx[i];
2147 
2148     n_points_dist =   this_locator->distant_points_idx[i+1]
2149                     - this_locator->distant_points_idx[i];
2150 
2151     if (distant_var != NULL && n_points_dist > 0)
2152       dist_v_flag = 1;
2153     else
2154       dist_v_flag = 0;
2155 
2156     _locator_trace_start_comm(_ple_locator_log_start_p_comm, comm_timing);
2157 
2158     MPI_Sendrecv(&dist_v_flag, 1, MPI_INT, dist_rank, PLE_MPI_TAG,
2159                  &loc_v_flag, 1, MPI_INT, dist_rank, PLE_MPI_TAG,
2160                  this_locator->comm, &status);
2161 
2162     _locator_trace_end_comm(_ple_locator_log_end_p_comm, comm_timing);
2163 
2164     if (loc_v_flag == 1 && (local_var == NULL || n_points_loc == 0))
2165       ple_error(__FILE__, __LINE__, 0,
2166                 _("Incoherent arguments to different instances in "
2167                   "_exchange_point_var().\n"
2168                   "Send and receive operations do not match "
2169                   "(dist_rank = %d\n)\n"), dist_rank);
2170 
2171     dist_v_idx = this_locator->distant_points_idx[i] * stride*size;
2172     dist_v_count = n_points_dist * stride * dist_v_flag;
2173 
2174     if (loc_v_flag > 0)
2175       loc_v_count = n_points_loc*stride;
2176     else
2177       loc_v_count = 0;
2178 
2179     /* Exchange information */
2180 
2181     if (distant_var != NULL)
2182       dist_v_ptr = (void *)(((char *)distant_var) + dist_v_idx);
2183     else
2184       dist_v_ptr = NULL;
2185 
2186     if (reverse == false) {
2187 
2188       _locator_trace_start_comm(_ple_locator_log_start_p_comm, comm_timing);
2189 
2190       MPI_Sendrecv(dist_v_ptr, dist_v_count, datatype, dist_rank, PLE_MPI_TAG,
2191                    loc_v_buf, loc_v_count, datatype, dist_rank, PLE_MPI_TAG,
2192                    this_locator->comm, &status);
2193 
2194       _locator_trace_end_comm(_ple_locator_log_end_p_comm, comm_timing);
2195 
2196       if (loc_v_flag > 0) {
2197         if (local_list == NULL) {
2198           int k;
2199           size_t l;
2200           const size_t nbytes = stride*size;
2201           for (k = 0; k < n_points_loc; k++) {
2202             char *local_v_p = (char *)local_var + _local_point_ids[k]*nbytes;
2203             const char *loc_v_buf_p = (const char *)loc_v_buf + k*nbytes;
2204             for (l = 0; l < nbytes; l++)
2205               local_v_p[l] = loc_v_buf_p[l];
2206           }
2207         }
2208         else {
2209           int k;
2210           size_t l;
2211           const size_t nbytes = stride*size;
2212           const ple_lnum_t idb = this_locator->point_id_base;
2213           for (k = 0; k < n_points_loc; k++) {
2214             char *local_v_p =   (char *)local_var
2215                               + (local_list[_local_point_ids[k]] - idb)*nbytes;
2216             const char *loc_v_buf_p = (const char *)loc_v_buf + k*nbytes;
2217             for (l = 0; l < nbytes; l++)
2218               local_v_p[l] = loc_v_buf_p[l];
2219           }
2220         }
2221       }
2222 
2223     }
2224     else { /* if (reverse == true) */
2225 
2226       if (loc_v_flag > 0) {
2227         if (local_list == NULL) {
2228           int k;
2229           size_t l;
2230           const size_t nbytes = stride*size;
2231           for (k = 0; k < n_points_loc; k++) {
2232             const char *local_v_p
2233               = (const char *)local_var + _local_point_ids[k]*nbytes;
2234             char *loc_v_buf_p = (char *)loc_v_buf + k*nbytes;
2235             for (l = 0; l < nbytes; l++)
2236               loc_v_buf_p[l] = local_v_p[l];
2237           }
2238         }
2239         else {
2240           int k;
2241           size_t l;
2242           const size_t nbytes = stride*size;
2243           const ple_lnum_t idb = this_locator->point_id_base;
2244           for (k = 0; k < n_points_loc; k++) {
2245             const char *local_v_p
2246               = (const char *)local_var
2247                 + (local_list[_local_point_ids[k]] - idb)*nbytes;
2248             char *loc_v_buf_p = (char *)loc_v_buf + k*nbytes;
2249             for (l = 0; l < nbytes; l++)
2250               loc_v_buf_p[l] = local_v_p[l];
2251           }
2252         }
2253       }
2254 
2255       _locator_trace_start_comm(_ple_locator_log_start_p_comm, comm_timing);
2256 
2257       MPI_Sendrecv(loc_v_buf, loc_v_count, datatype, dist_rank, PLE_MPI_TAG,
2258                    dist_v_ptr, dist_v_count, datatype, dist_rank, PLE_MPI_TAG,
2259                    this_locator->comm, &status);
2260 
2261       _locator_trace_end_comm(_ple_locator_log_end_p_comm, comm_timing);
2262 
2263     }
2264 
2265   } /* End of loop on MPI ranks */
2266 
2267   PLE_FREE(loc_v_buf);
2268 
2269   this_locator->exchange_wtime[1] += comm_timing[0];
2270   this_locator->exchange_cpu_time[1] += comm_timing[1];
2271 }
2272 
2273 /*----------------------------------------------------------------------------
2274  * Distribute variable defined on distant points to processes owning
2275  * the original points (i.e. distant processes).
2276  *
2277  * The exchange is symmetric if both variables are defined, receive
2278  * only if distant_var is NULL, or send only if local_var is NULL.
2279  *
2280  * This variant of the function uses asynchronous MPI calls.
2281  *
2282  * parameters:
2283  *   this_locator  <-- pointer to locator structure
2284  *   distant_var   <-> variable defined on distant points (ready to send)
2285  *   local_var     <-> variable defined on local points (received)
2286  *   local_list    <-- optional indirection list for local_var
2287  *   datatype      <-- variable type
2288  *   stride        <-- dimension (1 for scalar, 3 for interlaced vector)
2289  *   reverse       <-- if true, exchange is reversed
2290  *                     (receive values associated with distant points
2291  *                     from the processes owning the original points)
2292  *----------------------------------------------------------------------------*/
2293 
2294 static void
_exchange_point_var_distant_asyn(ple_locator_t * this_locator,void * distant_var,void * local_var,const ple_lnum_t * local_list,MPI_Datatype datatype,size_t stride,_Bool reverse)2295 _exchange_point_var_distant_asyn(ple_locator_t     *this_locator,
2296                                  void              *distant_var,
2297                                  void              *local_var,
2298                                  const ple_lnum_t  *local_list,
2299                                  MPI_Datatype       datatype,
2300                                  size_t             stride,
2301                                  _Bool              reverse)
2302 {
2303   int dist_v_count, loc_v_count, size;
2304   int dist_rank;
2305   ple_lnum_t n_points_loc, n_points_loc_tot, n_points_dist;
2306   size_t dist_v_idx;
2307   unsigned char *dist_v_ptr, *loc_v_ptr;
2308 
2309   MPI_Aint lb, extent;
2310   void *loc_v_buf = NULL;
2311   int *dist_v_flag = NULL, *loc_v_flag = NULL;
2312   MPI_Status *status = NULL;
2313   MPI_Request *request = NULL;
2314 
2315   double comm_timing[4] = {0., 0., 0., 0.};
2316 
2317   /* Check extent of datatype */
2318 
2319 #if (MPI_VERSION >= 2)
2320   MPI_Type_get_extent(datatype, &lb, &extent);
2321 #else
2322   MPI_Type_extent(datatype, &extent);
2323 #endif
2324   MPI_Type_size(datatype, &size);
2325 
2326   if (extent != size)
2327     ple_error(__FILE__, __LINE__, 0,
2328               _("_exchange_point_var() is not implemented for use with\n"
2329                 "MPI datatypes associated with structures using padding\n"
2330                 "(for which size != extent)."));
2331 
2332   /* Initialization */
2333 
2334   n_points_loc_tot
2335     = this_locator->local_points_idx[this_locator->n_intersects];
2336 
2337   PLE_MALLOC(loc_v_flag, this_locator->n_intersects, int);
2338   PLE_MALLOC(dist_v_flag, this_locator->n_intersects, int);
2339   PLE_MALLOC(request, this_locator->n_intersects*2, MPI_Request);
2340   PLE_MALLOC(status, this_locator->n_intersects*2, MPI_Status);
2341 
2342   PLE_MALLOC(loc_v_buf, n_points_loc_tot*size*stride, char);
2343 
2344   /* First loop on distant ranks for argument checks */
2345   /*-------------------------------------------------*/
2346 
2347   _locator_trace_start_comm(_ple_locator_log_start_p_comm, comm_timing);
2348 
2349   for (int i = 0; i < this_locator->n_intersects; i++) {
2350 
2351     dist_rank = this_locator->intersect_rank[i];
2352 
2353     n_points_dist =   this_locator->distant_points_idx[i+1]
2354                     - this_locator->distant_points_idx[i];
2355 
2356     if (distant_var != NULL && n_points_dist > 0)
2357       dist_v_flag[i] = 1;
2358     else
2359       dist_v_flag[i] = 0;
2360 
2361     _locator_trace_start_comm(_ple_locator_log_start_p_comm, comm_timing);
2362 
2363     MPI_Irecv(loc_v_flag + i, 1, MPI_INT, dist_rank, PLE_MPI_TAG,
2364               this_locator->comm, &request[i*2]);
2365     MPI_Isend(dist_v_flag + i, 1, MPI_INT, dist_rank, PLE_MPI_TAG,
2366               this_locator->comm, &request[i*2+1]);
2367   }
2368 
2369   MPI_Waitall(this_locator->n_intersects*2, request, status);
2370 
2371   _locator_trace_end_comm(_ple_locator_log_end_p_comm, comm_timing);
2372 
2373   PLE_FREE(dist_v_flag);
2374 
2375   for (int i = 0; i < this_locator->n_intersects; i++) {
2376 
2377     dist_rank = this_locator->intersect_rank[i];
2378 
2379     n_points_loc =   this_locator->local_points_idx[i+1]
2380                    - this_locator->local_points_idx[i];
2381 
2382     if (loc_v_flag[i] == 1 && (local_var == NULL || n_points_loc == 0))
2383       ple_error(__FILE__, __LINE__, 0,
2384                 _("Incoherent arguments to different instances in "
2385                   "_exchange_point_var().\n"
2386                   "Send and receive operations do not match "
2387                   "(dist_rank = %d\n)\n"), dist_rank);
2388   }
2389 
2390   /* Loop on distant ranks for exchange of data in standard mode */
2391   /*-------------------------------------------------------------*/
2392 
2393   if (reverse == false) {
2394 
2395     _locator_trace_start_comm(_ple_locator_log_start_p_comm, comm_timing);
2396 
2397     loc_v_ptr = loc_v_buf;
2398 
2399     for (int i = 0; i < this_locator->n_intersects; i++) {
2400 
2401       dist_rank = this_locator->intersect_rank[i];
2402 
2403       n_points_loc =    this_locator->local_points_idx[i+1]
2404                       - this_locator->local_points_idx[i];
2405 
2406       n_points_dist =   this_locator->distant_points_idx[i+1]
2407                       - this_locator->distant_points_idx[i];
2408 
2409       dist_v_idx = this_locator->distant_points_idx[i] * stride*size;
2410 
2411       if (distant_var != NULL)
2412         dist_v_count = n_points_dist * stride;
2413       else
2414         dist_v_count = 0;
2415 
2416       if (loc_v_flag[i] > 0)
2417         loc_v_count = n_points_loc*stride;
2418       else
2419         loc_v_count = 0;
2420 
2421       /* Exchange information */
2422 
2423       if (distant_var != NULL)
2424         dist_v_ptr = ((unsigned char *)distant_var) + dist_v_idx;
2425       else
2426         dist_v_ptr = NULL;
2427 
2428       MPI_Irecv(loc_v_ptr, loc_v_count, datatype, dist_rank, PLE_MPI_TAG,
2429                 this_locator->comm, &request[i*2]);
2430       MPI_Isend(dist_v_ptr, dist_v_count, datatype, dist_rank, PLE_MPI_TAG,
2431                 this_locator->comm, &request[i*2+1]);
2432 
2433       loc_v_ptr += loc_v_count*size;
2434     }
2435 
2436     MPI_Waitall(this_locator->n_intersects*2, request, status);
2437 
2438     _locator_trace_end_comm(_ple_locator_log_end_p_comm, comm_timing);
2439   }
2440 
2441   /* Loop on distant ranks for preparation or retrieval of buffer data */
2442   /*-------------------------------------------------------------------*/
2443 
2444   loc_v_ptr = loc_v_buf;
2445 
2446   for (int i = 0; i < this_locator->n_intersects; i++) {
2447 
2448     const ple_lnum_t *_local_point_ids
2449       = this_locator->local_point_ids + this_locator->local_points_idx[i];
2450 
2451     dist_rank = this_locator->intersect_rank[i];
2452 
2453     n_points_loc =    this_locator->local_points_idx[i+1]
2454       - this_locator->local_points_idx[i];
2455 
2456     n_points_dist =   this_locator->distant_points_idx[i+1]
2457                     - this_locator->distant_points_idx[i];
2458 
2459     dist_v_idx = this_locator->distant_points_idx[i] * stride*size;
2460 
2461     if (distant_var != NULL)
2462       dist_v_count = n_points_dist * stride;
2463     else
2464       dist_v_count = 0;
2465 
2466     if (loc_v_flag[i] > 0)
2467       loc_v_count = n_points_loc*stride;
2468     else
2469       loc_v_count = 0;
2470 
2471     /* Exchange information */
2472 
2473     if (distant_var != NULL)
2474       dist_v_ptr = ((unsigned char *)distant_var) + dist_v_idx;
2475     else
2476       dist_v_ptr = NULL;
2477 
2478     dist_rank = this_locator->intersect_rank[i];
2479 
2480     n_points_loc =    this_locator->local_points_idx[i+1]
2481                     - this_locator->local_points_idx[i];
2482 
2483     n_points_dist =   this_locator->distant_points_idx[i+1]
2484                     - this_locator->distant_points_idx[i];
2485 
2486     if (reverse == false) {
2487 
2488       if (loc_v_flag[i] > 0) {
2489         if (local_list == NULL) {
2490           const size_t nbytes = stride*size;
2491           for (ple_lnum_t k = 0; k < n_points_loc; k++) {
2492             char *local_v_p = (char *)local_var + _local_point_ids[k]*nbytes;
2493             const char *loc_v_buf_p = (const char *)loc_v_ptr + k*nbytes;
2494             for (size_t l = 0; l < nbytes; l++)
2495               local_v_p[l] = loc_v_buf_p[l];
2496           }
2497         }
2498         else {
2499           const size_t nbytes = stride*size;
2500           const ple_lnum_t idb = this_locator->point_id_base;
2501           for (ple_lnum_t k = 0; k < n_points_loc; k++) {
2502             char *local_v_p =   (char *)local_var
2503                               + (local_list[_local_point_ids[k]] - idb)*nbytes;
2504             const char *loc_v_buf_p = (const char *)loc_v_ptr + k*nbytes;
2505             for (size_t l = 0; l < nbytes; l++)
2506               local_v_p[l] = loc_v_buf_p[l];
2507           }
2508         }
2509       }
2510 
2511     }
2512     else { /* if (reverse == true) */
2513 
2514       if (loc_v_flag[i] > 0) {
2515         if (local_list == NULL) {
2516           const size_t nbytes = stride*size;
2517           for (ple_lnum_t k = 0; k < n_points_loc; k++) {
2518             const char *local_v_p
2519               = (const char *)local_var + _local_point_ids[k]*nbytes;
2520             char *loc_v_buf_p = (char *)loc_v_ptr + k*nbytes;
2521             for (size_t l = 0; l < nbytes; l++)
2522               loc_v_buf_p[l] = local_v_p[l];
2523           }
2524         }
2525         else {
2526           const size_t nbytes = stride*size;
2527           const ple_lnum_t idb = this_locator->point_id_base;
2528           for (ple_lnum_t k = 0; k < n_points_loc; k++) {
2529             const char *local_v_p
2530               = (const char *)local_var
2531                 + (local_list[_local_point_ids[k]] - idb)*nbytes;
2532             char *loc_v_buf_p = (char *)loc_v_ptr + k*nbytes;
2533             for (size_t l = 0; l < nbytes; l++)
2534               loc_v_buf_p[l] = local_v_p[l];
2535           }
2536         }
2537       }
2538 
2539     }
2540 
2541     loc_v_ptr += loc_v_count*size;
2542 
2543   }
2544 
2545   /* Loop on distant ranks for exchange of data in reverse mode */
2546   /*------------------------------------------------------------*/
2547 
2548   if (reverse == true) {
2549 
2550     _locator_trace_start_comm(_ple_locator_log_start_p_comm, comm_timing);
2551 
2552     loc_v_ptr = loc_v_buf;
2553 
2554     for (int i = 0; i < this_locator->n_intersects; i++) {
2555 
2556       dist_rank = this_locator->intersect_rank[i];
2557 
2558       n_points_loc =    this_locator->local_points_idx[i+1]
2559                       - this_locator->local_points_idx[i];
2560 
2561       n_points_dist =   this_locator->distant_points_idx[i+1]
2562                       - this_locator->distant_points_idx[i];
2563 
2564       dist_v_idx = this_locator->distant_points_idx[i] * stride*size;
2565 
2566       if (distant_var != NULL)
2567         dist_v_count = n_points_dist * stride;
2568       else
2569         dist_v_count = 0;
2570 
2571       if (loc_v_flag[i] > 0)
2572         loc_v_count = n_points_loc*stride;
2573       else
2574         loc_v_count = 0;
2575 
2576       /* Exchange information */
2577 
2578       if (distant_var != NULL)
2579         dist_v_ptr = ((unsigned char *)distant_var) + dist_v_idx;
2580       else
2581         dist_v_ptr = NULL;
2582 
2583       MPI_Irecv(dist_v_ptr, dist_v_count, datatype, dist_rank, PLE_MPI_TAG,
2584                 this_locator->comm, &request[i*2]);
2585       MPI_Isend(loc_v_ptr, loc_v_count, datatype, dist_rank, PLE_MPI_TAG,
2586                 this_locator->comm, &request[i*2+1]);
2587 
2588       loc_v_ptr += loc_v_count*size;
2589     }
2590 
2591     MPI_Waitall(this_locator->n_intersects*2, request, status);
2592 
2593     _locator_trace_end_comm(_ple_locator_log_end_p_comm, comm_timing);
2594 
2595   }
2596 
2597   /* Free temporary arrays */
2598 
2599   PLE_FREE(loc_v_buf);
2600 
2601   PLE_FREE(loc_v_flag);
2602   PLE_FREE(request);
2603   PLE_FREE(status);
2604 
2605   this_locator->exchange_wtime[1] += comm_timing[0];
2606   this_locator->exchange_cpu_time[1] += comm_timing[1];
2607 }
2608 
2609 #endif /* defined(PLE_HAVE_MPI) */
2610 
2611 /*----------------------------------------------------------------------------
2612  * Distribute variable defined on "distant points" to the original ("local")
2613  * points.
2614  *
2615  * The exchange is symmetric if both variables are defined, receive
2616  * only if distant_var is NULL, or send only if local_var is NULL.
2617  *
2618  * parameters:
2619  *   this_locator  <-- pointer to locator structure
2620  *   distant_var   <-> variable defined on distant points (ready to send)
2621  *   local_var     <-> variable defined on local points (received)
2622  *   local_list    <-- optional indirection list for local_var
2623  *   type_size     <-- sizeof (float or double) variable type
2624  *   stride        <-- dimension (1 for scalar, 3 for interlaced vector)
2625  *   reverse       <-- if true, exchange is reversed
2626  *                     (receive values associated with distant points
2627  *                     from the processes owning the original points)
2628  *----------------------------------------------------------------------------*/
2629 
2630 static void
_exchange_point_var_local(ple_locator_t * this_locator,void * distant_var,void * local_var,const ple_lnum_t * local_list,size_t type_size,size_t stride,_Bool reverse)2631 _exchange_point_var_local(ple_locator_t     *this_locator,
2632                           void              *distant_var,
2633                           void              *local_var,
2634                           const ple_lnum_t  *local_list,
2635                           size_t             type_size,
2636                           size_t             stride,
2637                           _Bool              reverse)
2638 {
2639   ple_lnum_t i;
2640   size_t j;
2641   ple_lnum_t n_points_loc;
2642 
2643   const size_t nbytes = stride*type_size;
2644 
2645   /* Initialization */
2646 
2647   if (this_locator->n_interior == 0)
2648     return;
2649 
2650   n_points_loc =   this_locator->local_points_idx[1]
2651                  - this_locator->local_points_idx[0];
2652 
2653   assert(n_points_loc == (  this_locator->distant_points_idx[1]
2654                           - this_locator->distant_points_idx[0]));
2655 
2656   /* Exchange information */
2657 
2658   if (reverse == false) {
2659 
2660     if (local_list == NULL)
2661       memcpy(local_var, distant_var, n_points_loc*nbytes);
2662 
2663     else {
2664       const ple_lnum_t idb = this_locator->point_id_base;
2665       for (i = 0; i < n_points_loc; i++) {
2666         char *local_var_p = (char *)local_var + (local_list[i] - idb)*nbytes;
2667         const char *distant_var_p = (const char *)distant_var + i*nbytes;
2668         for (j = 0; j < nbytes; j++)
2669           local_var_p[j] = distant_var_p[j];
2670       }
2671     }
2672 
2673   }
2674   else { /* if (reverse == true) */
2675 
2676     if (local_list == NULL)
2677       memcpy(distant_var, local_var, n_points_loc*nbytes);
2678 
2679     else {
2680       const ple_lnum_t idb = this_locator->point_id_base;
2681       for (i = 0; i < n_points_loc; i++) {
2682         const char *local_var_p
2683           = (const char *)local_var + (local_list[i] - idb)*nbytes;
2684         char *distant_var_p = (char *)distant_var + i*nbytes;
2685         for (j = 0; j < nbytes; j++)
2686           distant_var_p[j] = local_var_p[j];
2687       }
2688     }
2689 
2690   }
2691 }
2692 
2693 /*----------------------------------------------------------------------------
2694  * Return timing information.
2695  *
2696  * When location on closest elements to force location of all points is
2697  * active, location times include a total value, followed by the value
2698  * associated with the location of closest elements stage.
2699  *
2700  * parameters:
2701  *   this_locator      <-- pointer to locator structure
2702  *   time_type         <-- 0 for total times, 1 for communication times
2703  *   location_wtime    --> Location Wall-clock time (or NULL)
2704  *   location_cpu_time --> Location CPU time (or NULL)
2705  *   exchange_wtime    --> Variable exchange Wall-clock time (or NULL)
2706  *   exchange_cpu_time --> Variable exchange CPU time (or NULL)
2707  *----------------------------------------------------------------------------*/
2708 
2709 static void
_get_times(const ple_locator_t * this_locator,int time_type,double * location_wtime,double * location_cpu_time,double * exchange_wtime,double * exchange_cpu_time)2710 _get_times(const ple_locator_t  *this_locator,
2711            int                   time_type,
2712            double               *location_wtime,
2713            double               *location_cpu_time,
2714            double               *exchange_wtime,
2715            double               *exchange_cpu_time)
2716 {
2717   const ple_locator_t  *_locator = this_locator;
2718 
2719   if (this_locator != NULL) {
2720 
2721     if (location_wtime != NULL) {
2722       *location_wtime = _locator->location_wtime[time_type];
2723     }
2724     if (location_cpu_time != NULL) {
2725       *location_cpu_time = _locator->location_cpu_time[time_type];
2726     }
2727     if (exchange_wtime != NULL)
2728       *exchange_wtime = _locator->exchange_wtime[time_type];
2729     if (exchange_cpu_time != NULL)
2730       *exchange_cpu_time = _locator->exchange_cpu_time[time_type];
2731 
2732   }
2733   else {
2734 
2735     if (location_wtime != NULL) {
2736       location_wtime[0] = 0.;
2737       location_wtime[1] = 0.;
2738     }
2739     if (location_cpu_time != NULL) {
2740       location_cpu_time[0] = 0.;
2741       location_cpu_time[1] = 0.;
2742     }
2743     if (exchange_wtime != NULL)
2744       *exchange_wtime = 0.;
2745     if (exchange_cpu_time != NULL)
2746       *exchange_cpu_time = 0.;
2747   }
2748 }
2749 
2750 /*============================================================================
2751  * Public function definitions
2752  *============================================================================*/
2753 
2754 /*----------------------------------------------------------------------------*/
2755 /*!
2756  * \brief Creation of a locator structure.
2757  *
2758  * Note that depending on the choice of ranks of the associated communicator,
2759  * distant ranks may in fact be truly distant or not. If n_ranks = 1 and
2760  * start_rank is equal to the current rank in the communicator, the locator
2761  * will work only locally.
2762  *
2763  * \param[in] comm       associated MPI communicator
2764  * \param[in] n_ranks    number of MPI ranks associated with distant location
2765  * \param[in] start_rank first MPI rank associated with distant location
2766  *
2767  * \return pointer to locator
2768  */
2769 /*----------------------------------------------------------------------------*/
2770 
2771 #if defined(PLE_HAVE_MPI)
2772 ple_locator_t *
ple_locator_create(MPI_Comm comm,int n_ranks,int start_rank)2773 ple_locator_create(MPI_Comm  comm,
2774                    int       n_ranks,
2775                    int       start_rank)
2776 #else
2777 ple_locator_t *
2778 ple_locator_create(void)
2779 #endif
2780 {
2781   int  i;
2782   ple_locator_t  *this_locator;
2783 
2784   PLE_MALLOC(this_locator, 1, ple_locator_t);
2785 
2786   this_locator->dim = 0;
2787   this_locator->have_tags = 0;
2788 
2789 #if defined(PLE_HAVE_MPI)
2790   this_locator->comm = comm;
2791   this_locator->n_ranks = n_ranks;
2792   this_locator->start_rank = start_rank;
2793 #else
2794   this_locator->n_ranks = 1;
2795   this_locator->start_rank = 0;
2796 #endif
2797 
2798   this_locator->locate_algorithm = _LOCATE_BB_SENDRECV;
2799   this_locator->exchange_algorithm = _EXCHANGE_SENDRECV;
2800 
2801   this_locator->point_id_base = 0;
2802 
2803   this_locator->n_intersects = 0;
2804   this_locator->intersect_rank = NULL;
2805   this_locator->comm_order = NULL;
2806 
2807   this_locator->local_points_idx = NULL;
2808   this_locator->distant_points_idx = NULL;
2809 
2810   this_locator->local_point_ids = NULL;
2811 
2812   this_locator->distant_point_location = NULL;
2813   this_locator->distant_point_coords = NULL;
2814 
2815   this_locator->n_interior = 0;
2816   this_locator->interior_list = NULL;
2817 
2818   this_locator->n_exterior = 0;
2819   this_locator->exterior_list = NULL;
2820 
2821   for (i = 0; i < 2; i++) {
2822     this_locator->location_wtime[i] = 0.;
2823     this_locator->location_cpu_time[i] = 0.;
2824   }
2825 
2826   for (i = 0; i < 2; i++) {
2827     this_locator->exchange_wtime[i] = 0.;
2828     this_locator->exchange_cpu_time[i] = 0.;
2829   }
2830 
2831   return this_locator;
2832 }
2833 
2834 /*----------------------------------------------------------------------------*/
2835 /*!
2836  * \brief Destruction of a locator structure.
2837  *
2838  * \param[in, out] this_locator locator to destroy
2839  *
2840  * \return NULL pointer
2841  */
2842 /*----------------------------------------------------------------------------*/
2843 
2844 ple_locator_t *
ple_locator_destroy(ple_locator_t * this_locator)2845 ple_locator_destroy(ple_locator_t  *this_locator)
2846 {
2847   if (this_locator != NULL) {
2848 
2849     PLE_FREE(this_locator->local_points_idx);
2850     PLE_FREE(this_locator->distant_points_idx);
2851 
2852     if (this_locator->local_point_ids != NULL)
2853       PLE_FREE(this_locator->local_point_ids);
2854 
2855     PLE_FREE(this_locator->distant_point_location);
2856     PLE_FREE(this_locator->distant_point_coords);
2857 
2858     PLE_FREE(this_locator->intersect_rank);
2859     PLE_FREE(this_locator->comm_order);
2860 
2861     PLE_FREE(this_locator->interior_list);
2862     PLE_FREE(this_locator->exterior_list);
2863 
2864     PLE_FREE(this_locator);
2865   }
2866 
2867   return NULL;
2868 }
2869 
2870 /*----------------------------------------------------------------------------*/
2871 /*!
2872  * \brief Prepare locator for use with a given mesh representation.
2873  *
2874  * \param[in, out] this_locator        pointer to locator structure
2875  * \param[in]      mesh                pointer to mesh representation structure
2876  * \param[in]      options             options array (size
2877  *                                     PLE_LOCATOR_N_OPTIONS), or NULL
2878  * \param[in]      tolerance_base      associated fixed tolerance
2879  * \param[in]      tolerance_fraction  associated fraction of element bounding
2880  *                                     boxes added to tolerance
2881  * \param[in]      dim                 spatial dimension of mesh and points to
2882  *                                     locate
2883  * \param[in]      n_points            number of points to locate
2884  * \param[in]      point_list          optional indirection array to point_coords
2885  * \param[in]      point_tag           optional point tag (size: n_points)
2886  * \param[in]      point_coords        coordinates of points to locate
2887  *                                     (dimension: dim * n_points)
2888  * \param[out]     distance            optional distance from point to matching
2889  *                                     element: < 0 if unlocated; 0 - 1 if inside
2890  *                                     and > 1 if outside a volume element, or
2891  *                                     absolute distance to a surface element
2892  *                                     (size: n_points)
2893  * \param[in]      mesh_extents_f      pointer to function computing mesh or mesh
2894  *                                     subset or element extents
2895  * \param[in]      mesh_locate_f       pointer to function wich updates the
2896  *                                     location[] and distance[] arrays
2897  *                                     associated with a set of points for
2898  *                                     points that are in an element of this
2899  *                                     mesh, or closer to one than to previously
2900  *                                     encountered elements.
2901  */
2902 /*----------------------------------------------------------------------------*/
2903 
2904 void
ple_locator_set_mesh(ple_locator_t * this_locator,const void * mesh,const int * options,float tolerance_base,float tolerance_fraction,int dim,ple_lnum_t n_points,const ple_lnum_t point_list[],const int point_tag[],const ple_coord_t point_coords[],float distance[],ple_mesh_extents_t * mesh_extents_f,ple_mesh_elements_locate_t * mesh_locate_f)2905 ple_locator_set_mesh(ple_locator_t               *this_locator,
2906                      const void                  *mesh,
2907                      const int                   *options,
2908                      float                        tolerance_base,
2909                      float                        tolerance_fraction,
2910                      int                          dim,
2911                      ple_lnum_t                   n_points,
2912                      const ple_lnum_t             point_list[],
2913                      const int                    point_tag[],
2914                      const ple_coord_t            point_coords[],
2915                      float                        distance[],
2916                      ple_mesh_extents_t          *mesh_extents_f,
2917                      ple_mesh_elements_locate_t  *mesh_locate_f)
2918 {
2919   double w_start, w_end, cpu_start, cpu_end;
2920 
2921   /* Initialize timing */
2922 
2923   w_start = ple_timer_wtime();
2924   cpu_start = ple_timer_cpu_time();
2925 
2926   /* Other initializations */
2927 
2928   this_locator->dim = dim;
2929 
2930   if (distance != NULL) {
2931     for (ple_lnum_t i = 0; i < n_points; i++)
2932       distance[i] = -1;
2933   }
2934 
2935   /* Release information if previously present */
2936 
2937   _clear_location_info(this_locator);
2938 
2939   ple_locator_extend_search(this_locator,
2940                             mesh,
2941                             options,
2942                             tolerance_base,
2943                             tolerance_fraction,
2944                             n_points,
2945                             point_list,
2946                             point_tag,
2947                             point_coords,
2948                             distance,
2949                             mesh_extents_f,
2950                             mesh_locate_f);
2951 
2952   /* Finalize timing */
2953 
2954   w_end = ple_timer_wtime();
2955   cpu_end = ple_timer_cpu_time();
2956 
2957   this_locator->location_wtime[0] += (w_end - w_start);
2958   this_locator->location_cpu_time[0] += (cpu_end - cpu_start);
2959 }
2960 
2961 /*----------------------------------------------------------------------------*/
2962 /*!
2963  * \brief Extend search for a locator for which set_mesh has already been
2964  *        called.
2965  *
2966  * \param[in, out] this_locator        pointer to locator structure
2967  * \param[in]      mesh                pointer to mesh representation structure
2968  * \param[in]      options             options array (size
2969  *                                     PLE_LOCATOR_N_OPTIONS), or NULL
2970  * \param[in]      tolerance_base      associated fixed tolerance
2971  * \param[in]      tolerance_fraction  associated fraction of element bounding
2972  *                                     boxes added to tolerance
2973  * \param[in]      n_points            number of points to locate
2974  * \param[in]      point_list          optional indirection array to point_coords
2975  * \param[in]      point_tag           optional point tag (size: n_points)
2976  * \param[in]      point_coords        coordinates of points to locate
2977  *                                     (dimension: dim * n_points)
2978  * \param[out]     distance            optional distance from point to matching
2979  *                                     element: < 0 if unlocated; 0 - 1 if inside
2980  *                                     and > 1 if outside a volume element, or
2981  *                                     absolute distance to a surface element
2982  *                                     (size: n_points)
2983  * \param[in]      mesh_extents_f      pointer to function computing mesh or mesh
2984  *                                     subset or element extents
2985  * \param[in]      mesh_locate_f       pointer to function wich updates the
2986  *                                     location[] and distance[] arrays
2987  *                                     associated with a set of points for
2988  *                                     points that are in an element of this
2989  *                                     mesh, or closer to one than to previously
2990  *                                     encountered elements.
2991  */
2992 /*----------------------------------------------------------------------------*/
2993 
2994 void
ple_locator_extend_search(ple_locator_t * this_locator,const void * mesh,const int * options,float tolerance_base,float tolerance_fraction,ple_lnum_t n_points,const ple_lnum_t point_list[],const int point_tag[],const ple_coord_t point_coords[],float distance[],ple_mesh_extents_t * mesh_extents_f,ple_mesh_elements_locate_t * mesh_locate_f)2995 ple_locator_extend_search(ple_locator_t               *this_locator,
2996                           const void                  *mesh,
2997                           const int                   *options,
2998                           float                        tolerance_base,
2999                           float                        tolerance_fraction,
3000                           ple_lnum_t                   n_points,
3001                           const ple_lnum_t             point_list[],
3002                           const int                    point_tag[],
3003                           const ple_coord_t            point_coords[],
3004                           float                        distance[],
3005                           ple_mesh_extents_t          *mesh_extents_f,
3006                           ple_mesh_elements_locate_t  *mesh_locate_f)
3007 {
3008   int i;
3009   double w_start, w_end, cpu_start, cpu_end;
3010   ple_lnum_t  *location;
3011 
3012   double comm_timing[4] = {0., 0., 0., 0.};
3013   int mpi_flag = 0;
3014 
3015   const int dim = this_locator->dim;
3016 
3017   /* Initialize timing */
3018 
3019   w_start = ple_timer_wtime();
3020   cpu_start = ple_timer_cpu_time();
3021 
3022   if (options != NULL)
3023     this_locator->point_id_base = options[PLE_LOCATOR_NUMBERING];
3024   else
3025     this_locator->point_id_base = 0;
3026 
3027   const int idb = this_locator->point_id_base;
3028 
3029   this_locator->have_tags = 0;
3030 
3031   /* Prepare locator (MPI version) */
3032   /*-------------------------------*/
3033 
3034 #if defined(PLE_HAVE_MPI)
3035 
3036   MPI_Initialized(&mpi_flag);
3037 
3038   if (mpi_flag && this_locator->comm == MPI_COMM_NULL)
3039     mpi_flag = 0;
3040 
3041   if (mpi_flag) {
3042 
3043     /* Flag values
3044        0: mesh dimension
3045        1: space dimension
3046        2: minimum algorithm version
3047        3: maximum algorithm version
3048        4: preferred algorithm version
3049        5: have point tags */
3050 
3051     int globflag[6];
3052     int locflag[6] = {-1,
3053                       -1,
3054                       1, /* equivalent to _LOCATE_BB_SENDRECV */
3055                       -_LOCATE_BB_SENDRECV_ORDERED,
3056                       _LOCATE_BB_SENDRECV_ORDERED,
3057                       0};
3058     ple_lnum_t  *location_rank_id;
3059 
3060     /* Check that at least one of the local or distant nodal meshes
3061        is non-NULL, and at least one of the local or distant
3062        point sets is non null */
3063 
3064     if (mesh != NULL)
3065       locflag[0] = dim;
3066 
3067     if (n_points > 0)
3068       locflag[1] = dim;
3069 
3070     if (n_points > 0 && point_tag != NULL)
3071       locflag[5] = 1;
3072 
3073     _locator_trace_start_comm(_ple_locator_log_start_g_comm, comm_timing);
3074 
3075     MPI_Allreduce(locflag, globflag, 6, MPI_INT, MPI_MAX,
3076                   this_locator->comm);
3077 
3078     _locator_trace_end_comm(_ple_locator_log_end_g_comm, comm_timing);
3079 
3080     if (globflag[0] < 0 || globflag[1] < 0)
3081       return;
3082     else if (mesh != NULL && globflag[1] != dim)
3083       ple_error(__FILE__, __LINE__, 0,
3084                 _("Locator trying to use distant space dimension %d\n"
3085                   "with local space dimension %d\n"),
3086                 globflag[1], dim);
3087     else if (mesh == NULL && globflag[0] != dim)
3088       ple_error(__FILE__, __LINE__, 0,
3089                 _("Locator trying to use local space dimension %d\n"
3090                   "with distant space dimension %d\n"),
3091                 dim, globflag[0]);
3092 
3093     /* Check algorithm versions and supported features */
3094 
3095     globflag[3] = -globflag[3];
3096 
3097     /* Compatibility with older versions */
3098     for (i = 2; i < 5; i++) {
3099       if (globflag[i] == 1)
3100         globflag[i] = _LOCATE_BB_SENDRECV;
3101     }
3102 
3103     if (globflag[2] > globflag[3])
3104       ple_error(__FILE__, __LINE__, 0,
3105                 _("Incompatible locator algorithm ranges:\n"
3106                   "  global minimum algorithm id %d\n"
3107                   "  global maximum algorithm id %d\n"
3108                   "PLE library versions or builds are incompatible."),
3109                 globflag[2], globflag[3]);
3110 
3111     if (globflag[4] < globflag[2])
3112       globflag[4] = globflag[2];
3113     if (globflag[4] > globflag[3])
3114       globflag[4] = globflag[3];
3115 
3116     this_locator->locate_algorithm = globflag[4];
3117 
3118     if (globflag[5] > 0)
3119       this_locator->have_tags = 1;
3120 
3121     /* Free temporary memory */
3122 
3123     PLE_MALLOC(location, n_points, ple_lnum_t);
3124     PLE_MALLOC(location_rank_id, n_points, ple_lnum_t);
3125 
3126     _transfer_location_distant(this_locator,
3127                                n_points,
3128                                location,
3129                                location_rank_id);
3130 
3131     _locate_all_distant(this_locator,
3132                         mesh,
3133                         tolerance_base,
3134                         tolerance_fraction,
3135                         n_points,
3136                         point_list,
3137                         point_tag,
3138                         point_coords,
3139                         location,
3140                         location_rank_id,
3141                         distance,
3142                         mesh_extents_f,
3143                         mesh_locate_f);
3144 
3145     PLE_FREE(location_rank_id);
3146   }
3147 
3148 #endif
3149 
3150   /* Prepare locator (local version) */
3151   /*---------------------------------*/
3152 
3153   if (!mpi_flag) {
3154 
3155     if (mesh == NULL || n_points == 0)
3156       return;
3157 
3158     if (point_tag != NULL)
3159       this_locator->have_tags = 1;
3160 
3161     PLE_MALLOC(location, n_points, ple_lnum_t);
3162 
3163     _transfer_location_local(this_locator,
3164                              n_points,
3165                              location);
3166 
3167     _locate_all_local(this_locator,
3168                       mesh,
3169                       tolerance_base,
3170                       tolerance_fraction,
3171                       n_points,
3172                       point_list,
3173                       point_tag,
3174                       point_coords,
3175                       location,
3176                       distance,
3177                       mesh_extents_f,
3178                       mesh_locate_f);
3179 
3180     PLE_FREE(location);
3181 
3182   }
3183 
3184   /* Update local_point_ids values */
3185   /*-------------------------------*/
3186 
3187   if (   this_locator->n_interior > 0
3188       && this_locator->local_point_ids != NULL) {
3189 
3190     ple_lnum_t  *reduced_index;
3191 
3192     PLE_MALLOC(reduced_index, n_points, ple_lnum_t);
3193 
3194     for (i = 0; i < n_points; i++)
3195       reduced_index[i] = -1;
3196 
3197     assert(  this_locator->local_points_idx[this_locator->n_intersects]
3198            == this_locator->n_interior);
3199 
3200     for (i = 0; i < this_locator->n_interior; i++)
3201       reduced_index[this_locator->interior_list[i] - idb] = i;
3202 
3203     /* Update this_locator->local_point_ids[] so that it refers
3204        to an index in a dense [0, this_locator->n_interior] subset
3205        of the local points */
3206 
3207     for (i = 0; i < this_locator->n_interior; i++)
3208       this_locator->local_point_ids[i]
3209         = reduced_index[this_locator->local_point_ids[i]];
3210 
3211     for (i = 0; i < this_locator->n_interior; i++)
3212       assert(this_locator->local_point_ids[i] > -1);
3213 
3214     PLE_FREE(reduced_index);
3215 
3216   }
3217 
3218   /* If an initial point list was given, update
3219      this_locator->interior_list and this_locator->exterior_list
3220      so that they refer to the same point set as that initial
3221      list (and not to an index within the selected point set) */
3222 
3223   if (point_list != NULL) {
3224 
3225     for (i = 0; i < this_locator->n_interior; i++)
3226       this_locator->interior_list[i]
3227         = point_list[this_locator->interior_list[i] - idb];
3228 
3229     for (i = 0; i < this_locator->n_exterior; i++)
3230       this_locator->exterior_list[i]
3231         = point_list[this_locator->exterior_list[i] - idb];
3232 
3233   }
3234 
3235   /* Finalize timing */
3236 
3237   w_end = ple_timer_wtime();
3238   cpu_end = ple_timer_cpu_time();
3239 
3240   this_locator->location_wtime[0] += (w_end - w_start);
3241   this_locator->location_cpu_time[0] += (cpu_end - cpu_start);
3242 
3243   this_locator->location_wtime[1] += comm_timing[0];
3244   this_locator->location_cpu_time[1] += comm_timing[1];
3245 }
3246 
3247 /*----------------------------------------------------------------------------*/
3248 /*!
3249  * \brief Shift location ids for located points after locator initialization.
3250  *
3251  * This is useful mainly to switch between 0-based to 1-based numberings.
3252  *
3253  * \param[in, out] this_locator    pointer to locator structure
3254  * \param[in]      location_shift  shift value
3255  */
3256 /*----------------------------------------------------------------------------*/
3257 
3258 void
ple_locator_shift_locations(ple_locator_t * this_locator,ple_lnum_t location_shift)3259 ple_locator_shift_locations(ple_locator_t  *this_locator,
3260                             ple_lnum_t      location_shift)
3261 {
3262   int n_intersects = this_locator->n_intersects;
3263   if (n_intersects == 0)
3264     return;
3265 
3266   const ple_lnum_t n_points = this_locator->distant_points_idx[n_intersects];
3267 
3268   for (ple_lnum_t i = 0; i < n_points; i++) {
3269     if (this_locator->distant_point_location[i] > -1)
3270       this_locator->distant_point_location[i] += location_shift;
3271   }
3272 }
3273 
3274 /*----------------------------------------------------------------------------*/
3275 /*!
3276  * \brief Return number of distant points after locator initialization.
3277  *
3278  * \param[in] this_locator pointer to locator structure
3279  *
3280  * \return number of distant points.
3281  */
3282 /*----------------------------------------------------------------------------*/
3283 
3284 ple_lnum_t
ple_locator_get_n_dist_points(const ple_locator_t * this_locator)3285 ple_locator_get_n_dist_points(const ple_locator_t  *this_locator)
3286 {
3287   ple_lnum_t retval = 0;
3288 
3289   if (this_locator != NULL) {
3290     if (this_locator->n_intersects != 0)
3291       retval = this_locator->distant_points_idx[this_locator->n_intersects];
3292   }
3293 
3294   return retval;
3295 }
3296 
3297 /*----------------------------------------------------------------------------*/
3298 /*!
3299  * \brief Return an array of local element numbers containing (or nearest to)
3300  *  each distant point after locator initialization.
3301  *
3302  * \param[in] this_locator pointer to locator structure
3303  *
3304  * \return local element numbers associated with distant points.
3305  */
3306 /*----------------------------------------------------------------------------*/
3307 
3308 const ple_lnum_t *
ple_locator_get_dist_locations(const ple_locator_t * this_locator)3309 ple_locator_get_dist_locations(const ple_locator_t  *this_locator)
3310 {
3311   const ple_lnum_t * retval = NULL;
3312 
3313   if (this_locator != NULL) {
3314     if (this_locator->n_ranks != 0)
3315       retval = this_locator->distant_point_location;
3316   }
3317 
3318   return retval;
3319 }
3320 
3321 /*----------------------------------------------------------------------------*/
3322 /*!
3323  * \brief Return an array of coordinates of each distant point after
3324  * locator initialization.
3325  *
3326  * \param[in] this_locator pointer to locator structure
3327  *
3328  * \return coordinate array associated with distant points (interlaced).
3329  */
3330 /*----------------------------------------------------------------------------*/
3331 
3332 const ple_coord_t *
ple_locator_get_dist_coords(const ple_locator_t * this_locator)3333 ple_locator_get_dist_coords(const ple_locator_t  *this_locator)
3334 {
3335   const ple_coord_t * retval = NULL;
3336 
3337   if (this_locator != NULL) {
3338     if (this_locator->n_intersects != 0)
3339       retval = this_locator->distant_point_coords;
3340   }
3341 
3342   return retval;
3343 }
3344 
3345 /*----------------------------------------------------------------------------*/
3346 /*!
3347  * \brief Return number of points located after locator initialization.
3348  *
3349  * \param[in] this_locator pointer to locator structure
3350  *
3351  * \return number of points located.
3352  */
3353 /*----------------------------------------------------------------------------*/
3354 
3355 ple_lnum_t
ple_locator_get_n_interior(const ple_locator_t * this_locator)3356 ple_locator_get_n_interior(const ple_locator_t  *this_locator)
3357 {
3358   ple_lnum_t retval = 0;
3359 
3360   if (this_locator != NULL)
3361     retval = this_locator->n_interior;
3362 
3363   return retval;
3364 }
3365 
3366 /*----------------------------------------------------------------------------*/
3367 /*!
3368  * \brief Return list of points located after locator initialization.
3369  * This list defines a subset of the point set used at initialization.
3370  *
3371  * \param[in] this_locator pointer to locator structure
3372  *
3373  * \return list of points located.
3374  */
3375 /*----------------------------------------------------------------------------*/
3376 
3377 const ple_lnum_t *
ple_locator_get_interior_list(const ple_locator_t * this_locator)3378 ple_locator_get_interior_list(const ple_locator_t  *this_locator)
3379 {
3380   return this_locator->interior_list;
3381 }
3382 
3383 /*----------------------------------------------------------------------------*/
3384 /*!
3385  * \brief Return number of points not located after locator initialization.
3386  *
3387  * \param[in] this_locator pointer to locator structure
3388  *
3389  * \return number of points not located.
3390  */
3391 /*----------------------------------------------------------------------------*/
3392 
3393 ple_lnum_t
ple_locator_get_n_exterior(const ple_locator_t * this_locator)3394 ple_locator_get_n_exterior(const ple_locator_t  *this_locator)
3395 {
3396   return this_locator->n_exterior;
3397 }
3398 
3399 /*----------------------------------------------------------------------------*/
3400 /*!
3401  * \brief Return list of points not located after locator initialization.
3402  * This list defines a subset of the point set used at initialization.
3403  *
3404  * \param[in] this_locator pointer to locator structure
3405  *
3406  * \return list of points not located.
3407  */
3408 /*----------------------------------------------------------------------------*/
3409 
3410 const ple_lnum_t *
ple_locator_get_exterior_list(const ple_locator_t * this_locator)3411 ple_locator_get_exterior_list(const ple_locator_t  *this_locator)
3412 {
3413   return this_locator->exterior_list;
3414 }
3415 
3416 /*----------------------------------------------------------------------------*/
3417 /*!
3418  * \brief Discard list of points not located after locator initialization.
3419  * This list defines a subset of the point set used at initialization.
3420  *
3421  * \param[in] this_locator pointer to locator structure
3422  */
3423 /*----------------------------------------------------------------------------*/
3424 
3425 void
ple_locator_discard_exterior(ple_locator_t * this_locator)3426 ple_locator_discard_exterior(ple_locator_t  *this_locator)
3427 {
3428   this_locator->n_exterior = 0;
3429   PLE_FREE(this_locator->exterior_list);
3430 }
3431 
3432 /*----------------------------------------------------------------------------*/
3433 /*!
3434  * \brief Distribute variable defined on distant points to processes owning
3435  * the original points (i.e. distant processes).
3436  *
3437  * The exchange is symmetric if both variables are defined, receive
3438  * only if distant_var is NULL, or send only if local_var is NULL.
3439  *
3440  * The caller should have defined the values of distant_var[] for the
3441  * distant points, whose coordinates are given by
3442  * ple_locator_get_dist_coords(), and which are located in the elements
3443  * whose numbers are given by ple_locator_get_dist_locations().
3444  *
3445  * The local_var[] is defined at the located points (those whose
3446  * numbers are returned by ple_locator_get_interior_list().
3447  *
3448  * If the optional local_list indirection is used, it is assumed to use
3449  * the same base numbering as that defined by the options for the previous
3450  * call to ple_locator_set_mesh() or ple_locator_extend_search().
3451  *
3452  * \param[in]      this_locator pointer to locator structure
3453  * \param[in, out] distant_var  variable defined on distant points
3454  *                              (ready to send); size: n_dist_points*stride
3455  * \param[in, out] local_var    variable defined on located local points
3456  *                              (received); size: n_interior*stride
3457  * \param[in]      local_list   optional indirection list for local_var
3458  * \param[in]      type_size    sizeof (float or double) variable type
3459  * \param[in]      stride       dimension (1 for scalar,
3460  *                              3 for interleaved vector)
3461  * \param[in]      reverse      if nonzero, exchange is reversed
3462  *                              (receive values associated with distant points
3463  *                              from the processes owning the original points)
3464  */
3465 /*----------------------------------------------------------------------------*/
3466 
3467 void
ple_locator_exchange_point_var(ple_locator_t * this_locator,void * distant_var,void * local_var,const ple_lnum_t * local_list,size_t type_size,size_t stride,int reverse)3468 ple_locator_exchange_point_var(ple_locator_t     *this_locator,
3469                                void              *distant_var,
3470                                void              *local_var,
3471                                const ple_lnum_t  *local_list,
3472                                size_t             type_size,
3473                                size_t             stride,
3474                                int                reverse)
3475 {
3476   double w_start, w_end, cpu_start, cpu_end;
3477 
3478   int mpi_flag = 0;
3479   _Bool _reverse = reverse;
3480 
3481   /* Initialize timing */
3482 
3483   w_start = ple_timer_wtime();
3484   cpu_start = ple_timer_cpu_time();
3485 
3486 #if defined(PLE_HAVE_MPI)
3487 
3488   MPI_Initialized(&mpi_flag);
3489 
3490   if (mpi_flag && this_locator->comm == MPI_COMM_NULL)
3491     mpi_flag = 0;
3492 
3493   if (mpi_flag) {
3494 
3495     MPI_Datatype datatype = MPI_DATATYPE_NULL;
3496 
3497     if (type_size == sizeof(double))
3498       datatype = MPI_DOUBLE;
3499     else if (type_size == sizeof(float))
3500       datatype = MPI_FLOAT;
3501     else
3502       ple_error(__FILE__, __LINE__, 0,
3503                 _("type_size passed to ple_locator_exchange_point_var() does\n"
3504                   "not correspond to double or float."));
3505 
3506     assert (datatype != MPI_DATATYPE_NULL);
3507 
3508     if (this_locator->exchange_algorithm == _EXCHANGE_SENDRECV)
3509       _exchange_point_var_distant(this_locator,
3510                                   distant_var,
3511                                   local_var,
3512                                   local_list,
3513                                   datatype,
3514                                   stride,
3515                                   _reverse);
3516 
3517     else if (this_locator->exchange_algorithm == _EXCHANGE_ISEND_IRECV)
3518       _exchange_point_var_distant_asyn(this_locator,
3519                                        distant_var,
3520                                        local_var,
3521                                        local_list,
3522                                        datatype,
3523                                        stride,
3524                                        _reverse);
3525 
3526   }
3527 
3528 #endif /* defined(PLE_HAVE_MPI) */
3529 
3530   if (!mpi_flag)
3531     _exchange_point_var_local(this_locator,
3532                               distant_var,
3533                               local_var,
3534                               local_list,
3535                               type_size,
3536                               stride,
3537                               _reverse);
3538 
3539   /* Finalize timing */
3540 
3541   w_end = ple_timer_wtime();
3542   cpu_end = ple_timer_cpu_time();
3543 
3544   this_locator->exchange_wtime[0] += (w_end - w_start);
3545   this_locator->exchange_cpu_time[0] += (cpu_end - cpu_start);
3546 }
3547 
3548 /*----------------------------------------------------------------------------*/
3549 /*!
3550  * \brief Return timing information.
3551  *
3552  * In parallel mode, this includes communication time.
3553  *
3554  * When location on closest elements to force location of all points is
3555  * active, location times include a total value, followed by the value
3556  * associated with the location of closest elements stage.
3557  *
3558  * \param[in]  this_locator      pointer to locator structure
3559  * \param[out] location_wtime    Location Wall-clock time (or NULL)
3560  * \param[out] location_cpu_time Location CPU time (or NULL)
3561  * \param[out] exchange_wtime    Variable exchange Wall-clock time
3562  *                               (size: 1 or NULL)
3563  * \param[out] exchange_cpu_time Variable exchange CPU time (size: 2 or NULL)
3564  */
3565 /*----------------------------------------------------------------------------*/
3566 
3567 void
ple_locator_get_times(const ple_locator_t * this_locator,double * location_wtime,double * location_cpu_time,double * exchange_wtime,double * exchange_cpu_time)3568 ple_locator_get_times(const ple_locator_t  *this_locator,
3569                       double               *location_wtime,
3570                       double               *location_cpu_time,
3571                       double               *exchange_wtime,
3572                       double               *exchange_cpu_time)
3573 {
3574   _get_times(this_locator,
3575              0,
3576              location_wtime, location_cpu_time,
3577              exchange_wtime, exchange_cpu_time);
3578 }
3579 
3580 /*----------------------------------------------------------------------------*/
3581 /*!
3582  * \brief Return communication timing information.
3583  *
3584  * In serial mode, return times are always zero.
3585  *
3586  * When location on closest elements to force location of all points is
3587  * active, location times include a total value, followed by the value
3588  * associated with the location of closest elements stage.
3589  *
3590  * parameters:
3591  * \param[in]  this_locator      pointer to locator structure
3592  * \param[out] location_wtime    Location Wall-clock time (or NULL)
3593  * \param[out] location_cpu_time Location CPU time (or NULL)
3594  * \param[out] exchange_wtime    Variable exchange Wall-clock time (or NULL)
3595  * \param[out] exchange_cpu_time Variable exchange CPU time (or NULL)
3596  */
3597 /*----------------------------------------------------------------------------*/
3598 
3599 void
ple_locator_get_comm_times(const ple_locator_t * this_locator,double * location_wtime,double * location_cpu_time,double * exchange_wtime,double * exchange_cpu_time)3600 ple_locator_get_comm_times(const ple_locator_t  *this_locator,
3601                            double               *location_wtime,
3602                            double               *location_cpu_time,
3603                            double               *exchange_wtime,
3604                            double               *exchange_cpu_time)
3605 {
3606   _get_times(this_locator,
3607              1,
3608              location_wtime, location_cpu_time,
3609              exchange_wtime, exchange_cpu_time);
3610 }
3611 
3612 /*----------------------------------------------------------------------------*/
3613 /*!
3614  * \brief Dump printout of a locator structure.
3615  *
3616  * \param this_locator pointer to structure that should be dumped
3617  */
3618 /*----------------------------------------------------------------------------*/
3619 
3620 void
ple_locator_dump(const ple_locator_t * this_locator)3621 ple_locator_dump(const ple_locator_t  *this_locator)
3622 {
3623   int  i;
3624   ple_lnum_t  j;
3625   const ple_lnum_t  *idx, *index, *loc;
3626   const ple_coord_t  *coords;
3627 
3628   const ple_locator_t  *_locator = this_locator;
3629 
3630   if (this_locator == NULL)
3631     return;
3632 
3633   /* Basic information */
3634   /*-------------------*/
3635 
3636   ple_printf("\n"
3637              "Locator:\n\n"
3638              "Spatial dimension:                     %d\n"
3639              "Exchange algorithm:                    %d\n"
3640              "Number of ranks of distant location:   %d\n"
3641              "First rank of distant location:        %d\n"
3642              "Number of intersecting distant ranks:  %d\n",
3643              _locator->dim,
3644              _locator->exchange_algorithm,
3645              _locator->n_ranks, _locator->start_rank,
3646              _locator->n_intersects);
3647 
3648 #if defined(PLE_HAVE_MPI)
3649   if (_locator->comm != MPI_COMM_NULL)
3650     ple_printf("\n"
3651                "Associated MPI communicator:           %ld\n",
3652                (long)(_locator->comm));
3653 #endif
3654 
3655   /* Arrays indexed by rank */
3656   /*------------------------*/
3657 
3658   if (_locator->intersect_rank != NULL) {
3659     for (i = 0; i < _locator->n_intersects; i++)
3660       ple_printf("\n"
3661                  "  Intersection %d with distant rank %d\n\n",
3662                  i, _locator->intersect_rank[i]);
3663   }
3664   if (_locator->intersect_rank != NULL) {
3665     for (i = 0; i < _locator->n_intersects; i++)
3666       ple_printf("\n"
3667                  "  Communication ordering %d: %d\n\n",
3668                  i, _locator->comm_order[i]);
3669   }
3670 
3671   if (_locator->n_interior > 0) {
3672 
3673     if (_locator->local_point_ids != NULL) {
3674 
3675       ple_printf("\n  Local point ids (for receiving):\n\n");
3676       idx = _locator->local_points_idx;
3677       index = _locator->local_point_ids;
3678       for (i = 0; i < _locator->n_intersects; i++) {
3679         if (idx[i+1] > idx[i]) {
3680           ple_printf("%6d (idx = %10d) %10d\n",
3681                      i, idx[i], index[idx[i]]);
3682           for (j = idx[i] + 1; j < idx[i + 1]; j++)
3683             ple_printf("                          %10d\n", index[j]);
3684         }
3685         else {
3686           ple_printf("%6d (idx = %10d)\n", i, idx[i]);
3687         }
3688         ple_printf("   end (idx = %10d)\n", idx[_locator->n_intersects]);
3689       }
3690 
3691     }
3692 
3693   }
3694 
3695   if (_locator->distant_points_idx != NULL) {
3696 
3697     idx = _locator->distant_points_idx;
3698     loc = _locator->distant_point_location;
3699     coords = _locator->distant_point_coords;
3700 
3701     if (idx[_locator->n_intersects] > 0)
3702       ple_printf("\n  Distant point location:\n\n");
3703 
3704     for (i = 0; i < _locator->n_intersects; i++) {
3705 
3706       if (idx[i+1] > idx[i]) {
3707 
3708         if (_locator->dim == 1) {
3709           ple_printf("%6d (idx = %10d) %10d [%12.5e]\n",
3710                      i, _locator->intersect_rank[i], idx[i],
3711                      loc[idx[i]], coords[idx[i]]);
3712           for (j = idx[i] + 1; j < idx[i + 1]; j++)
3713             ple_printf("                          %10d [%12.5e]\n",
3714                        loc[j], coords[j]);
3715         }
3716         else if (_locator->dim == 2) {
3717           ple_printf("%6d (idx = %10d) %10d [%12.5e, %12.5e]\n",
3718                      i, idx[i], loc[idx[i]],
3719                      coords[2*idx[i]], coords[2*idx[i]+1]);
3720           for (j = idx[i] + 1; j < idx[i + 1]; j++)
3721             ple_printf("                          %10d [%12.5e, %12.5e]\n",
3722                        loc[j], coords[2*j], coords[2*j+1]);
3723         }
3724         else if (_locator->dim == 3) {
3725           ple_printf("%6d (idx = %10d) %10d [%12.5e, %12.5e, %12.5e]\n",
3726                      i, idx[i], loc[idx[i]],
3727                      coords[3*idx[i]], coords[3*idx[i]+1], coords[3*idx[i]+2]);
3728           for (j = idx[i] + 1; j < idx[i + 1]; j++)
3729             ple_printf("                          "
3730                        "%10d [%12.5e, %12.5e, %12.5e]\n",
3731                        loc[j], coords[3*j], coords[3*j+1], coords[3*j+2]);
3732         }
3733 
3734       } /* if (idx[i+1] > idx[i]) */
3735 
3736     }
3737 
3738     if (idx[_locator->n_intersects] > 0)
3739       ple_printf("   end (idx = %10d)\n", idx[_locator->n_intersects]);
3740   }
3741 
3742   /* Local arrays */
3743   /*--------------*/
3744 
3745   ple_printf("\n"
3746              "  Number of local points successfully located:  %d\n\n",
3747              _locator->n_interior);
3748 
3749   for (j = 0; j < _locator->n_interior; j++)
3750     ple_printf("    %10d\n", _locator->interior_list[j]);
3751 
3752   if  (_locator->n_interior > 0)
3753     ple_printf("\n");
3754 
3755   ple_printf("  Number of local points not located:  %d\n",
3756              _locator->n_exterior);
3757 
3758   for (j = 0; j < _locator->n_exterior; j++)
3759     ple_printf("    %10d\n", _locator->exterior_list[j]);
3760 
3761   if  (_locator->n_exterior > 0)
3762     ple_printf("\n");
3763 
3764   /* Timing information */
3765   /*--------------------*/
3766 
3767   ple_printf("  Location Wall-clock time: %12.5f (comm: %12.5f)\n",
3768              _locator->location_wtime[0], _locator->location_wtime[1]);
3769 
3770   ple_printf("  Location CPU time:        %12.5f (comm: %12.5f)\n",
3771              _locator->location_cpu_time[0], _locator->location_cpu_time[1]);
3772 
3773   ple_printf("  Exchange Wall-clock time: %12.5f (comm: %12.5f)\n",
3774              _locator->exchange_wtime[0], _locator->exchange_wtime[1]);
3775 
3776   ple_printf("  Exchange CPU time:        %12.5f (comm: %12.5f)\n",
3777              _locator->exchange_cpu_time[0], _locator->exchange_cpu_time[1]);
3778 
3779 }
3780 
3781 #if defined(PLE_HAVE_MPI)
3782 
3783 /*----------------------------------------------------------------------------*/
3784 /*!
3785  * \brief Get the maximum number of exchanging ranks for which we use
3786  * asynchronous MPI sends and receives instead of MPI_SendRecv.
3787  *
3788  * \return the maximum number of ranks allowing asynchronous exchanges
3789  */
3790 /*----------------------------------------------------------------------------*/
3791 
3792 int
ple_locator_get_async_threshold(void)3793 ple_locator_get_async_threshold(void)
3794 {
3795   return _ple_locator_async_threshold;
3796 }
3797 
3798 /*----------------------------------------------------------------------------*/
3799 /*!
3800  * \brief Set the maximum number of exchanging ranks for which we use
3801  * asynchronous MPI sends and receives instead of MPI_SendRecv.
3802  *
3803  * \param threshold maximum number of ranks allowing asynchronous exchanges
3804  */
3805 /*----------------------------------------------------------------------------*/
3806 
3807 void
ple_locator_set_async_threshold(int threshold)3808 ple_locator_set_async_threshold(int threshold)
3809 {
3810   _ple_locator_async_threshold = threshold;
3811 }
3812 
3813 /*----------------------------------------------------------------------------*/
3814 /*!
3815  * \brief Register communication logging functions for locator instrumentation.
3816  *
3817  * By default, locators are not instrumented.
3818  *
3819  * Functions using MPE may be defined and used, but other similar systems
3820  * may be used.
3821  *
3822  * \param[in] log_function pointer to logging function
3823  * \param[in] start_p_comm point to point communication start event number
3824  * \param[in] end_p_comm   point to point communication end event number
3825  * \param[in] start_g_comm global communication start event number
3826  * \param[in] end_g_comm   global communication end event number
3827  */
3828 /*----------------------------------------------------------------------------*/
3829 
3830 void
ple_locator_set_comm_log(ple_locator_log_t * log_function,int start_p_comm,int end_p_comm,int start_g_comm,int end_g_comm)3831 ple_locator_set_comm_log(ple_locator_log_t  *log_function,
3832                          int                 start_p_comm,
3833                          int                 end_p_comm,
3834                          int                 start_g_comm,
3835                          int                 end_g_comm)
3836 {
3837   _ple_locator_log_func = log_function;
3838 
3839   _ple_locator_log_start_p_comm = start_p_comm;
3840   _ple_locator_log_end_p_comm = end_p_comm;
3841   _ple_locator_log_start_g_comm = start_g_comm;
3842   _ple_locator_log_end_g_comm = end_g_comm;
3843 }
3844 
3845 #endif /* defined(PLE_HAVE_MPI) */
3846 
3847 /*----------------------------------------------------------------------------*/
3848 
3849 #ifdef __cplusplus
3850 }
3851 #endif /* __cplusplus */
3852