1 /*============================================================================
2 * Locate points in a representation associated with a mesh
3 *============================================================================*/
4
5 /*
6 This file is part of the "Parallel Location and Exchange" library,
7 intended to provide mesh or particle-based code coupling services.
8
9 Copyright (C) 2005-2021 EDF S.A.
10
11 This library is free software; you can redistribute it and/or
12 modify it under the terms of the GNU Lesser General Public
13 License as published by the Free Software Foundation; either
14 version 2.1 of the License, or (at your option) any later version.
15
16 This library is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 Lesser General Public License for more details.
20
21 You should have received a copy of the GNU Lesser General Public
22 License along with this library; if not, write to the Free Software
23 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
24 */
25
26 /*!
27 * \file ple_locator.c
28 *
29 * \brief Locate points in a representation associated with a mesh.
30 */
31
32 /*----------------------------------------------------------------------------*/
33
34 #include "ple_config.h"
35
36 /*----------------------------------------------------------------------------
37 * Standard C library headers
38 *----------------------------------------------------------------------------*/
39
40 #include <assert.h>
41 #include <math.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <string.h>
45
46 #if defined(PLE_HAVE_MPI)
47 #include <mpi.h>
48 #if !defined(MPI_VERSION) /* Defined in up-to-date MPI versions */
49 # define MPI_VERSION 1
50 #endif
51 #endif
52
53 /*----------------------------------------------------------------------------
54 * Local headers
55 *----------------------------------------------------------------------------*/
56
57 #include "ple_config_defs.h"
58 #include "ple_defs.h"
59
60 /*----------------------------------------------------------------------------
61 * Header for the current file
62 *----------------------------------------------------------------------------*/
63
64 #include "ple_locator.h"
65
66 /*----------------------------------------------------------------------------*/
67
68 #ifdef __cplusplus
69 extern "C" {
70 #if 0
71 } /* Fake brace to force back Emacs auto-indentation back to column 0 */
72 #endif
73 #endif /* __cplusplus */
74
75 /*============================================================================
76 * Local macro definitions
77 *============================================================================*/
78
79 /* In case <math.h> does not define HUGE_VAL, use a "safe" value */
80 #if !defined(HUGE_VAL)
81 #define HUGE_VAL 1.0e+30
82 #endif
83
84 /* Geometric operation macros*/
85
86 #define _MODULE(vect) \
87 sqrt(vect[0] * vect[0] + vect[1] * vect[1] + vect[2] * vect[2])
88
89 /*
90 * Algorithm ID's
91 */
92
93 #define _LOCATE_BB_SENDRECV 100 /* bounding boxes + send-receive */
94 #define _LOCATE_BB_SENDRECV_ORDERED 200 /* bounding boxes + send-receive
95 with communication ordering */
96
97 #define _EXCHANGE_SENDRECV 100 /* Sendrecv */
98 #define _EXCHANGE_ISEND_IRECV 200 /* Isend/Irecv/Waitall */
99
100 /*============================================================================
101 * Type definitions
102 *============================================================================*/
103
104 typedef struct {
105
106 int n; /* Number of intersecting distant ranks */
107 int *rank; /* List of intersecting distant ranks */
108 double *extents; /* List of intersecting distant extents */
109
110 } _rank_intersects_t;
111
112 /*----------------------------------------------------------------------------
113 * Structure defining a locator
114 *----------------------------------------------------------------------------*/
115
116 struct _ple_locator_t {
117
118 /* Basic information */
119 /*-------------------*/
120
121 int dim; /* Spatial dimension */
122 int have_tags; /* Do we use tags */
123
124 int locate_algorithm; /* Location algorithm id */
125 int exchange_algorithm; /* Exchange algorithm id */
126
127 #if defined(PLE_HAVE_MPI)
128 MPI_Comm comm; /* Associated MPI communicator */
129 #endif
130
131 int n_ranks; /* Number of MPI ranks of distant location */
132 int start_rank; /* First MPI rank of distant location */
133
134 int n_intersects; /* Number of intersecting distant ranks */
135 int *intersect_rank; /* List of intersecting distant ranks */
136 int *comm_order; /* Optional communication ordering */
137
138 ple_lnum_t point_id_base; /* base numbering for (external) point ids */
139
140 ple_lnum_t *local_points_idx; /* Start index of local points per rank
141 (size: n_intersects + 1)*/
142 ple_lnum_t *distant_points_idx; /* Start index of distant points per rank
143 (size: n_intersects + 1)*/
144
145 ple_lnum_t *local_point_ids; /* Local point index for data received
146 (with blocs starting at
147 local_points_idx[] indexes,
148 0 to n-1 numbering) */
149
150 ple_lnum_t *distant_point_location; /* Location of distant points by parent
151 element number (with blocs starting
152 at distant_points_idx[] indexes) */
153 ple_coord_t *distant_point_coords; /* Coordinates of distant points
154 (with blocs starting at
155 distant_points_idx[]*dim indexes) */
156
157 ple_lnum_t n_interior; /* Number of local points located */
158 ple_lnum_t *interior_list; /* List of points located */
159 ple_lnum_t n_exterior; /* Number of local points not located */
160 ple_lnum_t *exterior_list; /* List of points not located */
161
162 /* Timing information (2 fields/time; 0: total; 1: communication) */
163
164 double location_wtime[2]; /* Location Wall-clock time */
165 double location_cpu_time[2]; /* Location CPU time */
166 double exchange_wtime[2]; /* Variable exchange Wall-clock time */
167 double exchange_cpu_time[2]; /* Variable exchange CPU time */
168 };
169
170 /*============================================================================
171 * Local function pointer type documentation
172 *============================================================================*/
173
174 #ifdef DOXYGEN_ONLY
175
176 /*!
177 * \brief Query number of extents and compute extents of a mesh representation.
178 *
179 * For future optimizations, computation of extents should not be limited
180 * to mesh extents, but to 1 to n extents, allowing different extent
181 * refinements, from global mesh to individual element extents.
182 *
183 * The minimum required functionality for this function is to compute
184 * whole mesh extents, but it could also return extents of individual
185 * elements, or intermediate extents of mesh subdivisions or coarsened
186 * elements. As such, it takes an argument indicating the maximum
187 * local number of extents it should compute (based on the size of
188 * the extents array argument), but returns the number of extents
189 * really computed, which may be lower (usually 1 for mesh extents,
190 * possibly even 0 if the local mesh is empty). If n_max_extents = 1,
191 * the whole mesh extents should be computed.
192 *
193 * If n_max_extents is set to a negative value (-1), no extents are computed,
194 * but the function returns the maximum number of extents it may compute.
195 * This query mode allows for the caller to allocate the correct amount
196 * of memory for a subsequent call.
197 *
198 * \param[in] mesh pointer to mesh representation structure
199 * \param[in] n_max_extents maximum number of sub-extents (such as element
200 * extents) to compute, or -1 to query
201 * \param[in] tolerance addition to local extents of each element:
202 * extent = base_extent * (1 + tolerance)
203 * \param[in] extents extents associated with the mesh or elements (or
204 * even aggregated elements in case of coarser
205 * representation):
206 * x_min_0, y_min_0, ..., x_max_i, y_max_i, ...
207 * (size: 2*dim*n_max_extents), ignored in query mode
208 *
209 * \return the number of extents computed
210 */
211
212 typedef ple_lnum_t
213 (ple_mesh_extents_t) (const void *mesh,
214 ple_lnum_t n_max_extents,
215 double tolerance,
216 double extents[]);
217
218 /*!
219 * \brief Find elements in a given local mesh containing points: updates the
220 * location[] and distance[] arrays associated with a set of points
221 * for points that are in an element of this mesh, or closer to one
222 * than to previously encountered elements.
223 *
224 * \param[in] this_nodal pointer to nodal mesh representation
225 * structure
226 * \param[in] tolerance_base associated fixed tolerance
227 * \param[in] tolerance_fraction associated fraction of element bounding
228 * boxes added to tolerance
229 * \param[in] n_points number of points to locate
230 * \param[in] point_coords point coordinates
231 * \param[in, out] location number of element containing or closest
232 * to each point (size: n_points)
233 * \param[in, out] distance distance from point to element indicated
234 * by location[]: < 0 if unlocated, >= 0
235 * if inside; the choice of distance metric
236 * is left to the calling code
237 * (size: n_points)
238 */
239
240 typedef void
241 (ple_mesh_elements_locate_t) (const void *mesh,
242 float tolerance_base,
243 float tolerance_fraction,
244 ple_lnum_t n_points,
245 const ple_coord_t point_coords[],
246 ple_lnum_t location[],
247 float distance[]);
248
249 /*!
250 * \brief Function pointer type for user definable logging/profiling
251 * type functions
252 */
253
254 typedef int
255 (ple_locator_log_t) (int event,
256 int data,
257 const char *string);
258
259 #endif /* DOXYGEN_ONLY */
260
261 /*============================================================================
262 * Static global variables
263 *============================================================================*/
264
265 #if defined(PLE_HAVE_MPI)
266
267 /* maximum number of exchanging ranks for which we use asynchronous
268 MPI sends and receives instead of MPI_SendRecv */
269
270 static int _ple_locator_async_threshold = 128;
271
272 /* global logging function */
273
274 static ple_locator_log_t *_ple_locator_log_func = NULL;
275
276 /* global variables associated with communication logging */
277
278 static int _ple_locator_log_start_p_comm = 0;
279 static int _ple_locator_log_end_p_comm = 0;
280 static int _ple_locator_log_start_g_comm = 0;
281 static int _ple_locator_log_end_g_comm = 0;
282
283 #endif
284
285 /*============================================================================
286 * Private function definitions
287 *============================================================================*/
288
289 #if defined(PLE_HAVE_MPI)
290
291 /*----------------------------------------------------------------------------
292 * Log communication start.
293 *
294 * parameters:
295 * start_p_comm <-- event number for the start of the "send/recv"
296 * or "global send/recv" state
297 * timing <-> 0: wall-clock total; 1: CPU total;
298 * 2: wall-clock timer start; 3: CPU timer start
299 *----------------------------------------------------------------------------*/
300
301 inline static void
_locator_trace_start_comm(int start_p_comm,double timing[4])302 _locator_trace_start_comm(int start_p_comm,
303 double timing[4])
304 {
305 timing[2] = ple_timer_wtime();
306 timing[3] = ple_timer_cpu_time();
307
308 if(_ple_locator_log_func != NULL)
309 _ple_locator_log_func(start_p_comm, 0, NULL);
310 }
311
312 /*----------------------------------------------------------------------------
313 * Log communication end.
314 *
315 * parameters:
316 * end_p_comm <-- event number for the end of the "send/recv" or
317 * "global send/recv" state
318 * timing <-> 0: wall-clock total; 1 CPU total;
319 * 2: wall-clock timer start; 3: CPU timer start
320 *----------------------------------------------------------------------------*/
321
322 inline static void
_locator_trace_end_comm(int end_p_comm,double timing[4])323 _locator_trace_end_comm(int end_p_comm,
324 double timing[4])
325 {
326 if(_ple_locator_log_func != NULL)
327 _ple_locator_log_func(end_p_comm, 0, NULL);
328
329 timing[0] += ple_timer_wtime() - timing[2];
330 timing[1] += ple_timer_cpu_time() - timing[3];
331 }
332
333 /*----------------------------------------------------------------------------
334 * Descend binary tree for the ordering of an integer array.
335 *
336 * parameters:
337 * number <-- pointer to numbers of entities that should be ordered.
338 * (if NULL, a default 1 to n numbering is considered)
339 * level <-- level of the binary tree to descend
340 * n <-- number of entities in the binary tree to descend
341 * order <-> ordering array
342 *----------------------------------------------------------------------------*/
343
344 inline static void
_order_int_descend_tree(const int number[],size_t level,const size_t n,int order[])345 _order_int_descend_tree(const int number[],
346 size_t level,
347 const size_t n,
348 int order[])
349 {
350 size_t i_save, i1, i2, lv_cur;
351
352 i_save = (size_t)(order[level]);
353
354 while (level <= (n/2)) {
355
356 lv_cur = (2*level) + 1;
357
358 if (lv_cur < n - 1) {
359
360 i1 = (size_t)(order[lv_cur+1]);
361 i2 = (size_t)(order[lv_cur]);
362
363 if (number[i1] > number[i2]) lv_cur++;
364 }
365
366 if (lv_cur >= n) break;
367
368 i1 = i_save;
369 i2 = (size_t)(order[lv_cur]);
370
371 if (number[i1] >= number[i2]) break;
372
373 order[level] = order[lv_cur];
374 level = lv_cur;
375
376 }
377
378 order[level] = i_save;
379 }
380
381 /*----------------------------------------------------------------------------
382 * Order an array of local numbers.
383 *
384 * parameters:
385 * number <-- array of entity numbers (if NULL, a default 1 to n
386 * numbering is considered)
387 * order <-- pre-allocated ordering table
388 * n <-- number of entities considered
389 *----------------------------------------------------------------------------*/
390
391 static void
_order_int(const int number[],int order[],const size_t n)392 _order_int(const int number[],
393 int order[],
394 const size_t n)
395 {
396 size_t i;
397 int o_save;
398
399 /* Initialize ordering array */
400
401 for (i = 0; i < n; i++)
402 order[i] = i;
403
404 if (n < 2)
405 return;
406
407 /* Create binary tree */
408
409 i = (n / 2);
410 do {
411 i--;
412 _order_int_descend_tree(number, i, n, order);
413 } while (i > 0);
414
415 /* Sort binary tree */
416
417 for (i = n - 1; i > 0; i--) {
418 o_save = order[0];
419 order[0] = order[i];
420 order[i] = o_save;
421 _order_int_descend_tree(number, 0, i, order);
422 }
423 }
424
425 /*----------------------------------------------------------------------------
426 * Order communicating ranks to reduce serialization
427 *
428 * parameters:
429 * rank_id <-- local rank id
430 * n_ranks <-- communicator size
431 * n_comm_ranks <-- number of communicating ranks
432 * comm_rank <-- communicating ranks
433 * order --> communication order
434 *----------------------------------------------------------------------------*/
435
436 static void
_order_comm_ranks(int rank_id,int n_ranks,int n_comm_ranks,const int comm_rank[],int order[])437 _order_comm_ranks(int rank_id,
438 int n_ranks,
439 int n_comm_ranks,
440 const int comm_rank[],
441 int order[])
442 {
443 int *sub_rank, *rank_dist;
444
445 PLE_MALLOC(sub_rank, n_comm_ranks, int);
446 PLE_MALLOC(rank_dist, n_comm_ranks, int);
447
448 /* Compute ordering ("distance") key */
449
450 for (int i = 0; i < n_comm_ranks; i++) {
451 sub_rank[i] = comm_rank[i];
452 rank_dist[i] = 0;
453 }
454
455 int l_r_id = rank_id;
456 int m_r_id = (n_ranks+1) / 2;
457
458 while (true) {
459
460 if (l_r_id < m_r_id) {
461 for (int i = 0; i < n_comm_ranks; i++) {
462 if (sub_rank[i] >= m_r_id) {
463 rank_dist[i] = rank_dist[i]*2 + 1;
464 sub_rank[i] -= m_r_id;
465 }
466 else
467 rank_dist[i] = rank_dist[i]*2;
468 }
469 }
470 else {
471 for (int i = 0; i < n_comm_ranks; i++) {
472 if (sub_rank[i] >= m_r_id) {
473 rank_dist[i] = rank_dist[i]*2;
474 sub_rank[i] -= m_r_id;
475 }
476 else
477 rank_dist[i] = rank_dist[i]*2 + 1;
478 }
479 l_r_id -= m_r_id;
480 }
481
482 if (m_r_id < 2)
483 break;
484
485 m_r_id = (m_r_id+1) / 2;
486
487 }
488
489 /* Order results */
490
491 _order_int(rank_dist, order, n_comm_ranks);
492
493 PLE_FREE(sub_rank);
494 PLE_FREE(rank_dist);
495 }
496
497 #endif /* defined(PLE_HAVE_MPI) */
498
499 /*----------------------------------------------------------------------------
500 * Test if two extents intersect
501 *
502 * parameters:
503 * dim <-- spatial (coordinates) dimension
504 * extents_1 <-- extents: x_min, y_min, ..., x_max, y_max, ...
505 * size: dim*2
506 * extents_2 <-- extents: x_min, y_min, ..., x_max, y_max, ...
507 * size: dim*2
508 *
509 * returns:
510 * true if extents intersect, false otherwise
511 *----------------------------------------------------------------------------*/
512
513 inline static _Bool
_intersect_extents(int dim,const double extents_1[],const double extents_2[])514 _intersect_extents(int dim,
515 const double extents_1[],
516 const double extents_2[])
517 {
518 int i;
519 _Bool retval = true;
520
521 for (i = 0; i < dim; i++) {
522 if ( (extents_1[i] > extents_2[i + dim])
523 || (extents_2[i] > extents_1[i + dim])) {
524 retval = false;
525 break;
526 }
527 }
528
529 return retval;
530 }
531
532 /*----------------------------------------------------------------------------
533 * Test if a point is within given extents
534 *
535 * parameters:
536 * dim <-- spatial (coordinates) dimension
537 * coords <-- coordinates: x, y, ...
538 * size: dim
539 * extents <-- extents: x_min, y_min, ..., x_max, y_max, ...
540 * size: dim*2
541 *
542 * returns:
543 * true if point lies within extents, false otherwise
544 *----------------------------------------------------------------------------*/
545
546 inline static _Bool
_within_extents(int dim,const ple_coord_t coords[],const double extents[])547 _within_extents(int dim,
548 const ple_coord_t coords[],
549 const double extents[])
550 {
551 int i;
552 _Bool retval = true;
553
554 for (i = 0; i < dim; i++) {
555 if ( (coords[i] < extents[i])
556 || (coords[i] > extents[i + dim])) {
557 retval = false;
558 break;
559 }
560 }
561
562 return retval;
563 }
564
565 /*----------------------------------------------------------------------------
566 * Compute extents of a point set
567 *
568 * parameters:
569 * dim <-- space dimension of points to locate
570 * point_list_base <-- base numbering for point list
571 * n_points <-- number of points to locate
572 * point_list <-- optional indirection array to point_coords
573 * point_coords <-- coordinates of points to locate
574 * (dimension: dim * n_points)
575 * location <-- existing location_information, or NULL
576 * extents --> extents associated with mesh:
577 * x_min, y_min, ..., x_max, y_max, ... (size: 2*dim)
578 *----------------------------------------------------------------------------*/
579
580 static void
_point_extents(int dim,ple_lnum_t point_list_base,ple_lnum_t n_points,const ple_lnum_t point_list[],const ple_coord_t point_coords[],const ple_lnum_t location[],double extents[])581 _point_extents(int dim,
582 ple_lnum_t point_list_base,
583 ple_lnum_t n_points,
584 const ple_lnum_t point_list[],
585 const ple_coord_t point_coords[],
586 const ple_lnum_t location[],
587 double extents[])
588 {
589 int i;
590 ple_lnum_t j, coord_idx;
591
592 /* initialize extents in case mesh is empty or dim < 3 */
593 for (i = 0; i < dim; i++) {
594 extents[i] = HUGE_VAL;
595 extents[i + dim] = -HUGE_VAL;
596 }
597
598 /* Compute extents */
599
600 if (location != NULL) {
601
602 if (point_list != NULL) {
603
604 for (j = 0; j < n_points; j++) {
605 coord_idx = point_list[j] - point_list_base;
606 for (i = 0; i < dim; i++) {
607 if (extents[i] > point_coords[(coord_idx * dim) + i])
608 extents[i] = point_coords[(coord_idx * dim) + i];
609 if (extents[i + dim] < point_coords[(coord_idx * dim) + i])
610 extents[i + dim] = point_coords[(coord_idx * dim) + i];
611 }
612 }
613 }
614
615 else {
616
617 for (coord_idx = 0; coord_idx < n_points; coord_idx++) {
618 for (i = 0; i < dim; i++) {
619 if (extents[i] > point_coords[(coord_idx * dim) + i])
620 extents[i] = point_coords[(coord_idx * dim) + i];
621 if (extents[i + dim] < point_coords[(coord_idx * dim) + i])
622 extents[i + dim] = point_coords[(coord_idx * dim) + i];
623 }
624 }
625 }
626
627 }
628 else {
629
630 if (point_list != NULL) {
631
632 for (j = 0; j < n_points; j++) {
633 coord_idx = point_list[j] - point_list_base;
634 for (i = 0; i < dim; i++) {
635 if (extents[i] > point_coords[(coord_idx * dim) + i])
636 extents[i] = point_coords[(coord_idx * dim) + i];
637 if (extents[i + dim] < point_coords[(coord_idx * dim) + i])
638 extents[i + dim] = point_coords[(coord_idx * dim) + i];
639 }
640 }
641 }
642
643 else {
644
645 for (j = 0; j < n_points; j++) {
646 for (i = 0; i < dim; i++) {
647 if (extents[i] > point_coords[(j * dim) + i])
648 extents[i] = point_coords[(j * dim) + i];
649 if (extents[i + dim] < point_coords[(j * dim) + i])
650 extents[i + dim] = point_coords[(j * dim) + i];
651 }
652 }
653 }
654
655 }
656
657 }
658
659 /*----------------------------------------------------------------------------
660 * Clear previous location information.
661 *
662 * parameters:
663 * this_locator <-> pointer to locator structure
664 *----------------------------------------------------------------------------*/
665
666 static void
_clear_location_info(ple_locator_t * this_locator)667 _clear_location_info(ple_locator_t *this_locator)
668 {
669 this_locator->n_intersects = 0;
670
671 PLE_FREE(this_locator->intersect_rank);
672
673 PLE_FREE(this_locator->local_points_idx);
674 PLE_FREE(this_locator->distant_points_idx);
675
676 PLE_FREE(this_locator->local_point_ids);
677
678 PLE_FREE(this_locator->distant_point_location);
679 PLE_FREE(this_locator->distant_point_coords);
680
681 PLE_FREE(this_locator->interior_list);
682 PLE_FREE(this_locator->exterior_list);
683 }
684
685 #if defined(PLE_HAVE_MPI)
686
687 /*----------------------------------------------------------------------------
688 * Update intersection rank information once location is done.
689 *
690 * parameters:
691 * this_locator <-> pointer to locator structure
692 * n_points <-- number of points to locate
693 * location_rank_id <-> rank on which a point is located in, id
694 * in updated locator intersection rank info out
695 *----------------------------------------------------------------------------*/
696
697 static void
_location_ranks(ple_locator_t * this_locator,ple_lnum_t n_points,ple_lnum_t location_rank_id[])698 _location_ranks(ple_locator_t *this_locator,
699 ple_lnum_t n_points,
700 ple_lnum_t location_rank_id[])
701 {
702 int i, k;
703 ple_lnum_t j;
704
705 int loc_vals[2], max_vals[2];
706 int comm_size;
707 int *send_flag = NULL, *recv_flag = NULL, *intersect_rank_id = NULL;
708
709 double comm_timing[4] = {0., 0., 0., 0.};
710
711 /* Initialization */
712
713 MPI_Comm_size(this_locator->comm, &comm_size);
714
715 PLE_MALLOC(send_flag, comm_size, int);
716 PLE_MALLOC(recv_flag, comm_size, int);
717
718 for (i = 0; i < comm_size; i++)
719 send_flag[i] = 0;
720
721 for (j = 0; j < n_points; j++) {
722 if (location_rank_id[j] > -1)
723 send_flag[location_rank_id[j]] = 1;
724 }
725
726 /* As exchange detection is asymetric, synchronize it */
727
728 _locator_trace_start_comm(_ple_locator_log_start_g_comm, comm_timing);
729
730 MPI_Alltoall(send_flag, 1, MPI_INT, recv_flag, 1, MPI_INT,
731 this_locator->comm);
732
733 _locator_trace_end_comm(_ple_locator_log_end_g_comm, comm_timing);
734
735 /* Update number of "intersects" and matching rank info */
736
737 this_locator->n_intersects = 0;
738
739 for (i = 0; i < this_locator->n_ranks; i++) {
740 j = i + this_locator->start_rank;
741 if (send_flag[j] == 1 || recv_flag[j] == 1)
742 this_locator->n_intersects++;
743 }
744
745 PLE_REALLOC(this_locator->intersect_rank,
746 this_locator->n_intersects,
747 int);
748
749 for (i = 0, k = 0; i < this_locator->n_ranks; i++) {
750 j = i + this_locator->start_rank;
751 if (send_flag[j] == 1 || recv_flag[j] == 1)
752 this_locator->intersect_rank[k++] = j;
753 }
754
755 if (this_locator->locate_algorithm == _LOCATE_BB_SENDRECV_ORDERED) {
756 int rank_id;
757 MPI_Comm_rank(this_locator->comm, &rank_id);
758 PLE_REALLOC(this_locator->comm_order, this_locator->n_intersects, int);
759 _order_comm_ranks(rank_id,
760 comm_size,
761 this_locator->n_intersects,
762 this_locator->intersect_rank,
763 this_locator->comm_order);
764 }
765
766 PLE_FREE(send_flag);
767 PLE_FREE(recv_flag);
768
769 /* Now convert location rank id to intersect rank */
770
771 PLE_MALLOC(intersect_rank_id, this_locator->n_ranks, int);
772
773 for (i = 0; i < this_locator->n_ranks; i++)
774 intersect_rank_id[i] = -1;
775
776 for (i = 0; i < this_locator->n_intersects; i++) {
777 intersect_rank_id[ this_locator->intersect_rank[i]
778 - this_locator->start_rank] = i;
779 }
780
781 for (j = 0; j < n_points; j++) {
782 k = location_rank_id[j] - this_locator->start_rank;
783 if (k > -1)
784 location_rank_id[j] = intersect_rank_id[k];
785 }
786
787 PLE_FREE(intersect_rank_id);
788
789 _locator_trace_start_comm(_ple_locator_log_start_g_comm, comm_timing);
790
791 loc_vals[0] = this_locator->n_intersects;
792 loc_vals[1] = _ple_locator_async_threshold;
793
794 MPI_Allreduce(loc_vals, max_vals, 2, MPI_INT, MPI_MAX,
795 this_locator->comm);
796
797 if (max_vals[0] <= max_vals[1])
798 this_locator->exchange_algorithm = _EXCHANGE_ISEND_IRECV;
799 else
800 this_locator->exchange_algorithm = _EXCHANGE_SENDRECV;
801
802 _locator_trace_end_comm(_ple_locator_log_end_g_comm, comm_timing);
803
804 this_locator->location_wtime[1] += comm_timing[0];
805 this_locator->location_cpu_time[1] += comm_timing[1];
806 }
807
808 /*----------------------------------------------------------------------------
809 * Determine or update possibly intersecting ranks for unlocated elements,
810 * in parallel.
811 *
812 * parameters:
813 * this_locator <-- pointer to locator structure
814 * mesh <-- pointer to mesh representation structure
815 * tolerance_base <-- associated fixed tolerance
816 * tolerance_fraction <-- associated fraction of element bounding
817 * boxes added to tolerance
818 * n_points <-- number of points to locate
819 * point_list <-- optional indirection array to point_coords
820 * point_coords <-- coordinates of points to locate
821 * (dimension: dim * n_points)
822 * location <-> number of distant element containing or closest
823 * to each point, or -1 (size: n_points)
824 * mesh_extents_f <-- pointer to function computing mesh or mesh
825 * subset or element extents
826 *
827 * returns:
828 * local rank intersection info
829 *----------------------------------------------------------------------------*/
830
831 static _rank_intersects_t
_intersects_distant(ple_locator_t * this_locator,const void * mesh,float tolerance_base,float tolerance_fraction,ple_lnum_t n_points,const ple_lnum_t point_list[],const ple_coord_t point_coords[],const ple_lnum_t location[],ple_mesh_extents_t * mesh_extents_f)832 _intersects_distant(ple_locator_t *this_locator,
833 const void *mesh,
834 float tolerance_base,
835 float tolerance_fraction,
836 ple_lnum_t n_points,
837 const ple_lnum_t point_list[],
838 const ple_coord_t point_coords[],
839 const ple_lnum_t location[],
840 ple_mesh_extents_t *mesh_extents_f)
841 {
842 int stride2;
843 double extents[12];
844
845 int j;
846 int stride4;
847 int comm_rank, comm_size;
848 int n_intersects;
849 int *intersect_rank;
850 double *recvbuf;
851
852 double comm_timing[4] = {0., 0., 0., 0.};
853 const int dim = this_locator->dim;
854
855 _rank_intersects_t intersects;
856
857 /* Update intersects */
858
859 intersects.n = 0;
860
861 /* initialize mesh extents in case mesh is empty or dim < 3 */
862 for (int i = 0; i < dim; i++) {
863 extents[i] = HUGE_VAL;
864 extents[i + dim] = -HUGE_VAL;
865 }
866
867 mesh_extents_f(mesh,
868 1,
869 tolerance_fraction,
870 extents);
871
872 _point_extents(dim,
873 this_locator->point_id_base,
874 n_points,
875 point_list,
876 point_coords,
877 location,
878 extents + 2*dim);
879
880 for (int i = 0; i < dim; i++) {
881
882 if (extents[i] > -HUGE_VAL + tolerance_base)
883 extents[i] -= tolerance_base;
884 else
885 extents[i] = -HUGE_VAL;
886 if (extents[i] < HUGE_VAL - tolerance_base)
887 extents[i + dim] += tolerance_base;
888 else
889 extents[i + dim] = HUGE_VAL;
890
891 if (extents[i + 2*dim] > -HUGE_VAL + tolerance_base)
892 extents[i + 2*dim] -= tolerance_base;
893 else
894 extents[i + 2*dim] = -HUGE_VAL;
895 if (extents[i + 3*dim] < HUGE_VAL - tolerance_base)
896 extents[i + 3*dim] += tolerance_base;
897 else
898 extents[i + 3*dim] = HUGE_VAL;
899
900 }
901
902 /* Exchange extent information */
903
904 MPI_Comm_rank(this_locator->comm, &comm_rank);
905 MPI_Comm_size(this_locator->comm, &comm_size);
906
907 stride2 = dim * 2; /* Stride for one type of extent */
908 stride4 = dim * 4; /* Stride for element and vertex
909 extents, end-to-end */
910
911 PLE_MALLOC(recvbuf, stride4*comm_size, double);
912
913 _locator_trace_start_comm(_ple_locator_log_start_g_comm, comm_timing);
914
915 MPI_Allgather(extents, stride4, MPI_DOUBLE, recvbuf, stride4, MPI_DOUBLE,
916 this_locator->comm);
917
918 _locator_trace_end_comm(_ple_locator_log_end_g_comm, comm_timing);
919
920 /* Count and mark possible overlaps */
921
922 n_intersects = 0;
923 PLE_MALLOC(intersect_rank, this_locator->n_ranks, int);
924
925 for (int i = 0; i < this_locator->n_ranks; i++) {
926 j = this_locator->start_rank + i;
927 if ( (_intersect_extents(dim,
928 extents + (2*dim),
929 recvbuf + (j*stride4)) == true)
930 || (_intersect_extents(dim,
931 extents,
932 recvbuf + (j*stride4) + (2*dim)) == true)) {
933 intersect_rank[n_intersects] = j;
934 n_intersects += 1;
935 }
936 }
937
938 intersects.n = n_intersects;
939 PLE_MALLOC(intersects.rank, intersects.n, int);
940 PLE_MALLOC(intersects.extents, intersects.n * stride2, double);
941
942 for (int i = 0; i < intersects.n; i++) {
943
944 intersects.rank[i] = intersect_rank[i];
945
946 /* Copy only distant element (and not point) extents */
947
948 for (j = 0; j < stride2; j++)
949 intersects.extents[i*stride2 + j]
950 = recvbuf[intersect_rank[i]*stride4 + j];
951
952 }
953
954 /* Free temporary memory */
955
956 PLE_FREE(intersect_rank);
957 PLE_FREE(recvbuf);
958
959 /* Finalize timing */
960
961 this_locator->location_wtime[1] += comm_timing[0];
962 this_locator->location_cpu_time[1] += comm_timing[1];
963
964 return intersects;
965 }
966
967 /*----------------------------------------------------------------------------
968 * Initialize location information from previous locator info,
969 * in parallel mode.
970 *
971 * parameters:
972 * this_locator <-> pointer to locator structure
973 * n_points <-- number of points to locate
974 * location --> number of distant element containing or closest
975 * to each point, or -1 (size: n_points)
976 * location_rank_id --> rank id for distant element containing or closest
977 * to each point, or -1
978 *----------------------------------------------------------------------------*/
979
980 static void
_transfer_location_distant(ple_locator_t * this_locator,ple_lnum_t n_points,ple_lnum_t location[],ple_lnum_t location_rank_id[])981 _transfer_location_distant(ple_locator_t *this_locator,
982 ple_lnum_t n_points,
983 ple_lnum_t location[],
984 ple_lnum_t location_rank_id[])
985 {
986 int dist_rank;
987 ple_lnum_t j, k, n_points_loc, n_points_dist, dist_v_idx;
988
989 double comm_timing[4] = {0., 0., 0., 0.};
990
991 const ple_lnum_t idb = this_locator->point_id_base;
992 const ple_lnum_t *_interior_list = this_locator->interior_list;
993
994 /* Initialize locations */
995
996 for (j = 0; j < n_points; j++) {
997 location[j] = -1;
998 location_rank_id[j] = -1;
999 }
1000
1001 /* Update with existing location info *
1002 exact location info is not necessary, so not determined;
1003 all that is necessary is to mark located points as such */
1004
1005 for (int li = 0; li < this_locator->n_intersects; li++) {
1006
1007 int i = (this_locator->comm_order != NULL) ?
1008 this_locator->comm_order[li] : li;
1009
1010 MPI_Status status;
1011 ple_lnum_t *loc_v_buf, *dist_v_ptr;
1012 const ple_lnum_t *_local_point_ids
1013 = this_locator->local_point_ids + this_locator->local_points_idx[i];
1014
1015 dist_rank = this_locator->intersect_rank[i];
1016
1017 n_points_loc = this_locator->local_points_idx[i+1]
1018 - this_locator->local_points_idx[i];
1019
1020 n_points_dist = this_locator->distant_points_idx[i+1]
1021 - this_locator->distant_points_idx[i];
1022
1023 dist_v_idx = this_locator->distant_points_idx[i];
1024
1025 PLE_MALLOC(loc_v_buf, n_points_loc, ple_lnum_t);
1026
1027 /* Exchange information */
1028
1029 dist_v_ptr = this_locator->distant_point_location + dist_v_idx;
1030
1031 _locator_trace_start_comm(_ple_locator_log_start_p_comm, comm_timing);
1032
1033 MPI_Sendrecv(dist_v_ptr, n_points_dist, PLE_MPI_LNUM, dist_rank, PLE_MPI_TAG,
1034 loc_v_buf, n_points_loc, PLE_MPI_LNUM, dist_rank, PLE_MPI_TAG,
1035 this_locator->comm, &status);
1036
1037 _locator_trace_end_comm(_ple_locator_log_end_p_comm, comm_timing);
1038
1039 for (k = 0; k < n_points_loc; k++) {
1040 ple_lnum_t pt_id = _interior_list[_local_point_ids[k]] - idb;
1041 location[pt_id] = loc_v_buf[k];
1042 location_rank_id[pt_id] = dist_rank;
1043 }
1044
1045 PLE_FREE(loc_v_buf);
1046 } /* End of loop on MPI ranks */
1047
1048 this_locator->n_intersects = 0;
1049 PLE_FREE(this_locator->intersect_rank);
1050 PLE_FREE(this_locator->comm_order);
1051 PLE_FREE(this_locator->local_points_idx);
1052 PLE_FREE(this_locator->distant_points_idx);
1053 PLE_FREE(this_locator->local_point_ids);
1054 PLE_FREE(this_locator->distant_point_location);
1055 PLE_FREE(this_locator->distant_point_coords);
1056
1057 this_locator->n_interior = 0;
1058 this_locator->n_exterior = 0;
1059 PLE_FREE(this_locator->interior_list);
1060 PLE_FREE(this_locator->exterior_list);
1061 }
1062
1063 /*----------------------------------------------------------------------------
1064 * Location of points not yet located on the closest elements.
1065 *
1066 * parameters:
1067 * this_locator <-> pointer to locator structure
1068 * mesh <-- pointer to mesh representation structure
1069 * tolerance_base <-- associated fixed tolerance
1070 * tolerance_fraction <-- associated fraction of element bounding
1071 * boxes added to tolerance
1072 * n_points <-- number of points to locate
1073 * point_list <-- optional indirection array to point_coords
1074 * point_tag <-- optional point tag (size: n_points)
1075 * point_coords <-- coordinates of points to locate
1076 * (dimension: dim * n_points)
1077 * location <-> number of distant element containing or closest
1078 * to each point, or -1 (size: n_points)
1079 * location_rank_id <-> rank id for distant element containing or closest
1080 * to each point, or -1
1081 * distance <-> distance from point to element indicated by
1082 * location[]: < 0 if unlocated, 0 - 1 if inside,
1083 * > 1 if outside (size: n_points)
1084 * mesh_extents_f <-- function computing mesh or mesh subset extents
1085 * mesh_locate_f <-- function locating the points on local elements
1086 *----------------------------------------------------------------------------*/
1087
1088 static void
_locate_distant(ple_locator_t * this_locator,const void * mesh,float tolerance_base,float tolerance_fraction,ple_lnum_t n_points,const ple_lnum_t point_list[],const int point_tag[],const ple_coord_t point_coords[],ple_lnum_t location[],ple_lnum_t location_rank_id[],float distance[],ple_mesh_extents_t * mesh_extents_f,ple_mesh_elements_locate_t * mesh_locate_f)1089 _locate_distant(ple_locator_t *this_locator,
1090 const void *mesh,
1091 float tolerance_base,
1092 float tolerance_fraction,
1093 ple_lnum_t n_points,
1094 const ple_lnum_t point_list[],
1095 const int point_tag[],
1096 const ple_coord_t point_coords[],
1097 ple_lnum_t location[],
1098 ple_lnum_t location_rank_id[],
1099 float distance[],
1100 ple_mesh_extents_t *mesh_extents_f,
1101 ple_mesh_elements_locate_t *mesh_locate_f)
1102 {
1103 int k;
1104 int dist_rank, dist_index;
1105 ple_lnum_t j;
1106 ple_lnum_t n_coords_loc, n_coords_dist;
1107 ple_lnum_t *location_loc, *location_dist;
1108 int *tag_dist, *send_tag;
1109 ple_coord_t *coords_dist, *send_coords;
1110 float *distance_dist, *distance_loc;
1111
1112 _rank_intersects_t intersects;
1113 MPI_Status status;
1114
1115 ple_lnum_t _n_points = 0;
1116 double extents[6];
1117
1118 double comm_timing[4] = {0., 0., 0., 0.};
1119
1120 ple_lnum_t *_point_list = NULL, *_point_id = NULL, *send_id = NULL;
1121 const ple_lnum_t *_point_list_p = NULL;
1122
1123 const int dim = this_locator->dim;
1124 const int stride = dim * 2;
1125 const int have_tags = this_locator->have_tags;
1126 const ple_lnum_t idb = this_locator->point_id_base;
1127
1128 /* Filter non-located points */
1129
1130 _n_points = 0;
1131 _point_list_p = point_list;
1132
1133 for (j = 0; j < n_points; j++) {
1134 if (location[j] < 0)
1135 _n_points++;
1136 }
1137
1138 if (_n_points < n_points) {
1139
1140 PLE_MALLOC(_point_list, _n_points, ple_lnum_t);
1141 _point_list_p = _point_list;
1142
1143 _n_points = 0;
1144 if (point_list == NULL) {
1145 _point_id = _point_list;
1146 for (j = 0; j < n_points; j++) {
1147 if (location[j] < 0)
1148 _point_list[_n_points++] = j + idb;
1149 }
1150 }
1151 else {
1152 PLE_MALLOC(_point_id, _n_points, ple_lnum_t);
1153 for (j = 0; j < n_points; j++) {
1154 if (location[j] < 0) {
1155 _point_list[_n_points] = point_list[j];
1156 _point_id[_n_points] = j + idb;
1157 _n_points++;
1158 }
1159 }
1160 }
1161
1162 }
1163
1164 /* Update intersect for current point list */
1165
1166 intersects = _intersects_distant(this_locator,
1167 mesh,
1168 tolerance_base,
1169 tolerance_fraction,
1170 _n_points,
1171 _point_list_p,
1172 point_coords,
1173 location,
1174 mesh_extents_f);
1175
1176 /* Allocate buffers */
1177
1178 PLE_MALLOC(send_coords, _n_points * dim, ple_coord_t);
1179 PLE_MALLOC(send_id, _n_points, ple_lnum_t);
1180
1181 if (have_tags)
1182 PLE_MALLOC(send_tag, _n_points, int);
1183 else
1184 send_tag = NULL;
1185
1186 int *comm_order = NULL;
1187 if (this_locator->locate_algorithm == _LOCATE_BB_SENDRECV_ORDERED) {
1188 int comm_size, rank_id;
1189 MPI_Comm_size(this_locator->comm, &comm_size);
1190 MPI_Comm_rank(this_locator->comm, &rank_id);
1191 PLE_MALLOC(comm_order, intersects.n, int);
1192 _order_comm_ranks(rank_id,
1193 comm_size,
1194 intersects.n,
1195 intersects.rank,
1196 comm_order);
1197 }
1198
1199 /* First loop on possibly intersecting distant ranks */
1200 /*---------------------------------------------------*/
1201
1202 for (int li = 0; li < intersects.n; li++) {
1203
1204 int i = (comm_order != NULL) ?
1205 comm_order[li] : li;
1206
1207 dist_index = i; /* Ordering (communication schema) not yet optimized */
1208 dist_rank = intersects.rank[dist_index];
1209
1210 /* Prepare and send coords that should fit in each send buffer */
1211 /* Reset buffers for current intersect rank */
1212
1213 n_coords_loc = 0;
1214
1215 for (k = 0; k < stride; k++)
1216 extents[k] = intersects.extents[dist_index*stride + k];
1217
1218 /* Build partial buffer */
1219
1220 for (j = 0; j < _n_points; j++) {
1221
1222 ple_lnum_t coord_idx;
1223
1224 if (_point_list_p != NULL)
1225 coord_idx = _point_list_p[j] - idb;
1226 else
1227 coord_idx = j;
1228
1229 if (_within_extents(dim,
1230 &(point_coords[dim*coord_idx]),
1231 extents) == true) {
1232
1233 if (_point_id != NULL)
1234 send_id[n_coords_loc] = _point_id[j] -idb;
1235 else
1236 send_id[n_coords_loc] = j;
1237
1238 for (k = 0; k < dim; k++)
1239 send_coords[n_coords_loc*dim + k] = point_coords[dim*coord_idx + k];
1240
1241 if (have_tags)
1242 send_tag[n_coords_loc] = point_tag[j];
1243
1244 n_coords_loc += 1;
1245 }
1246
1247 }
1248
1249 /* Send then receive partial buffer */
1250
1251 dist_rank = intersects.rank[dist_index];
1252
1253 _locator_trace_start_comm(_ple_locator_log_start_p_comm, comm_timing);
1254
1255 MPI_Sendrecv(&n_coords_loc, 1, PLE_MPI_LNUM, dist_rank, PLE_MPI_TAG,
1256 &n_coords_dist, 1, PLE_MPI_LNUM, dist_rank,
1257 PLE_MPI_TAG, this_locator->comm, &status);
1258
1259 _locator_trace_end_comm(_ple_locator_log_end_p_comm, comm_timing);
1260
1261 PLE_MALLOC(coords_dist, n_coords_dist*dim, ple_coord_t);
1262 if (have_tags)
1263 PLE_MALLOC(tag_dist, n_coords_dist, int);
1264 else
1265 tag_dist = NULL;
1266
1267 _locator_trace_start_comm(_ple_locator_log_start_p_comm, comm_timing);
1268
1269 MPI_Sendrecv(send_coords, (int)(n_coords_loc*dim),
1270 PLE_MPI_COORD, dist_rank, PLE_MPI_TAG,
1271 coords_dist, (int)(n_coords_dist*dim),
1272 PLE_MPI_COORD, dist_rank, PLE_MPI_TAG,
1273 this_locator->comm, &status);
1274
1275 if (have_tags)
1276 MPI_Sendrecv(send_tag, (int)(n_coords_loc),
1277 MPI_INT, dist_rank, PLE_MPI_TAG,
1278 tag_dist, (int)(n_coords_dist),
1279 MPI_INT, dist_rank, PLE_MPI_TAG,
1280 this_locator->comm, &status);
1281
1282 _locator_trace_end_comm(_ple_locator_log_end_p_comm, comm_timing);
1283
1284 /* Now locate received coords on local rank */
1285
1286 PLE_MALLOC(location_dist, n_coords_dist, ple_lnum_t);
1287 PLE_MALLOC(distance_dist, n_coords_dist, float);
1288
1289 for (j = 0; j < n_coords_dist; j++) {
1290 location_dist[j] = -1;
1291 distance_dist[j] = -1.0;
1292 }
1293
1294 mesh_locate_f(mesh,
1295 tolerance_base,
1296 tolerance_fraction,
1297 n_coords_dist,
1298 coords_dist,
1299 tag_dist,
1300 location_dist,
1301 distance_dist);
1302
1303 PLE_FREE(tag_dist);
1304 PLE_FREE(coords_dist);
1305
1306 /* Exchange location return information with distant rank */
1307
1308 PLE_MALLOC(location_loc, n_coords_loc, ple_lnum_t);
1309 PLE_MALLOC(distance_loc, n_coords_loc, float);
1310
1311 _locator_trace_start_comm(_ple_locator_log_start_p_comm, comm_timing);
1312
1313 MPI_Sendrecv(location_dist, (int)n_coords_dist,
1314 PLE_MPI_LNUM, dist_rank, PLE_MPI_TAG,
1315 location_loc, (int)n_coords_loc,
1316 PLE_MPI_LNUM, dist_rank, PLE_MPI_TAG,
1317 this_locator->comm, &status);
1318
1319 MPI_Sendrecv(distance_dist, (int)n_coords_dist,
1320 MPI_FLOAT, dist_rank, PLE_MPI_TAG,
1321 distance_loc, (int)n_coords_loc,
1322 MPI_FLOAT, dist_rank, PLE_MPI_TAG,
1323 this_locator->comm, &status);
1324
1325 _locator_trace_end_comm(_ple_locator_log_end_p_comm, comm_timing);
1326
1327 PLE_FREE(location_dist);
1328 PLE_FREE(distance_dist);
1329
1330 /* Now update location information */
1331
1332 for (j = 0; j < n_coords_loc; j++) {
1333
1334 ple_lnum_t l = send_id[j];
1335
1336 if ( (distance_loc[j] > -0.1)
1337 && (distance_loc[j] < distance[l] || location[l] < 0)) {
1338 location_rank_id[l] = dist_rank;
1339 location[l] = location_loc[j];
1340 distance[l] = distance_loc[j];
1341 }
1342
1343 }
1344
1345 PLE_FREE(location_loc);
1346 PLE_FREE(distance_loc);
1347
1348 }
1349
1350 PLE_FREE(comm_order);
1351
1352 /* Free temporary arrays */
1353
1354 PLE_FREE(send_id);
1355 PLE_FREE(send_tag);
1356 PLE_FREE(send_coords);
1357
1358 if (_point_list != point_list) {
1359 if (_point_id != _point_list)
1360 PLE_FREE(_point_id);
1361 PLE_FREE(_point_list);
1362 }
1363
1364 PLE_FREE(intersects.rank);
1365 PLE_FREE(intersects.extents);
1366
1367 /* Finalize timing */
1368
1369 this_locator->location_wtime[1] += comm_timing[0];
1370 this_locator->location_cpu_time[1] += comm_timing[1];
1371 }
1372
1373 /*----------------------------------------------------------------------------
1374 * Prepare locator for use with a given mesh representation and point set.
1375 *
1376 * parameters:
1377 * this_locator <-> pointer to locator structure
1378 * mesh <-- pointer to mesh representation structure
1379 * dim <-- spatial dimension
1380 * n_points <-- number of points to locate
1381 * point_list <-- optional indirection array to point_coords
1382 * point_tag <-- optional point tag (size: n_points)
1383 * point_coords <-- coordinates of points to locate
1384 * (dimension: dim * n_points)
1385 * location <-> number of distant element containing or closest
1386 * to each point, or -1 (size: n_points)
1387 * location_rank_id <-> rank id for distant element containing or closest
1388 * to each point, or -1
1389 * distance --> optional distance from point to matching element:
1390 * < 0 if unlocated; 0 - 1 if inside and > 1 if
1391 * outside a volume element, or absolute distance
1392 * to a surface element (size: n_points)
1393 * mesh_extents_f <-- pointer to function computing mesh or mesh
1394 * subset or element extents
1395 * mesh_locate_f <-- function locating points in or on elements
1396 *----------------------------------------------------------------------------*/
1397
1398 static void
_locate_all_distant(ple_locator_t * this_locator,const void * mesh,float tolerance_base,float tolerance_fraction,ple_lnum_t n_points,const ple_lnum_t point_list[],const int point_tag[],const ple_coord_t point_coords[],ple_lnum_t location[],ple_lnum_t location_rank_id[],float distance[],ple_mesh_extents_t * mesh_extents_f,ple_mesh_elements_locate_t * mesh_locate_f)1399 _locate_all_distant(ple_locator_t *this_locator,
1400 const void *mesh,
1401 float tolerance_base,
1402 float tolerance_fraction,
1403 ple_lnum_t n_points,
1404 const ple_lnum_t point_list[],
1405 const int point_tag[],
1406 const ple_coord_t point_coords[],
1407 ple_lnum_t location[],
1408 ple_lnum_t location_rank_id[],
1409 float distance[],
1410 ple_mesh_extents_t *mesh_extents_f,
1411 ple_mesh_elements_locate_t *mesh_locate_f)
1412 {
1413 int k;
1414 int dist_rank;
1415 ple_lnum_t j;
1416 ple_lnum_t n_coords_loc, n_coords_dist, n_interior, n_exterior;
1417 ple_lnum_t coord_idx, start_idx;
1418 ple_lnum_t *send_id, *send_location;
1419 ple_lnum_t *location_count, *location_shift;
1420 float *_distance = distance;
1421
1422 MPI_Status status;
1423
1424 double comm_timing[4] = {0., 0., 0., 0.};
1425 const int dim = this_locator->dim;
1426 const ple_lnum_t idb = this_locator->point_id_base;
1427
1428 if (distance == NULL) {
1429 PLE_MALLOC(_distance, n_points, float);
1430 for (j = 0; j < n_points; j++)
1431 _distance[j] = -1.0;
1432 }
1433
1434 /* Now update locations */
1435
1436 _locate_distant(this_locator,
1437 mesh,
1438 tolerance_base,
1439 tolerance_fraction,
1440 n_points,
1441 point_list,
1442 point_tag,
1443 point_coords,
1444 location,
1445 location_rank_id,
1446 _distance,
1447 mesh_extents_f,
1448 mesh_locate_f);
1449
1450 /* Update info on communicating ranks and matching ids */
1451 /*----------------------------------------------------*/
1452
1453 _location_ranks(this_locator,
1454 n_points,
1455 location_rank_id);
1456
1457 /* Reorganize location information */
1458 /*---------------------------------*/
1459
1460 /* Now that location is done, the location[] array contains
1461 either -1 if a point was not located, or a local index
1462 (associated with the corresponding rank); the distance[] array
1463 is not needed anymore now that all comparisons have been done */
1464
1465 if (_distance != distance)
1466 PLE_FREE(_distance);
1467
1468 PLE_MALLOC(location_shift, this_locator->n_intersects, ple_lnum_t);
1469 PLE_MALLOC(location_count, this_locator->n_intersects, ple_lnum_t);
1470
1471 for (int i = 0; i < this_locator->n_intersects; i++)
1472 location_count[i] = 0;
1473
1474 n_exterior = 0;
1475 for (j = 0; j < n_points; j++) {
1476 if (location_rank_id[j] > -1)
1477 location_count[location_rank_id[j]] += 1;
1478 else
1479 n_exterior += 1;
1480 }
1481
1482 this_locator->n_interior = n_points - n_exterior;
1483 PLE_MALLOC(this_locator->interior_list, this_locator->n_interior, ple_lnum_t);
1484
1485 this_locator->n_exterior = n_exterior;
1486 PLE_MALLOC(this_locator->exterior_list, this_locator->n_exterior, ple_lnum_t);
1487
1488 if (this_locator->n_intersects > 0)
1489 location_shift[0] = 0;
1490 for (int i = 1; i < this_locator->n_intersects; i++)
1491 location_shift[i] = location_shift[i-1] + location_count[i-1];
1492
1493 for (int i = 0; i < this_locator->n_intersects; i++)
1494 location_count[i] = 0;
1495
1496 PLE_MALLOC(send_id, n_points, ple_lnum_t);
1497 PLE_MALLOC(send_location, n_points, ple_lnum_t);
1498
1499 /* send_id[] will now contain information for all blocks */
1500 for (j = 0; j < n_points; j++)
1501 send_id[j] = -1;
1502
1503 n_interior = 0;
1504 n_exterior = 0;
1505 for (j = 0; j < n_points; j++) {
1506 const int l_rank = location_rank_id[j];
1507 if (l_rank > -1) {
1508 send_id[location_shift[l_rank] + location_count[l_rank]] = j;
1509 location_count[l_rank] += 1;
1510 this_locator->interior_list[n_interior] = j + idb;
1511 n_interior += 1;
1512 }
1513 else {
1514 this_locator->exterior_list[n_exterior] = j + idb;
1515 n_exterior += 1;
1516 }
1517 }
1518
1519 /* Second loop on possibly intersecting distant ranks */
1520 /*----------------------------------------------------*/
1521
1522 /* Count and organize total number of local and distant points */
1523
1524 PLE_REALLOC(this_locator->local_points_idx,
1525 this_locator->n_intersects + 1,
1526 ple_lnum_t);
1527
1528 PLE_REALLOC(this_locator->distant_points_idx,
1529 this_locator->n_intersects + 1,
1530 ple_lnum_t);
1531
1532 this_locator->local_points_idx[0] = 0;
1533 this_locator->distant_points_idx[0] = 0;
1534
1535 for (int li = 0; li < this_locator->n_intersects; li++) {
1536
1537 int i = (this_locator->comm_order != NULL) ?
1538 this_locator->comm_order[li] : li;
1539
1540 dist_rank = this_locator->intersect_rank[i];
1541
1542 n_coords_loc = location_count[i];
1543
1544 this_locator->local_points_idx[i+1] = n_coords_loc;
1545
1546 _locator_trace_start_comm(_ple_locator_log_start_p_comm, comm_timing);
1547
1548 MPI_Sendrecv(&n_coords_loc, 1, PLE_MPI_LNUM, dist_rank, PLE_MPI_TAG,
1549 &n_coords_dist, 1, PLE_MPI_LNUM, dist_rank,
1550 PLE_MPI_TAG, this_locator->comm, &status);
1551
1552 _locator_trace_end_comm(_ple_locator_log_end_p_comm, comm_timing);
1553
1554 this_locator->distant_points_idx[i+1] = n_coords_dist;
1555
1556 }
1557
1558 /* Counts to index */
1559
1560 this_locator->local_points_idx[0] = 0;
1561 this_locator->distant_points_idx[0] = 0;
1562
1563 for (int i = 0; i < this_locator->n_intersects; i++) {
1564 this_locator->local_points_idx[i+1] += this_locator->local_points_idx[i];
1565 this_locator->distant_points_idx[i+1] += this_locator->distant_points_idx[i];
1566 }
1567
1568 /* Third loop on possibly intersecting distant ranks */
1569 /*----------------------------------------------------*/
1570
1571 PLE_REALLOC(this_locator->local_point_ids,
1572 this_locator->local_points_idx[this_locator->n_intersects],
1573 ple_lnum_t);
1574
1575 PLE_REALLOC(this_locator->distant_point_location,
1576 this_locator->distant_points_idx[this_locator->n_intersects],
1577 ple_lnum_t);
1578
1579 PLE_REALLOC(this_locator->distant_point_coords,
1580 this_locator->distant_points_idx[this_locator->n_intersects] * dim,
1581 ple_coord_t);
1582
1583 for (int li = 0; li < this_locator->n_intersects; li++) {
1584
1585 ple_coord_t *send_coords;
1586
1587 int i = (this_locator->comm_order != NULL) ?
1588 this_locator->comm_order[li] : li;
1589 dist_rank = this_locator->intersect_rank[i];
1590
1591 n_coords_loc = this_locator->local_points_idx[i+1]
1592 - this_locator->local_points_idx[i];
1593
1594 n_coords_dist = this_locator->distant_points_idx[i+1]
1595 - this_locator->distant_points_idx[i];
1596
1597 start_idx = this_locator->local_points_idx[i];
1598
1599 PLE_MALLOC(send_coords, n_coords_loc * dim, ple_coord_t);
1600
1601 for (j = 0; j < n_coords_loc; j++) {
1602
1603 coord_idx = send_id[location_shift[i] + j];
1604 assert(coord_idx > -1);
1605 this_locator->local_point_ids[start_idx + j] = coord_idx;
1606 send_location[j] = location[coord_idx];
1607 if (point_list != NULL) {
1608 for (k = 0; k < dim; k++)
1609 send_coords[j*dim + k]
1610 = point_coords[dim*(point_list[coord_idx] - idb) + k];
1611 }
1612 else {
1613 for (k = 0; k < dim; k++)
1614 send_coords[j*dim + k]
1615 = point_coords[dim*coord_idx + k];
1616 }
1617 }
1618
1619 _locator_trace_start_comm(_ple_locator_log_start_p_comm, comm_timing);
1620
1621 MPI_Sendrecv(send_location, (int)n_coords_loc,
1622 PLE_MPI_LNUM, dist_rank, PLE_MPI_TAG,
1623 (this_locator->distant_point_location
1624 + this_locator->distant_points_idx[i]), (int)n_coords_dist,
1625 PLE_MPI_LNUM, dist_rank, PLE_MPI_TAG,
1626 this_locator->comm, &status);
1627
1628 MPI_Sendrecv(send_coords, (int)(n_coords_loc*dim),
1629 PLE_MPI_COORD, dist_rank, PLE_MPI_TAG,
1630 (this_locator->distant_point_coords
1631 + (this_locator->distant_points_idx[i]*dim)),
1632 (int)(n_coords_dist*dim),
1633 PLE_MPI_COORD, dist_rank, PLE_MPI_TAG,
1634 this_locator->comm, &status);
1635
1636 _locator_trace_end_comm(_ple_locator_log_end_p_comm, comm_timing);
1637
1638 PLE_FREE(send_coords);
1639
1640 }
1641
1642 PLE_FREE(location_count);
1643 PLE_FREE(location_shift);
1644
1645 PLE_FREE(send_location);
1646 PLE_FREE(send_id);
1647
1648 PLE_FREE(location);
1649
1650 this_locator->location_wtime[1] += comm_timing[0];
1651 this_locator->location_cpu_time[1] += comm_timing[1];
1652 }
1653
1654 #endif /* defined(PLE_HAVE_MPI) */
1655
1656 /*----------------------------------------------------------------------------
1657 * Initialize location information from previous locator info,
1658 * in serial mode.
1659 *
1660 * parameters:
1661 * this_locator <-> pointer to locator structure
1662 * n_points <-- number of points to locate
1663 * location --> number of distant element containing or closest
1664 * to each point, or -1 (size: n_points)
1665 *----------------------------------------------------------------------------*/
1666
1667 static void
_transfer_location_local(ple_locator_t * this_locator,ple_lnum_t n_points,ple_lnum_t location[])1668 _transfer_location_local(ple_locator_t *this_locator,
1669 ple_lnum_t n_points,
1670 ple_lnum_t location[])
1671 {
1672 ple_lnum_t j;
1673
1674 /* Initialize locations */
1675
1676 for (j = 0; j < n_points; j++)
1677 location[j] = -1;
1678
1679 /* Update with existing location info *
1680 exact location info is not necessary, so not determined;
1681 all that is necessary is to mark located points as such */
1682
1683 if (this_locator->n_intersects == 1) {
1684
1685 const ple_lnum_t _n_points = this_locator->local_points_idx[1]
1686 - this_locator->local_points_idx[0];
1687
1688 const ple_lnum_t idb = this_locator->point_id_base;
1689 const ple_lnum_t *_interior_list = this_locator->interior_list;
1690
1691 const ple_lnum_t *_local_point_ids = this_locator->local_point_ids;
1692 const ple_lnum_t *dist_v_ptr = this_locator->distant_point_location;
1693
1694 for (j = 0; j < _n_points; j++) {
1695 ple_lnum_t k = _interior_list[_local_point_ids[j] - idb];
1696 location[k] = dist_v_ptr[j];
1697 }
1698
1699 }
1700
1701 this_locator->n_intersects = 0;
1702 PLE_FREE(this_locator->intersect_rank);
1703 PLE_FREE(this_locator->comm_order);
1704 PLE_FREE(this_locator->local_points_idx);
1705 PLE_FREE(this_locator->distant_points_idx);
1706 PLE_FREE(this_locator->local_point_ids);
1707 PLE_FREE(this_locator->distant_point_location);
1708 PLE_FREE(this_locator->distant_point_coords);
1709
1710 this_locator->n_interior = 0;
1711 this_locator->n_exterior = 0;
1712 PLE_FREE(this_locator->interior_list);
1713 PLE_FREE(this_locator->exterior_list);
1714 }
1715
1716 /*----------------------------------------------------------------------------
1717 * Determine or update possibly intersecting ranks for unlocated elements,
1718 * in parallel.
1719 *
1720 * parameters:
1721 * this_locator <-- pointer to locator structure
1722 * mesh <-- pointer to mesh representation structure
1723 * tolerance_base <-- associated fixed tolerance
1724 * tolerance_fraction <-- associated fraction of element bounding
1725 * boxes added to tolerance
1726 * n_points <-- number of points to locate
1727 * point_list <-- optional indirection array to point_coords
1728 * point_coords <-- coordinates of points to locate
1729 * (dimension: dim * n_points)
1730 * location <-> number of distant element containing or closest
1731 * to each point, or -1 (size: n_points)
1732 * mesh_extents_f <-- pointer to function computing mesh or mesh
1733 * subset or element extents
1734 *
1735 * returns:
1736 * local rank intersection info
1737 *----------------------------------------------------------------------------*/
1738
1739 static _rank_intersects_t
_intersects_local(ple_locator_t * this_locator,const void * mesh,float tolerance_base,float tolerance_fraction,ple_lnum_t n_points,const ple_lnum_t point_list[],const ple_coord_t point_coords[],const ple_lnum_t location[],ple_mesh_extents_t * mesh_extents_f)1740 _intersects_local(ple_locator_t *this_locator,
1741 const void *mesh,
1742 float tolerance_base,
1743 float tolerance_fraction,
1744 ple_lnum_t n_points,
1745 const ple_lnum_t point_list[],
1746 const ple_coord_t point_coords[],
1747 const ple_lnum_t location[],
1748 ple_mesh_extents_t *mesh_extents_f)
1749 {
1750 int i;
1751 int stride2;
1752 double extents[12];
1753
1754 int j;
1755 int n_intersects;
1756
1757 const int dim = this_locator->dim;
1758
1759 _rank_intersects_t intersects;
1760
1761 /* Update intersects */
1762
1763 intersects.n = 0;
1764
1765 mesh_extents_f(mesh,
1766 1,
1767 tolerance_fraction,
1768 extents);
1769
1770 _point_extents(dim,
1771 this_locator->point_id_base,
1772 n_points,
1773 point_list,
1774 point_coords,
1775 location,
1776 extents + 2*dim);
1777
1778 for (i = 0; i < dim; i++) {
1779
1780 if (extents[i] > -HUGE_VAL + tolerance_base)
1781 extents[i] -= tolerance_base;
1782 else
1783 extents[i] = -HUGE_VAL;
1784 if (extents[i] < HUGE_VAL - tolerance_base)
1785 extents[i + dim] += tolerance_base;
1786 else
1787 extents[i + dim] = HUGE_VAL;
1788
1789 if (extents[i + 2*dim] > -HUGE_VAL + tolerance_base)
1790 extents[i + 2*dim] -= tolerance_base;
1791 else
1792 extents[i + 2*dim] = -HUGE_VAL;
1793 if (extents[i + 3*dim] < HUGE_VAL - tolerance_base)
1794 extents[i + 3*dim] += tolerance_base;
1795 else
1796 extents[i + 3*dim] = HUGE_VAL;
1797
1798 }
1799
1800 /* Determine possible overlap */
1801
1802 stride2 = dim * 2; /* Stride for one type of extent */
1803
1804 n_intersects = 0;
1805
1806 if (_intersect_extents(dim, extents, extents + (2*dim)) == true)
1807 n_intersects += 1;
1808
1809 intersects.n = n_intersects;
1810 PLE_MALLOC(intersects.extents, intersects.n * stride2, double);
1811
1812 /* Copy only element (and not point) extents */
1813
1814 for (j = 0; j < stride2; j++)
1815 intersects.extents[j] = extents[j];
1816
1817 return intersects;
1818 }
1819
1820 /*----------------------------------------------------------------------------
1821 * Prepare locator for use with a given mesh representation and point set.
1822 *
1823 * parameters:
1824 * this_locator <-> pointer to locator structure
1825 * mesh <-- pointer to mesh representation structure
1826 * tolerance_base <-- associated fixed tolerance
1827 * tolerance_fraction <-- associated fraction of element bounding
1828 * boxes added to tolerance
1829 * n_points <-- number of points to locate
1830 * point_list <-- optional indirection array to point_coords
1831 * point_tag <-- optional point tag (size: n_points)
1832 * point_coords <-- coordinates of points to locate
1833 * (dimension: dim * n_points)
1834 * distance --> optional distance from point to matching element:
1835 * < 0 if unlocated; 0 - 1 if inside and > 1 if
1836 * outside a volume element, or absolute distance
1837 * to a surface element (size: n_points)
1838 * mesh_extents_f <-- function computing mesh or mesh subset extents
1839 * mesh_locate_f <-- function locating the closest local elements
1840 *----------------------------------------------------------------------------*/
1841
1842 static void
_locate_all_local(ple_locator_t * this_locator,const void * mesh,float tolerance_base,float tolerance_fraction,ple_lnum_t n_points,const ple_lnum_t point_list[],const int point_tag[],const ple_coord_t point_coords[],ple_lnum_t location[],float distance[],ple_mesh_extents_t * mesh_extents_f,ple_mesh_elements_locate_t * mesh_locate_f)1843 _locate_all_local(ple_locator_t *this_locator,
1844 const void *mesh,
1845 float tolerance_base,
1846 float tolerance_fraction,
1847 ple_lnum_t n_points,
1848 const ple_lnum_t point_list[],
1849 const int point_tag[],
1850 const ple_coord_t point_coords[],
1851 ple_lnum_t location[],
1852 float distance[],
1853 ple_mesh_extents_t *mesh_extents_f,
1854 ple_mesh_elements_locate_t *mesh_locate_f)
1855 {
1856 int l;
1857 ple_lnum_t j, k;
1858 ple_lnum_t n_coords, n_interior, n_exterior, coord_idx;
1859 _rank_intersects_t intersects;
1860
1861 const int dim = this_locator->dim;
1862 const int have_tags = this_locator->have_tags;
1863 const ple_lnum_t idb = this_locator->point_id_base;
1864
1865 /* Update intersect for current point list */
1866
1867 intersects = _intersects_local(this_locator,
1868 mesh,
1869 tolerance_base,
1870 tolerance_fraction,
1871 n_points,
1872 point_list,
1873 point_coords,
1874 location,
1875 mesh_extents_f);
1876
1877 n_coords = 0;
1878
1879 this_locator->n_intersects = intersects.n;
1880
1881 /* Build partial buffer */
1882
1883 if (intersects.n > 0) {
1884
1885 ple_lnum_t *_location = location;
1886 float *_distance = distance;
1887
1888 ple_lnum_t *id;
1889 int *tag;
1890 ple_coord_t *coords;
1891
1892 PLE_MALLOC(coords, n_points * dim, ple_coord_t);
1893 PLE_MALLOC(id, n_points, ple_lnum_t);
1894
1895 if (have_tags)
1896 PLE_MALLOC(tag, n_points, int);
1897 else
1898 tag = NULL;
1899
1900 for (j = 0; j < n_points; j++) {
1901
1902 if (location[j] > -1) {
1903 if (distance == NULL)
1904 continue;
1905 else if (distance[j] < 1)
1906 continue;
1907 }
1908
1909 if (point_list != NULL)
1910 coord_idx = point_list[j] - idb;
1911 else
1912 coord_idx = j;
1913
1914 if (_within_extents(dim,
1915 &(point_coords[dim*coord_idx]),
1916 intersects.extents) == true) {
1917
1918 for (k = 0; k < dim; k++)
1919 coords[n_coords*dim + k]
1920 = point_coords[dim*coord_idx + k];
1921
1922 id[n_coords] = j;
1923
1924 if (have_tags)
1925 tag[n_coords] = point_tag[j];
1926
1927 n_coords += 1;
1928 }
1929
1930 }
1931
1932 PLE_REALLOC(coords, n_coords * dim, ple_coord_t);
1933 PLE_REALLOC(id, n_coords, ple_lnum_t);
1934 if (have_tags)
1935 PLE_REALLOC(tag, n_coords, int);
1936
1937 if (n_coords < n_points) {
1938 PLE_MALLOC(_location, n_coords, ple_lnum_t);
1939 for (j = 0; j < n_coords; j++)
1940 _location[j] = location[id[j]];
1941 }
1942 if (distance == NULL || n_coords < n_points) {
1943 PLE_MALLOC(_distance, n_coords, float);
1944 if (distance == NULL) {
1945 for (j = 0; j < n_coords; j++)
1946 _distance[j] = -1.0;
1947 }
1948 else {
1949 for (j = 0; j < n_coords; j++)
1950 _distance[j] = distance[id[j]];
1951 }
1952 }
1953
1954 mesh_locate_f(mesh,
1955 tolerance_base,
1956 tolerance_fraction,
1957 n_coords,
1958 coords,
1959 tag,
1960 _location,
1961 _distance);
1962
1963 PLE_FREE(coords);
1964
1965 if (n_coords < n_points) {
1966 for (j = 0; j < n_coords; j++) {
1967 if (_location[j] > -1) {
1968 k = id[j];
1969 if (distance != NULL) {
1970 if (distance[k] <= _distance[j])
1971 continue;
1972 else
1973 distance[k] = _distance[j];
1974 }
1975 location[k] = _location[j];
1976 }
1977 }
1978 }
1979
1980 if (_location != location)
1981 PLE_FREE(_location);
1982
1983 if (_distance != distance)
1984 PLE_FREE(_distance);
1985
1986 PLE_FREE(tag);
1987 PLE_FREE(id);
1988
1989 }
1990
1991 PLE_FREE(intersects.extents);
1992
1993 /* Reorganize location information */
1994 /*---------------------------------*/
1995
1996 /* Now that location is done, the location[] array contains
1997 either -1 if a point was not located, or a local index;
1998 the distance[] array is not needed anymore now that all comparisons have
1999 been done */
2000
2001 this_locator->n_interior = 0;
2002 this_locator->n_exterior = 0;
2003 for (j = 0; j < n_points; j++) {
2004 if (location[j] > -1)
2005 this_locator->n_interior += 1;
2006 else
2007 this_locator->n_exterior += 1;
2008 }
2009
2010 PLE_MALLOC(this_locator->interior_list, this_locator->n_interior, ple_lnum_t);
2011 PLE_MALLOC(this_locator->exterior_list, this_locator->n_exterior, ple_lnum_t);
2012
2013 /* Organize total number of "local" and "distant" points */
2014
2015 PLE_REALLOC(this_locator->local_points_idx, 2, ple_lnum_t);
2016 PLE_REALLOC(this_locator->distant_points_idx, 2, ple_lnum_t);
2017
2018 this_locator->local_points_idx[0] = 0;
2019 this_locator->local_points_idx[1] = this_locator->n_interior;
2020
2021 this_locator->distant_points_idx[0] = 0;
2022 this_locator->distant_points_idx[1] = this_locator->n_interior;
2023
2024 this_locator->local_point_ids = NULL; /* Not needed for single-process */
2025
2026 PLE_REALLOC(this_locator->distant_point_location,
2027 this_locator->n_interior,
2028 ple_lnum_t);
2029 PLE_REALLOC(this_locator->distant_point_coords,
2030 this_locator->n_interior * dim,
2031 ple_coord_t);
2032
2033 n_interior = 0;
2034 n_exterior = 0;
2035
2036 for (j = 0; j < n_points; j++) {
2037
2038 if (point_list != NULL)
2039 coord_idx = point_list[j] - idb;
2040 else
2041 coord_idx = j;
2042
2043 if (location[j] > -1) {
2044 this_locator->distant_point_location[n_interior] = location[j];
2045 for (l = 0; l < dim; l++) {
2046 this_locator->distant_point_coords[n_interior*dim + l]
2047 = point_coords[coord_idx*dim + l];
2048 }
2049 this_locator->interior_list[n_interior] = j + idb;
2050 n_interior += 1;
2051 }
2052 else {
2053 this_locator->exterior_list[n_exterior] = j + idb;
2054 n_exterior += 1;
2055 }
2056
2057 }
2058
2059 }
2060
2061 #if defined(PLE_HAVE_MPI)
2062
2063 /*----------------------------------------------------------------------------
2064 * Distribute variable defined on distant points to processes owning
2065 * the original points (i.e. distant processes).
2066 *
2067 * The exchange is symmetric if both variables are defined, receive
2068 * only if distant_var is NULL, or send only if local_var is NULL.
2069 *
2070 * parameters:
2071 * this_locator <-- pointer to locator structure
2072 * distant_var <-> variable defined on distant points (ready to send)
2073 * local_var <-> variable defined on local points (received)
2074 * local_list <-- optional indirection list for local_var
2075 * datatype <-- variable type
2076 * stride <-- dimension (1 for scalar, 3 for interlaced vector)
2077 * reverse <-- if true, exchange is reversed
2078 * (receive values associated with distant points
2079 * from the processes owning the original points)
2080 *----------------------------------------------------------------------------*/
2081
2082 static void
_exchange_point_var_distant(ple_locator_t * this_locator,void * distant_var,void * local_var,const ple_lnum_t * local_list,MPI_Datatype datatype,size_t stride,_Bool reverse)2083 _exchange_point_var_distant(ple_locator_t *this_locator,
2084 void *distant_var,
2085 void *local_var,
2086 const ple_lnum_t *local_list,
2087 MPI_Datatype datatype,
2088 size_t stride,
2089 _Bool reverse)
2090 {
2091 int dist_v_count, loc_v_count, size;
2092 int dist_rank;
2093 int dist_v_flag, loc_v_flag;
2094 ple_lnum_t n_points_loc, n_points_loc_max, n_points_dist;
2095 size_t dist_v_idx;
2096 void *dist_v_ptr;
2097 void *loc_v_buf;
2098
2099 double comm_timing[4] = {0., 0., 0., 0.};
2100
2101 MPI_Aint lb, extent;
2102 MPI_Status status;
2103
2104 /* Check extent of datatype */
2105
2106 #if (MPI_VERSION >= 2)
2107 MPI_Type_get_extent(datatype, &lb, &extent);
2108 #else
2109 MPI_Type_extent(datatype, &extent);
2110 #endif
2111 MPI_Type_size(datatype, &size);
2112
2113 if (extent != size)
2114 ple_error(__FILE__, __LINE__, 0,
2115 _("_exchange_point_var() is not implemented for use with\n"
2116 "MPI datatypes associated with structures using padding\n"
2117 "(for which size != extent)."));
2118
2119 /* Initialization */
2120
2121 n_points_loc_max = 0;
2122
2123 for (int i = 0; i < this_locator->n_intersects; i++) {
2124 n_points_loc = this_locator->local_points_idx[i+1]
2125 - this_locator->local_points_idx[i];
2126 if (n_points_loc > n_points_loc_max)
2127 n_points_loc_max = n_points_loc;
2128 }
2129
2130 PLE_MALLOC(loc_v_buf, n_points_loc_max*size*stride, char);
2131
2132 /* Loop on MPI ranks */
2133 /*-------------------*/
2134
2135 for (int li = 0; li < this_locator->n_intersects; li++) {
2136
2137 int i = (this_locator->comm_order != NULL) ?
2138 this_locator->comm_order[li] : li;
2139
2140 const ple_lnum_t *_local_point_ids
2141 = this_locator->local_point_ids + this_locator->local_points_idx[i];
2142
2143 dist_rank = this_locator->intersect_rank[i];
2144
2145 n_points_loc = this_locator->local_points_idx[i+1]
2146 - this_locator->local_points_idx[i];
2147
2148 n_points_dist = this_locator->distant_points_idx[i+1]
2149 - this_locator->distant_points_idx[i];
2150
2151 if (distant_var != NULL && n_points_dist > 0)
2152 dist_v_flag = 1;
2153 else
2154 dist_v_flag = 0;
2155
2156 _locator_trace_start_comm(_ple_locator_log_start_p_comm, comm_timing);
2157
2158 MPI_Sendrecv(&dist_v_flag, 1, MPI_INT, dist_rank, PLE_MPI_TAG,
2159 &loc_v_flag, 1, MPI_INT, dist_rank, PLE_MPI_TAG,
2160 this_locator->comm, &status);
2161
2162 _locator_trace_end_comm(_ple_locator_log_end_p_comm, comm_timing);
2163
2164 if (loc_v_flag == 1 && (local_var == NULL || n_points_loc == 0))
2165 ple_error(__FILE__, __LINE__, 0,
2166 _("Incoherent arguments to different instances in "
2167 "_exchange_point_var().\n"
2168 "Send and receive operations do not match "
2169 "(dist_rank = %d\n)\n"), dist_rank);
2170
2171 dist_v_idx = this_locator->distant_points_idx[i] * stride*size;
2172 dist_v_count = n_points_dist * stride * dist_v_flag;
2173
2174 if (loc_v_flag > 0)
2175 loc_v_count = n_points_loc*stride;
2176 else
2177 loc_v_count = 0;
2178
2179 /* Exchange information */
2180
2181 if (distant_var != NULL)
2182 dist_v_ptr = (void *)(((char *)distant_var) + dist_v_idx);
2183 else
2184 dist_v_ptr = NULL;
2185
2186 if (reverse == false) {
2187
2188 _locator_trace_start_comm(_ple_locator_log_start_p_comm, comm_timing);
2189
2190 MPI_Sendrecv(dist_v_ptr, dist_v_count, datatype, dist_rank, PLE_MPI_TAG,
2191 loc_v_buf, loc_v_count, datatype, dist_rank, PLE_MPI_TAG,
2192 this_locator->comm, &status);
2193
2194 _locator_trace_end_comm(_ple_locator_log_end_p_comm, comm_timing);
2195
2196 if (loc_v_flag > 0) {
2197 if (local_list == NULL) {
2198 int k;
2199 size_t l;
2200 const size_t nbytes = stride*size;
2201 for (k = 0; k < n_points_loc; k++) {
2202 char *local_v_p = (char *)local_var + _local_point_ids[k]*nbytes;
2203 const char *loc_v_buf_p = (const char *)loc_v_buf + k*nbytes;
2204 for (l = 0; l < nbytes; l++)
2205 local_v_p[l] = loc_v_buf_p[l];
2206 }
2207 }
2208 else {
2209 int k;
2210 size_t l;
2211 const size_t nbytes = stride*size;
2212 const ple_lnum_t idb = this_locator->point_id_base;
2213 for (k = 0; k < n_points_loc; k++) {
2214 char *local_v_p = (char *)local_var
2215 + (local_list[_local_point_ids[k]] - idb)*nbytes;
2216 const char *loc_v_buf_p = (const char *)loc_v_buf + k*nbytes;
2217 for (l = 0; l < nbytes; l++)
2218 local_v_p[l] = loc_v_buf_p[l];
2219 }
2220 }
2221 }
2222
2223 }
2224 else { /* if (reverse == true) */
2225
2226 if (loc_v_flag > 0) {
2227 if (local_list == NULL) {
2228 int k;
2229 size_t l;
2230 const size_t nbytes = stride*size;
2231 for (k = 0; k < n_points_loc; k++) {
2232 const char *local_v_p
2233 = (const char *)local_var + _local_point_ids[k]*nbytes;
2234 char *loc_v_buf_p = (char *)loc_v_buf + k*nbytes;
2235 for (l = 0; l < nbytes; l++)
2236 loc_v_buf_p[l] = local_v_p[l];
2237 }
2238 }
2239 else {
2240 int k;
2241 size_t l;
2242 const size_t nbytes = stride*size;
2243 const ple_lnum_t idb = this_locator->point_id_base;
2244 for (k = 0; k < n_points_loc; k++) {
2245 const char *local_v_p
2246 = (const char *)local_var
2247 + (local_list[_local_point_ids[k]] - idb)*nbytes;
2248 char *loc_v_buf_p = (char *)loc_v_buf + k*nbytes;
2249 for (l = 0; l < nbytes; l++)
2250 loc_v_buf_p[l] = local_v_p[l];
2251 }
2252 }
2253 }
2254
2255 _locator_trace_start_comm(_ple_locator_log_start_p_comm, comm_timing);
2256
2257 MPI_Sendrecv(loc_v_buf, loc_v_count, datatype, dist_rank, PLE_MPI_TAG,
2258 dist_v_ptr, dist_v_count, datatype, dist_rank, PLE_MPI_TAG,
2259 this_locator->comm, &status);
2260
2261 _locator_trace_end_comm(_ple_locator_log_end_p_comm, comm_timing);
2262
2263 }
2264
2265 } /* End of loop on MPI ranks */
2266
2267 PLE_FREE(loc_v_buf);
2268
2269 this_locator->exchange_wtime[1] += comm_timing[0];
2270 this_locator->exchange_cpu_time[1] += comm_timing[1];
2271 }
2272
2273 /*----------------------------------------------------------------------------
2274 * Distribute variable defined on distant points to processes owning
2275 * the original points (i.e. distant processes).
2276 *
2277 * The exchange is symmetric if both variables are defined, receive
2278 * only if distant_var is NULL, or send only if local_var is NULL.
2279 *
2280 * This variant of the function uses asynchronous MPI calls.
2281 *
2282 * parameters:
2283 * this_locator <-- pointer to locator structure
2284 * distant_var <-> variable defined on distant points (ready to send)
2285 * local_var <-> variable defined on local points (received)
2286 * local_list <-- optional indirection list for local_var
2287 * datatype <-- variable type
2288 * stride <-- dimension (1 for scalar, 3 for interlaced vector)
2289 * reverse <-- if true, exchange is reversed
2290 * (receive values associated with distant points
2291 * from the processes owning the original points)
2292 *----------------------------------------------------------------------------*/
2293
2294 static void
_exchange_point_var_distant_asyn(ple_locator_t * this_locator,void * distant_var,void * local_var,const ple_lnum_t * local_list,MPI_Datatype datatype,size_t stride,_Bool reverse)2295 _exchange_point_var_distant_asyn(ple_locator_t *this_locator,
2296 void *distant_var,
2297 void *local_var,
2298 const ple_lnum_t *local_list,
2299 MPI_Datatype datatype,
2300 size_t stride,
2301 _Bool reverse)
2302 {
2303 int dist_v_count, loc_v_count, size;
2304 int dist_rank;
2305 ple_lnum_t n_points_loc, n_points_loc_tot, n_points_dist;
2306 size_t dist_v_idx;
2307 unsigned char *dist_v_ptr, *loc_v_ptr;
2308
2309 MPI_Aint lb, extent;
2310 void *loc_v_buf = NULL;
2311 int *dist_v_flag = NULL, *loc_v_flag = NULL;
2312 MPI_Status *status = NULL;
2313 MPI_Request *request = NULL;
2314
2315 double comm_timing[4] = {0., 0., 0., 0.};
2316
2317 /* Check extent of datatype */
2318
2319 #if (MPI_VERSION >= 2)
2320 MPI_Type_get_extent(datatype, &lb, &extent);
2321 #else
2322 MPI_Type_extent(datatype, &extent);
2323 #endif
2324 MPI_Type_size(datatype, &size);
2325
2326 if (extent != size)
2327 ple_error(__FILE__, __LINE__, 0,
2328 _("_exchange_point_var() is not implemented for use with\n"
2329 "MPI datatypes associated with structures using padding\n"
2330 "(for which size != extent)."));
2331
2332 /* Initialization */
2333
2334 n_points_loc_tot
2335 = this_locator->local_points_idx[this_locator->n_intersects];
2336
2337 PLE_MALLOC(loc_v_flag, this_locator->n_intersects, int);
2338 PLE_MALLOC(dist_v_flag, this_locator->n_intersects, int);
2339 PLE_MALLOC(request, this_locator->n_intersects*2, MPI_Request);
2340 PLE_MALLOC(status, this_locator->n_intersects*2, MPI_Status);
2341
2342 PLE_MALLOC(loc_v_buf, n_points_loc_tot*size*stride, char);
2343
2344 /* First loop on distant ranks for argument checks */
2345 /*-------------------------------------------------*/
2346
2347 _locator_trace_start_comm(_ple_locator_log_start_p_comm, comm_timing);
2348
2349 for (int i = 0; i < this_locator->n_intersects; i++) {
2350
2351 dist_rank = this_locator->intersect_rank[i];
2352
2353 n_points_dist = this_locator->distant_points_idx[i+1]
2354 - this_locator->distant_points_idx[i];
2355
2356 if (distant_var != NULL && n_points_dist > 0)
2357 dist_v_flag[i] = 1;
2358 else
2359 dist_v_flag[i] = 0;
2360
2361 _locator_trace_start_comm(_ple_locator_log_start_p_comm, comm_timing);
2362
2363 MPI_Irecv(loc_v_flag + i, 1, MPI_INT, dist_rank, PLE_MPI_TAG,
2364 this_locator->comm, &request[i*2]);
2365 MPI_Isend(dist_v_flag + i, 1, MPI_INT, dist_rank, PLE_MPI_TAG,
2366 this_locator->comm, &request[i*2+1]);
2367 }
2368
2369 MPI_Waitall(this_locator->n_intersects*2, request, status);
2370
2371 _locator_trace_end_comm(_ple_locator_log_end_p_comm, comm_timing);
2372
2373 PLE_FREE(dist_v_flag);
2374
2375 for (int i = 0; i < this_locator->n_intersects; i++) {
2376
2377 dist_rank = this_locator->intersect_rank[i];
2378
2379 n_points_loc = this_locator->local_points_idx[i+1]
2380 - this_locator->local_points_idx[i];
2381
2382 if (loc_v_flag[i] == 1 && (local_var == NULL || n_points_loc == 0))
2383 ple_error(__FILE__, __LINE__, 0,
2384 _("Incoherent arguments to different instances in "
2385 "_exchange_point_var().\n"
2386 "Send and receive operations do not match "
2387 "(dist_rank = %d\n)\n"), dist_rank);
2388 }
2389
2390 /* Loop on distant ranks for exchange of data in standard mode */
2391 /*-------------------------------------------------------------*/
2392
2393 if (reverse == false) {
2394
2395 _locator_trace_start_comm(_ple_locator_log_start_p_comm, comm_timing);
2396
2397 loc_v_ptr = loc_v_buf;
2398
2399 for (int i = 0; i < this_locator->n_intersects; i++) {
2400
2401 dist_rank = this_locator->intersect_rank[i];
2402
2403 n_points_loc = this_locator->local_points_idx[i+1]
2404 - this_locator->local_points_idx[i];
2405
2406 n_points_dist = this_locator->distant_points_idx[i+1]
2407 - this_locator->distant_points_idx[i];
2408
2409 dist_v_idx = this_locator->distant_points_idx[i] * stride*size;
2410
2411 if (distant_var != NULL)
2412 dist_v_count = n_points_dist * stride;
2413 else
2414 dist_v_count = 0;
2415
2416 if (loc_v_flag[i] > 0)
2417 loc_v_count = n_points_loc*stride;
2418 else
2419 loc_v_count = 0;
2420
2421 /* Exchange information */
2422
2423 if (distant_var != NULL)
2424 dist_v_ptr = ((unsigned char *)distant_var) + dist_v_idx;
2425 else
2426 dist_v_ptr = NULL;
2427
2428 MPI_Irecv(loc_v_ptr, loc_v_count, datatype, dist_rank, PLE_MPI_TAG,
2429 this_locator->comm, &request[i*2]);
2430 MPI_Isend(dist_v_ptr, dist_v_count, datatype, dist_rank, PLE_MPI_TAG,
2431 this_locator->comm, &request[i*2+1]);
2432
2433 loc_v_ptr += loc_v_count*size;
2434 }
2435
2436 MPI_Waitall(this_locator->n_intersects*2, request, status);
2437
2438 _locator_trace_end_comm(_ple_locator_log_end_p_comm, comm_timing);
2439 }
2440
2441 /* Loop on distant ranks for preparation or retrieval of buffer data */
2442 /*-------------------------------------------------------------------*/
2443
2444 loc_v_ptr = loc_v_buf;
2445
2446 for (int i = 0; i < this_locator->n_intersects; i++) {
2447
2448 const ple_lnum_t *_local_point_ids
2449 = this_locator->local_point_ids + this_locator->local_points_idx[i];
2450
2451 dist_rank = this_locator->intersect_rank[i];
2452
2453 n_points_loc = this_locator->local_points_idx[i+1]
2454 - this_locator->local_points_idx[i];
2455
2456 n_points_dist = this_locator->distant_points_idx[i+1]
2457 - this_locator->distant_points_idx[i];
2458
2459 dist_v_idx = this_locator->distant_points_idx[i] * stride*size;
2460
2461 if (distant_var != NULL)
2462 dist_v_count = n_points_dist * stride;
2463 else
2464 dist_v_count = 0;
2465
2466 if (loc_v_flag[i] > 0)
2467 loc_v_count = n_points_loc*stride;
2468 else
2469 loc_v_count = 0;
2470
2471 /* Exchange information */
2472
2473 if (distant_var != NULL)
2474 dist_v_ptr = ((unsigned char *)distant_var) + dist_v_idx;
2475 else
2476 dist_v_ptr = NULL;
2477
2478 dist_rank = this_locator->intersect_rank[i];
2479
2480 n_points_loc = this_locator->local_points_idx[i+1]
2481 - this_locator->local_points_idx[i];
2482
2483 n_points_dist = this_locator->distant_points_idx[i+1]
2484 - this_locator->distant_points_idx[i];
2485
2486 if (reverse == false) {
2487
2488 if (loc_v_flag[i] > 0) {
2489 if (local_list == NULL) {
2490 const size_t nbytes = stride*size;
2491 for (ple_lnum_t k = 0; k < n_points_loc; k++) {
2492 char *local_v_p = (char *)local_var + _local_point_ids[k]*nbytes;
2493 const char *loc_v_buf_p = (const char *)loc_v_ptr + k*nbytes;
2494 for (size_t l = 0; l < nbytes; l++)
2495 local_v_p[l] = loc_v_buf_p[l];
2496 }
2497 }
2498 else {
2499 const size_t nbytes = stride*size;
2500 const ple_lnum_t idb = this_locator->point_id_base;
2501 for (ple_lnum_t k = 0; k < n_points_loc; k++) {
2502 char *local_v_p = (char *)local_var
2503 + (local_list[_local_point_ids[k]] - idb)*nbytes;
2504 const char *loc_v_buf_p = (const char *)loc_v_ptr + k*nbytes;
2505 for (size_t l = 0; l < nbytes; l++)
2506 local_v_p[l] = loc_v_buf_p[l];
2507 }
2508 }
2509 }
2510
2511 }
2512 else { /* if (reverse == true) */
2513
2514 if (loc_v_flag[i] > 0) {
2515 if (local_list == NULL) {
2516 const size_t nbytes = stride*size;
2517 for (ple_lnum_t k = 0; k < n_points_loc; k++) {
2518 const char *local_v_p
2519 = (const char *)local_var + _local_point_ids[k]*nbytes;
2520 char *loc_v_buf_p = (char *)loc_v_ptr + k*nbytes;
2521 for (size_t l = 0; l < nbytes; l++)
2522 loc_v_buf_p[l] = local_v_p[l];
2523 }
2524 }
2525 else {
2526 const size_t nbytes = stride*size;
2527 const ple_lnum_t idb = this_locator->point_id_base;
2528 for (ple_lnum_t k = 0; k < n_points_loc; k++) {
2529 const char *local_v_p
2530 = (const char *)local_var
2531 + (local_list[_local_point_ids[k]] - idb)*nbytes;
2532 char *loc_v_buf_p = (char *)loc_v_ptr + k*nbytes;
2533 for (size_t l = 0; l < nbytes; l++)
2534 loc_v_buf_p[l] = local_v_p[l];
2535 }
2536 }
2537 }
2538
2539 }
2540
2541 loc_v_ptr += loc_v_count*size;
2542
2543 }
2544
2545 /* Loop on distant ranks for exchange of data in reverse mode */
2546 /*------------------------------------------------------------*/
2547
2548 if (reverse == true) {
2549
2550 _locator_trace_start_comm(_ple_locator_log_start_p_comm, comm_timing);
2551
2552 loc_v_ptr = loc_v_buf;
2553
2554 for (int i = 0; i < this_locator->n_intersects; i++) {
2555
2556 dist_rank = this_locator->intersect_rank[i];
2557
2558 n_points_loc = this_locator->local_points_idx[i+1]
2559 - this_locator->local_points_idx[i];
2560
2561 n_points_dist = this_locator->distant_points_idx[i+1]
2562 - this_locator->distant_points_idx[i];
2563
2564 dist_v_idx = this_locator->distant_points_idx[i] * stride*size;
2565
2566 if (distant_var != NULL)
2567 dist_v_count = n_points_dist * stride;
2568 else
2569 dist_v_count = 0;
2570
2571 if (loc_v_flag[i] > 0)
2572 loc_v_count = n_points_loc*stride;
2573 else
2574 loc_v_count = 0;
2575
2576 /* Exchange information */
2577
2578 if (distant_var != NULL)
2579 dist_v_ptr = ((unsigned char *)distant_var) + dist_v_idx;
2580 else
2581 dist_v_ptr = NULL;
2582
2583 MPI_Irecv(dist_v_ptr, dist_v_count, datatype, dist_rank, PLE_MPI_TAG,
2584 this_locator->comm, &request[i*2]);
2585 MPI_Isend(loc_v_ptr, loc_v_count, datatype, dist_rank, PLE_MPI_TAG,
2586 this_locator->comm, &request[i*2+1]);
2587
2588 loc_v_ptr += loc_v_count*size;
2589 }
2590
2591 MPI_Waitall(this_locator->n_intersects*2, request, status);
2592
2593 _locator_trace_end_comm(_ple_locator_log_end_p_comm, comm_timing);
2594
2595 }
2596
2597 /* Free temporary arrays */
2598
2599 PLE_FREE(loc_v_buf);
2600
2601 PLE_FREE(loc_v_flag);
2602 PLE_FREE(request);
2603 PLE_FREE(status);
2604
2605 this_locator->exchange_wtime[1] += comm_timing[0];
2606 this_locator->exchange_cpu_time[1] += comm_timing[1];
2607 }
2608
2609 #endif /* defined(PLE_HAVE_MPI) */
2610
2611 /*----------------------------------------------------------------------------
2612 * Distribute variable defined on "distant points" to the original ("local")
2613 * points.
2614 *
2615 * The exchange is symmetric if both variables are defined, receive
2616 * only if distant_var is NULL, or send only if local_var is NULL.
2617 *
2618 * parameters:
2619 * this_locator <-- pointer to locator structure
2620 * distant_var <-> variable defined on distant points (ready to send)
2621 * local_var <-> variable defined on local points (received)
2622 * local_list <-- optional indirection list for local_var
2623 * type_size <-- sizeof (float or double) variable type
2624 * stride <-- dimension (1 for scalar, 3 for interlaced vector)
2625 * reverse <-- if true, exchange is reversed
2626 * (receive values associated with distant points
2627 * from the processes owning the original points)
2628 *----------------------------------------------------------------------------*/
2629
2630 static void
_exchange_point_var_local(ple_locator_t * this_locator,void * distant_var,void * local_var,const ple_lnum_t * local_list,size_t type_size,size_t stride,_Bool reverse)2631 _exchange_point_var_local(ple_locator_t *this_locator,
2632 void *distant_var,
2633 void *local_var,
2634 const ple_lnum_t *local_list,
2635 size_t type_size,
2636 size_t stride,
2637 _Bool reverse)
2638 {
2639 ple_lnum_t i;
2640 size_t j;
2641 ple_lnum_t n_points_loc;
2642
2643 const size_t nbytes = stride*type_size;
2644
2645 /* Initialization */
2646
2647 if (this_locator->n_interior == 0)
2648 return;
2649
2650 n_points_loc = this_locator->local_points_idx[1]
2651 - this_locator->local_points_idx[0];
2652
2653 assert(n_points_loc == ( this_locator->distant_points_idx[1]
2654 - this_locator->distant_points_idx[0]));
2655
2656 /* Exchange information */
2657
2658 if (reverse == false) {
2659
2660 if (local_list == NULL)
2661 memcpy(local_var, distant_var, n_points_loc*nbytes);
2662
2663 else {
2664 const ple_lnum_t idb = this_locator->point_id_base;
2665 for (i = 0; i < n_points_loc; i++) {
2666 char *local_var_p = (char *)local_var + (local_list[i] - idb)*nbytes;
2667 const char *distant_var_p = (const char *)distant_var + i*nbytes;
2668 for (j = 0; j < nbytes; j++)
2669 local_var_p[j] = distant_var_p[j];
2670 }
2671 }
2672
2673 }
2674 else { /* if (reverse == true) */
2675
2676 if (local_list == NULL)
2677 memcpy(distant_var, local_var, n_points_loc*nbytes);
2678
2679 else {
2680 const ple_lnum_t idb = this_locator->point_id_base;
2681 for (i = 0; i < n_points_loc; i++) {
2682 const char *local_var_p
2683 = (const char *)local_var + (local_list[i] - idb)*nbytes;
2684 char *distant_var_p = (char *)distant_var + i*nbytes;
2685 for (j = 0; j < nbytes; j++)
2686 distant_var_p[j] = local_var_p[j];
2687 }
2688 }
2689
2690 }
2691 }
2692
2693 /*----------------------------------------------------------------------------
2694 * Return timing information.
2695 *
2696 * When location on closest elements to force location of all points is
2697 * active, location times include a total value, followed by the value
2698 * associated with the location of closest elements stage.
2699 *
2700 * parameters:
2701 * this_locator <-- pointer to locator structure
2702 * time_type <-- 0 for total times, 1 for communication times
2703 * location_wtime --> Location Wall-clock time (or NULL)
2704 * location_cpu_time --> Location CPU time (or NULL)
2705 * exchange_wtime --> Variable exchange Wall-clock time (or NULL)
2706 * exchange_cpu_time --> Variable exchange CPU time (or NULL)
2707 *----------------------------------------------------------------------------*/
2708
2709 static void
_get_times(const ple_locator_t * this_locator,int time_type,double * location_wtime,double * location_cpu_time,double * exchange_wtime,double * exchange_cpu_time)2710 _get_times(const ple_locator_t *this_locator,
2711 int time_type,
2712 double *location_wtime,
2713 double *location_cpu_time,
2714 double *exchange_wtime,
2715 double *exchange_cpu_time)
2716 {
2717 const ple_locator_t *_locator = this_locator;
2718
2719 if (this_locator != NULL) {
2720
2721 if (location_wtime != NULL) {
2722 *location_wtime = _locator->location_wtime[time_type];
2723 }
2724 if (location_cpu_time != NULL) {
2725 *location_cpu_time = _locator->location_cpu_time[time_type];
2726 }
2727 if (exchange_wtime != NULL)
2728 *exchange_wtime = _locator->exchange_wtime[time_type];
2729 if (exchange_cpu_time != NULL)
2730 *exchange_cpu_time = _locator->exchange_cpu_time[time_type];
2731
2732 }
2733 else {
2734
2735 if (location_wtime != NULL) {
2736 location_wtime[0] = 0.;
2737 location_wtime[1] = 0.;
2738 }
2739 if (location_cpu_time != NULL) {
2740 location_cpu_time[0] = 0.;
2741 location_cpu_time[1] = 0.;
2742 }
2743 if (exchange_wtime != NULL)
2744 *exchange_wtime = 0.;
2745 if (exchange_cpu_time != NULL)
2746 *exchange_cpu_time = 0.;
2747 }
2748 }
2749
2750 /*============================================================================
2751 * Public function definitions
2752 *============================================================================*/
2753
2754 /*----------------------------------------------------------------------------*/
2755 /*!
2756 * \brief Creation of a locator structure.
2757 *
2758 * Note that depending on the choice of ranks of the associated communicator,
2759 * distant ranks may in fact be truly distant or not. If n_ranks = 1 and
2760 * start_rank is equal to the current rank in the communicator, the locator
2761 * will work only locally.
2762 *
2763 * \param[in] comm associated MPI communicator
2764 * \param[in] n_ranks number of MPI ranks associated with distant location
2765 * \param[in] start_rank first MPI rank associated with distant location
2766 *
2767 * \return pointer to locator
2768 */
2769 /*----------------------------------------------------------------------------*/
2770
2771 #if defined(PLE_HAVE_MPI)
2772 ple_locator_t *
ple_locator_create(MPI_Comm comm,int n_ranks,int start_rank)2773 ple_locator_create(MPI_Comm comm,
2774 int n_ranks,
2775 int start_rank)
2776 #else
2777 ple_locator_t *
2778 ple_locator_create(void)
2779 #endif
2780 {
2781 int i;
2782 ple_locator_t *this_locator;
2783
2784 PLE_MALLOC(this_locator, 1, ple_locator_t);
2785
2786 this_locator->dim = 0;
2787 this_locator->have_tags = 0;
2788
2789 #if defined(PLE_HAVE_MPI)
2790 this_locator->comm = comm;
2791 this_locator->n_ranks = n_ranks;
2792 this_locator->start_rank = start_rank;
2793 #else
2794 this_locator->n_ranks = 1;
2795 this_locator->start_rank = 0;
2796 #endif
2797
2798 this_locator->locate_algorithm = _LOCATE_BB_SENDRECV;
2799 this_locator->exchange_algorithm = _EXCHANGE_SENDRECV;
2800
2801 this_locator->point_id_base = 0;
2802
2803 this_locator->n_intersects = 0;
2804 this_locator->intersect_rank = NULL;
2805 this_locator->comm_order = NULL;
2806
2807 this_locator->local_points_idx = NULL;
2808 this_locator->distant_points_idx = NULL;
2809
2810 this_locator->local_point_ids = NULL;
2811
2812 this_locator->distant_point_location = NULL;
2813 this_locator->distant_point_coords = NULL;
2814
2815 this_locator->n_interior = 0;
2816 this_locator->interior_list = NULL;
2817
2818 this_locator->n_exterior = 0;
2819 this_locator->exterior_list = NULL;
2820
2821 for (i = 0; i < 2; i++) {
2822 this_locator->location_wtime[i] = 0.;
2823 this_locator->location_cpu_time[i] = 0.;
2824 }
2825
2826 for (i = 0; i < 2; i++) {
2827 this_locator->exchange_wtime[i] = 0.;
2828 this_locator->exchange_cpu_time[i] = 0.;
2829 }
2830
2831 return this_locator;
2832 }
2833
2834 /*----------------------------------------------------------------------------*/
2835 /*!
2836 * \brief Destruction of a locator structure.
2837 *
2838 * \param[in, out] this_locator locator to destroy
2839 *
2840 * \return NULL pointer
2841 */
2842 /*----------------------------------------------------------------------------*/
2843
2844 ple_locator_t *
ple_locator_destroy(ple_locator_t * this_locator)2845 ple_locator_destroy(ple_locator_t *this_locator)
2846 {
2847 if (this_locator != NULL) {
2848
2849 PLE_FREE(this_locator->local_points_idx);
2850 PLE_FREE(this_locator->distant_points_idx);
2851
2852 if (this_locator->local_point_ids != NULL)
2853 PLE_FREE(this_locator->local_point_ids);
2854
2855 PLE_FREE(this_locator->distant_point_location);
2856 PLE_FREE(this_locator->distant_point_coords);
2857
2858 PLE_FREE(this_locator->intersect_rank);
2859 PLE_FREE(this_locator->comm_order);
2860
2861 PLE_FREE(this_locator->interior_list);
2862 PLE_FREE(this_locator->exterior_list);
2863
2864 PLE_FREE(this_locator);
2865 }
2866
2867 return NULL;
2868 }
2869
2870 /*----------------------------------------------------------------------------*/
2871 /*!
2872 * \brief Prepare locator for use with a given mesh representation.
2873 *
2874 * \param[in, out] this_locator pointer to locator structure
2875 * \param[in] mesh pointer to mesh representation structure
2876 * \param[in] options options array (size
2877 * PLE_LOCATOR_N_OPTIONS), or NULL
2878 * \param[in] tolerance_base associated fixed tolerance
2879 * \param[in] tolerance_fraction associated fraction of element bounding
2880 * boxes added to tolerance
2881 * \param[in] dim spatial dimension of mesh and points to
2882 * locate
2883 * \param[in] n_points number of points to locate
2884 * \param[in] point_list optional indirection array to point_coords
2885 * \param[in] point_tag optional point tag (size: n_points)
2886 * \param[in] point_coords coordinates of points to locate
2887 * (dimension: dim * n_points)
2888 * \param[out] distance optional distance from point to matching
2889 * element: < 0 if unlocated; 0 - 1 if inside
2890 * and > 1 if outside a volume element, or
2891 * absolute distance to a surface element
2892 * (size: n_points)
2893 * \param[in] mesh_extents_f pointer to function computing mesh or mesh
2894 * subset or element extents
2895 * \param[in] mesh_locate_f pointer to function wich updates the
2896 * location[] and distance[] arrays
2897 * associated with a set of points for
2898 * points that are in an element of this
2899 * mesh, or closer to one than to previously
2900 * encountered elements.
2901 */
2902 /*----------------------------------------------------------------------------*/
2903
2904 void
ple_locator_set_mesh(ple_locator_t * this_locator,const void * mesh,const int * options,float tolerance_base,float tolerance_fraction,int dim,ple_lnum_t n_points,const ple_lnum_t point_list[],const int point_tag[],const ple_coord_t point_coords[],float distance[],ple_mesh_extents_t * mesh_extents_f,ple_mesh_elements_locate_t * mesh_locate_f)2905 ple_locator_set_mesh(ple_locator_t *this_locator,
2906 const void *mesh,
2907 const int *options,
2908 float tolerance_base,
2909 float tolerance_fraction,
2910 int dim,
2911 ple_lnum_t n_points,
2912 const ple_lnum_t point_list[],
2913 const int point_tag[],
2914 const ple_coord_t point_coords[],
2915 float distance[],
2916 ple_mesh_extents_t *mesh_extents_f,
2917 ple_mesh_elements_locate_t *mesh_locate_f)
2918 {
2919 double w_start, w_end, cpu_start, cpu_end;
2920
2921 /* Initialize timing */
2922
2923 w_start = ple_timer_wtime();
2924 cpu_start = ple_timer_cpu_time();
2925
2926 /* Other initializations */
2927
2928 this_locator->dim = dim;
2929
2930 if (distance != NULL) {
2931 for (ple_lnum_t i = 0; i < n_points; i++)
2932 distance[i] = -1;
2933 }
2934
2935 /* Release information if previously present */
2936
2937 _clear_location_info(this_locator);
2938
2939 ple_locator_extend_search(this_locator,
2940 mesh,
2941 options,
2942 tolerance_base,
2943 tolerance_fraction,
2944 n_points,
2945 point_list,
2946 point_tag,
2947 point_coords,
2948 distance,
2949 mesh_extents_f,
2950 mesh_locate_f);
2951
2952 /* Finalize timing */
2953
2954 w_end = ple_timer_wtime();
2955 cpu_end = ple_timer_cpu_time();
2956
2957 this_locator->location_wtime[0] += (w_end - w_start);
2958 this_locator->location_cpu_time[0] += (cpu_end - cpu_start);
2959 }
2960
2961 /*----------------------------------------------------------------------------*/
2962 /*!
2963 * \brief Extend search for a locator for which set_mesh has already been
2964 * called.
2965 *
2966 * \param[in, out] this_locator pointer to locator structure
2967 * \param[in] mesh pointer to mesh representation structure
2968 * \param[in] options options array (size
2969 * PLE_LOCATOR_N_OPTIONS), or NULL
2970 * \param[in] tolerance_base associated fixed tolerance
2971 * \param[in] tolerance_fraction associated fraction of element bounding
2972 * boxes added to tolerance
2973 * \param[in] n_points number of points to locate
2974 * \param[in] point_list optional indirection array to point_coords
2975 * \param[in] point_tag optional point tag (size: n_points)
2976 * \param[in] point_coords coordinates of points to locate
2977 * (dimension: dim * n_points)
2978 * \param[out] distance optional distance from point to matching
2979 * element: < 0 if unlocated; 0 - 1 if inside
2980 * and > 1 if outside a volume element, or
2981 * absolute distance to a surface element
2982 * (size: n_points)
2983 * \param[in] mesh_extents_f pointer to function computing mesh or mesh
2984 * subset or element extents
2985 * \param[in] mesh_locate_f pointer to function wich updates the
2986 * location[] and distance[] arrays
2987 * associated with a set of points for
2988 * points that are in an element of this
2989 * mesh, or closer to one than to previously
2990 * encountered elements.
2991 */
2992 /*----------------------------------------------------------------------------*/
2993
2994 void
ple_locator_extend_search(ple_locator_t * this_locator,const void * mesh,const int * options,float tolerance_base,float tolerance_fraction,ple_lnum_t n_points,const ple_lnum_t point_list[],const int point_tag[],const ple_coord_t point_coords[],float distance[],ple_mesh_extents_t * mesh_extents_f,ple_mesh_elements_locate_t * mesh_locate_f)2995 ple_locator_extend_search(ple_locator_t *this_locator,
2996 const void *mesh,
2997 const int *options,
2998 float tolerance_base,
2999 float tolerance_fraction,
3000 ple_lnum_t n_points,
3001 const ple_lnum_t point_list[],
3002 const int point_tag[],
3003 const ple_coord_t point_coords[],
3004 float distance[],
3005 ple_mesh_extents_t *mesh_extents_f,
3006 ple_mesh_elements_locate_t *mesh_locate_f)
3007 {
3008 int i;
3009 double w_start, w_end, cpu_start, cpu_end;
3010 ple_lnum_t *location;
3011
3012 double comm_timing[4] = {0., 0., 0., 0.};
3013 int mpi_flag = 0;
3014
3015 const int dim = this_locator->dim;
3016
3017 /* Initialize timing */
3018
3019 w_start = ple_timer_wtime();
3020 cpu_start = ple_timer_cpu_time();
3021
3022 if (options != NULL)
3023 this_locator->point_id_base = options[PLE_LOCATOR_NUMBERING];
3024 else
3025 this_locator->point_id_base = 0;
3026
3027 const int idb = this_locator->point_id_base;
3028
3029 this_locator->have_tags = 0;
3030
3031 /* Prepare locator (MPI version) */
3032 /*-------------------------------*/
3033
3034 #if defined(PLE_HAVE_MPI)
3035
3036 MPI_Initialized(&mpi_flag);
3037
3038 if (mpi_flag && this_locator->comm == MPI_COMM_NULL)
3039 mpi_flag = 0;
3040
3041 if (mpi_flag) {
3042
3043 /* Flag values
3044 0: mesh dimension
3045 1: space dimension
3046 2: minimum algorithm version
3047 3: maximum algorithm version
3048 4: preferred algorithm version
3049 5: have point tags */
3050
3051 int globflag[6];
3052 int locflag[6] = {-1,
3053 -1,
3054 1, /* equivalent to _LOCATE_BB_SENDRECV */
3055 -_LOCATE_BB_SENDRECV_ORDERED,
3056 _LOCATE_BB_SENDRECV_ORDERED,
3057 0};
3058 ple_lnum_t *location_rank_id;
3059
3060 /* Check that at least one of the local or distant nodal meshes
3061 is non-NULL, and at least one of the local or distant
3062 point sets is non null */
3063
3064 if (mesh != NULL)
3065 locflag[0] = dim;
3066
3067 if (n_points > 0)
3068 locflag[1] = dim;
3069
3070 if (n_points > 0 && point_tag != NULL)
3071 locflag[5] = 1;
3072
3073 _locator_trace_start_comm(_ple_locator_log_start_g_comm, comm_timing);
3074
3075 MPI_Allreduce(locflag, globflag, 6, MPI_INT, MPI_MAX,
3076 this_locator->comm);
3077
3078 _locator_trace_end_comm(_ple_locator_log_end_g_comm, comm_timing);
3079
3080 if (globflag[0] < 0 || globflag[1] < 0)
3081 return;
3082 else if (mesh != NULL && globflag[1] != dim)
3083 ple_error(__FILE__, __LINE__, 0,
3084 _("Locator trying to use distant space dimension %d\n"
3085 "with local space dimension %d\n"),
3086 globflag[1], dim);
3087 else if (mesh == NULL && globflag[0] != dim)
3088 ple_error(__FILE__, __LINE__, 0,
3089 _("Locator trying to use local space dimension %d\n"
3090 "with distant space dimension %d\n"),
3091 dim, globflag[0]);
3092
3093 /* Check algorithm versions and supported features */
3094
3095 globflag[3] = -globflag[3];
3096
3097 /* Compatibility with older versions */
3098 for (i = 2; i < 5; i++) {
3099 if (globflag[i] == 1)
3100 globflag[i] = _LOCATE_BB_SENDRECV;
3101 }
3102
3103 if (globflag[2] > globflag[3])
3104 ple_error(__FILE__, __LINE__, 0,
3105 _("Incompatible locator algorithm ranges:\n"
3106 " global minimum algorithm id %d\n"
3107 " global maximum algorithm id %d\n"
3108 "PLE library versions or builds are incompatible."),
3109 globflag[2], globflag[3]);
3110
3111 if (globflag[4] < globflag[2])
3112 globflag[4] = globflag[2];
3113 if (globflag[4] > globflag[3])
3114 globflag[4] = globflag[3];
3115
3116 this_locator->locate_algorithm = globflag[4];
3117
3118 if (globflag[5] > 0)
3119 this_locator->have_tags = 1;
3120
3121 /* Free temporary memory */
3122
3123 PLE_MALLOC(location, n_points, ple_lnum_t);
3124 PLE_MALLOC(location_rank_id, n_points, ple_lnum_t);
3125
3126 _transfer_location_distant(this_locator,
3127 n_points,
3128 location,
3129 location_rank_id);
3130
3131 _locate_all_distant(this_locator,
3132 mesh,
3133 tolerance_base,
3134 tolerance_fraction,
3135 n_points,
3136 point_list,
3137 point_tag,
3138 point_coords,
3139 location,
3140 location_rank_id,
3141 distance,
3142 mesh_extents_f,
3143 mesh_locate_f);
3144
3145 PLE_FREE(location_rank_id);
3146 }
3147
3148 #endif
3149
3150 /* Prepare locator (local version) */
3151 /*---------------------------------*/
3152
3153 if (!mpi_flag) {
3154
3155 if (mesh == NULL || n_points == 0)
3156 return;
3157
3158 if (point_tag != NULL)
3159 this_locator->have_tags = 1;
3160
3161 PLE_MALLOC(location, n_points, ple_lnum_t);
3162
3163 _transfer_location_local(this_locator,
3164 n_points,
3165 location);
3166
3167 _locate_all_local(this_locator,
3168 mesh,
3169 tolerance_base,
3170 tolerance_fraction,
3171 n_points,
3172 point_list,
3173 point_tag,
3174 point_coords,
3175 location,
3176 distance,
3177 mesh_extents_f,
3178 mesh_locate_f);
3179
3180 PLE_FREE(location);
3181
3182 }
3183
3184 /* Update local_point_ids values */
3185 /*-------------------------------*/
3186
3187 if ( this_locator->n_interior > 0
3188 && this_locator->local_point_ids != NULL) {
3189
3190 ple_lnum_t *reduced_index;
3191
3192 PLE_MALLOC(reduced_index, n_points, ple_lnum_t);
3193
3194 for (i = 0; i < n_points; i++)
3195 reduced_index[i] = -1;
3196
3197 assert( this_locator->local_points_idx[this_locator->n_intersects]
3198 == this_locator->n_interior);
3199
3200 for (i = 0; i < this_locator->n_interior; i++)
3201 reduced_index[this_locator->interior_list[i] - idb] = i;
3202
3203 /* Update this_locator->local_point_ids[] so that it refers
3204 to an index in a dense [0, this_locator->n_interior] subset
3205 of the local points */
3206
3207 for (i = 0; i < this_locator->n_interior; i++)
3208 this_locator->local_point_ids[i]
3209 = reduced_index[this_locator->local_point_ids[i]];
3210
3211 for (i = 0; i < this_locator->n_interior; i++)
3212 assert(this_locator->local_point_ids[i] > -1);
3213
3214 PLE_FREE(reduced_index);
3215
3216 }
3217
3218 /* If an initial point list was given, update
3219 this_locator->interior_list and this_locator->exterior_list
3220 so that they refer to the same point set as that initial
3221 list (and not to an index within the selected point set) */
3222
3223 if (point_list != NULL) {
3224
3225 for (i = 0; i < this_locator->n_interior; i++)
3226 this_locator->interior_list[i]
3227 = point_list[this_locator->interior_list[i] - idb];
3228
3229 for (i = 0; i < this_locator->n_exterior; i++)
3230 this_locator->exterior_list[i]
3231 = point_list[this_locator->exterior_list[i] - idb];
3232
3233 }
3234
3235 /* Finalize timing */
3236
3237 w_end = ple_timer_wtime();
3238 cpu_end = ple_timer_cpu_time();
3239
3240 this_locator->location_wtime[0] += (w_end - w_start);
3241 this_locator->location_cpu_time[0] += (cpu_end - cpu_start);
3242
3243 this_locator->location_wtime[1] += comm_timing[0];
3244 this_locator->location_cpu_time[1] += comm_timing[1];
3245 }
3246
3247 /*----------------------------------------------------------------------------*/
3248 /*!
3249 * \brief Shift location ids for located points after locator initialization.
3250 *
3251 * This is useful mainly to switch between 0-based to 1-based numberings.
3252 *
3253 * \param[in, out] this_locator pointer to locator structure
3254 * \param[in] location_shift shift value
3255 */
3256 /*----------------------------------------------------------------------------*/
3257
3258 void
ple_locator_shift_locations(ple_locator_t * this_locator,ple_lnum_t location_shift)3259 ple_locator_shift_locations(ple_locator_t *this_locator,
3260 ple_lnum_t location_shift)
3261 {
3262 int n_intersects = this_locator->n_intersects;
3263 if (n_intersects == 0)
3264 return;
3265
3266 const ple_lnum_t n_points = this_locator->distant_points_idx[n_intersects];
3267
3268 for (ple_lnum_t i = 0; i < n_points; i++) {
3269 if (this_locator->distant_point_location[i] > -1)
3270 this_locator->distant_point_location[i] += location_shift;
3271 }
3272 }
3273
3274 /*----------------------------------------------------------------------------*/
3275 /*!
3276 * \brief Return number of distant points after locator initialization.
3277 *
3278 * \param[in] this_locator pointer to locator structure
3279 *
3280 * \return number of distant points.
3281 */
3282 /*----------------------------------------------------------------------------*/
3283
3284 ple_lnum_t
ple_locator_get_n_dist_points(const ple_locator_t * this_locator)3285 ple_locator_get_n_dist_points(const ple_locator_t *this_locator)
3286 {
3287 ple_lnum_t retval = 0;
3288
3289 if (this_locator != NULL) {
3290 if (this_locator->n_intersects != 0)
3291 retval = this_locator->distant_points_idx[this_locator->n_intersects];
3292 }
3293
3294 return retval;
3295 }
3296
3297 /*----------------------------------------------------------------------------*/
3298 /*!
3299 * \brief Return an array of local element numbers containing (or nearest to)
3300 * each distant point after locator initialization.
3301 *
3302 * \param[in] this_locator pointer to locator structure
3303 *
3304 * \return local element numbers associated with distant points.
3305 */
3306 /*----------------------------------------------------------------------------*/
3307
3308 const ple_lnum_t *
ple_locator_get_dist_locations(const ple_locator_t * this_locator)3309 ple_locator_get_dist_locations(const ple_locator_t *this_locator)
3310 {
3311 const ple_lnum_t * retval = NULL;
3312
3313 if (this_locator != NULL) {
3314 if (this_locator->n_ranks != 0)
3315 retval = this_locator->distant_point_location;
3316 }
3317
3318 return retval;
3319 }
3320
3321 /*----------------------------------------------------------------------------*/
3322 /*!
3323 * \brief Return an array of coordinates of each distant point after
3324 * locator initialization.
3325 *
3326 * \param[in] this_locator pointer to locator structure
3327 *
3328 * \return coordinate array associated with distant points (interlaced).
3329 */
3330 /*----------------------------------------------------------------------------*/
3331
3332 const ple_coord_t *
ple_locator_get_dist_coords(const ple_locator_t * this_locator)3333 ple_locator_get_dist_coords(const ple_locator_t *this_locator)
3334 {
3335 const ple_coord_t * retval = NULL;
3336
3337 if (this_locator != NULL) {
3338 if (this_locator->n_intersects != 0)
3339 retval = this_locator->distant_point_coords;
3340 }
3341
3342 return retval;
3343 }
3344
3345 /*----------------------------------------------------------------------------*/
3346 /*!
3347 * \brief Return number of points located after locator initialization.
3348 *
3349 * \param[in] this_locator pointer to locator structure
3350 *
3351 * \return number of points located.
3352 */
3353 /*----------------------------------------------------------------------------*/
3354
3355 ple_lnum_t
ple_locator_get_n_interior(const ple_locator_t * this_locator)3356 ple_locator_get_n_interior(const ple_locator_t *this_locator)
3357 {
3358 ple_lnum_t retval = 0;
3359
3360 if (this_locator != NULL)
3361 retval = this_locator->n_interior;
3362
3363 return retval;
3364 }
3365
3366 /*----------------------------------------------------------------------------*/
3367 /*!
3368 * \brief Return list of points located after locator initialization.
3369 * This list defines a subset of the point set used at initialization.
3370 *
3371 * \param[in] this_locator pointer to locator structure
3372 *
3373 * \return list of points located.
3374 */
3375 /*----------------------------------------------------------------------------*/
3376
3377 const ple_lnum_t *
ple_locator_get_interior_list(const ple_locator_t * this_locator)3378 ple_locator_get_interior_list(const ple_locator_t *this_locator)
3379 {
3380 return this_locator->interior_list;
3381 }
3382
3383 /*----------------------------------------------------------------------------*/
3384 /*!
3385 * \brief Return number of points not located after locator initialization.
3386 *
3387 * \param[in] this_locator pointer to locator structure
3388 *
3389 * \return number of points not located.
3390 */
3391 /*----------------------------------------------------------------------------*/
3392
3393 ple_lnum_t
ple_locator_get_n_exterior(const ple_locator_t * this_locator)3394 ple_locator_get_n_exterior(const ple_locator_t *this_locator)
3395 {
3396 return this_locator->n_exterior;
3397 }
3398
3399 /*----------------------------------------------------------------------------*/
3400 /*!
3401 * \brief Return list of points not located after locator initialization.
3402 * This list defines a subset of the point set used at initialization.
3403 *
3404 * \param[in] this_locator pointer to locator structure
3405 *
3406 * \return list of points not located.
3407 */
3408 /*----------------------------------------------------------------------------*/
3409
3410 const ple_lnum_t *
ple_locator_get_exterior_list(const ple_locator_t * this_locator)3411 ple_locator_get_exterior_list(const ple_locator_t *this_locator)
3412 {
3413 return this_locator->exterior_list;
3414 }
3415
3416 /*----------------------------------------------------------------------------*/
3417 /*!
3418 * \brief Discard list of points not located after locator initialization.
3419 * This list defines a subset of the point set used at initialization.
3420 *
3421 * \param[in] this_locator pointer to locator structure
3422 */
3423 /*----------------------------------------------------------------------------*/
3424
3425 void
ple_locator_discard_exterior(ple_locator_t * this_locator)3426 ple_locator_discard_exterior(ple_locator_t *this_locator)
3427 {
3428 this_locator->n_exterior = 0;
3429 PLE_FREE(this_locator->exterior_list);
3430 }
3431
3432 /*----------------------------------------------------------------------------*/
3433 /*!
3434 * \brief Distribute variable defined on distant points to processes owning
3435 * the original points (i.e. distant processes).
3436 *
3437 * The exchange is symmetric if both variables are defined, receive
3438 * only if distant_var is NULL, or send only if local_var is NULL.
3439 *
3440 * The caller should have defined the values of distant_var[] for the
3441 * distant points, whose coordinates are given by
3442 * ple_locator_get_dist_coords(), and which are located in the elements
3443 * whose numbers are given by ple_locator_get_dist_locations().
3444 *
3445 * The local_var[] is defined at the located points (those whose
3446 * numbers are returned by ple_locator_get_interior_list().
3447 *
3448 * If the optional local_list indirection is used, it is assumed to use
3449 * the same base numbering as that defined by the options for the previous
3450 * call to ple_locator_set_mesh() or ple_locator_extend_search().
3451 *
3452 * \param[in] this_locator pointer to locator structure
3453 * \param[in, out] distant_var variable defined on distant points
3454 * (ready to send); size: n_dist_points*stride
3455 * \param[in, out] local_var variable defined on located local points
3456 * (received); size: n_interior*stride
3457 * \param[in] local_list optional indirection list for local_var
3458 * \param[in] type_size sizeof (float or double) variable type
3459 * \param[in] stride dimension (1 for scalar,
3460 * 3 for interleaved vector)
3461 * \param[in] reverse if nonzero, exchange is reversed
3462 * (receive values associated with distant points
3463 * from the processes owning the original points)
3464 */
3465 /*----------------------------------------------------------------------------*/
3466
3467 void
ple_locator_exchange_point_var(ple_locator_t * this_locator,void * distant_var,void * local_var,const ple_lnum_t * local_list,size_t type_size,size_t stride,int reverse)3468 ple_locator_exchange_point_var(ple_locator_t *this_locator,
3469 void *distant_var,
3470 void *local_var,
3471 const ple_lnum_t *local_list,
3472 size_t type_size,
3473 size_t stride,
3474 int reverse)
3475 {
3476 double w_start, w_end, cpu_start, cpu_end;
3477
3478 int mpi_flag = 0;
3479 _Bool _reverse = reverse;
3480
3481 /* Initialize timing */
3482
3483 w_start = ple_timer_wtime();
3484 cpu_start = ple_timer_cpu_time();
3485
3486 #if defined(PLE_HAVE_MPI)
3487
3488 MPI_Initialized(&mpi_flag);
3489
3490 if (mpi_flag && this_locator->comm == MPI_COMM_NULL)
3491 mpi_flag = 0;
3492
3493 if (mpi_flag) {
3494
3495 MPI_Datatype datatype = MPI_DATATYPE_NULL;
3496
3497 if (type_size == sizeof(double))
3498 datatype = MPI_DOUBLE;
3499 else if (type_size == sizeof(float))
3500 datatype = MPI_FLOAT;
3501 else
3502 ple_error(__FILE__, __LINE__, 0,
3503 _("type_size passed to ple_locator_exchange_point_var() does\n"
3504 "not correspond to double or float."));
3505
3506 assert (datatype != MPI_DATATYPE_NULL);
3507
3508 if (this_locator->exchange_algorithm == _EXCHANGE_SENDRECV)
3509 _exchange_point_var_distant(this_locator,
3510 distant_var,
3511 local_var,
3512 local_list,
3513 datatype,
3514 stride,
3515 _reverse);
3516
3517 else if (this_locator->exchange_algorithm == _EXCHANGE_ISEND_IRECV)
3518 _exchange_point_var_distant_asyn(this_locator,
3519 distant_var,
3520 local_var,
3521 local_list,
3522 datatype,
3523 stride,
3524 _reverse);
3525
3526 }
3527
3528 #endif /* defined(PLE_HAVE_MPI) */
3529
3530 if (!mpi_flag)
3531 _exchange_point_var_local(this_locator,
3532 distant_var,
3533 local_var,
3534 local_list,
3535 type_size,
3536 stride,
3537 _reverse);
3538
3539 /* Finalize timing */
3540
3541 w_end = ple_timer_wtime();
3542 cpu_end = ple_timer_cpu_time();
3543
3544 this_locator->exchange_wtime[0] += (w_end - w_start);
3545 this_locator->exchange_cpu_time[0] += (cpu_end - cpu_start);
3546 }
3547
3548 /*----------------------------------------------------------------------------*/
3549 /*!
3550 * \brief Return timing information.
3551 *
3552 * In parallel mode, this includes communication time.
3553 *
3554 * When location on closest elements to force location of all points is
3555 * active, location times include a total value, followed by the value
3556 * associated with the location of closest elements stage.
3557 *
3558 * \param[in] this_locator pointer to locator structure
3559 * \param[out] location_wtime Location Wall-clock time (or NULL)
3560 * \param[out] location_cpu_time Location CPU time (or NULL)
3561 * \param[out] exchange_wtime Variable exchange Wall-clock time
3562 * (size: 1 or NULL)
3563 * \param[out] exchange_cpu_time Variable exchange CPU time (size: 2 or NULL)
3564 */
3565 /*----------------------------------------------------------------------------*/
3566
3567 void
ple_locator_get_times(const ple_locator_t * this_locator,double * location_wtime,double * location_cpu_time,double * exchange_wtime,double * exchange_cpu_time)3568 ple_locator_get_times(const ple_locator_t *this_locator,
3569 double *location_wtime,
3570 double *location_cpu_time,
3571 double *exchange_wtime,
3572 double *exchange_cpu_time)
3573 {
3574 _get_times(this_locator,
3575 0,
3576 location_wtime, location_cpu_time,
3577 exchange_wtime, exchange_cpu_time);
3578 }
3579
3580 /*----------------------------------------------------------------------------*/
3581 /*!
3582 * \brief Return communication timing information.
3583 *
3584 * In serial mode, return times are always zero.
3585 *
3586 * When location on closest elements to force location of all points is
3587 * active, location times include a total value, followed by the value
3588 * associated with the location of closest elements stage.
3589 *
3590 * parameters:
3591 * \param[in] this_locator pointer to locator structure
3592 * \param[out] location_wtime Location Wall-clock time (or NULL)
3593 * \param[out] location_cpu_time Location CPU time (or NULL)
3594 * \param[out] exchange_wtime Variable exchange Wall-clock time (or NULL)
3595 * \param[out] exchange_cpu_time Variable exchange CPU time (or NULL)
3596 */
3597 /*----------------------------------------------------------------------------*/
3598
3599 void
ple_locator_get_comm_times(const ple_locator_t * this_locator,double * location_wtime,double * location_cpu_time,double * exchange_wtime,double * exchange_cpu_time)3600 ple_locator_get_comm_times(const ple_locator_t *this_locator,
3601 double *location_wtime,
3602 double *location_cpu_time,
3603 double *exchange_wtime,
3604 double *exchange_cpu_time)
3605 {
3606 _get_times(this_locator,
3607 1,
3608 location_wtime, location_cpu_time,
3609 exchange_wtime, exchange_cpu_time);
3610 }
3611
3612 /*----------------------------------------------------------------------------*/
3613 /*!
3614 * \brief Dump printout of a locator structure.
3615 *
3616 * \param this_locator pointer to structure that should be dumped
3617 */
3618 /*----------------------------------------------------------------------------*/
3619
3620 void
ple_locator_dump(const ple_locator_t * this_locator)3621 ple_locator_dump(const ple_locator_t *this_locator)
3622 {
3623 int i;
3624 ple_lnum_t j;
3625 const ple_lnum_t *idx, *index, *loc;
3626 const ple_coord_t *coords;
3627
3628 const ple_locator_t *_locator = this_locator;
3629
3630 if (this_locator == NULL)
3631 return;
3632
3633 /* Basic information */
3634 /*-------------------*/
3635
3636 ple_printf("\n"
3637 "Locator:\n\n"
3638 "Spatial dimension: %d\n"
3639 "Exchange algorithm: %d\n"
3640 "Number of ranks of distant location: %d\n"
3641 "First rank of distant location: %d\n"
3642 "Number of intersecting distant ranks: %d\n",
3643 _locator->dim,
3644 _locator->exchange_algorithm,
3645 _locator->n_ranks, _locator->start_rank,
3646 _locator->n_intersects);
3647
3648 #if defined(PLE_HAVE_MPI)
3649 if (_locator->comm != MPI_COMM_NULL)
3650 ple_printf("\n"
3651 "Associated MPI communicator: %ld\n",
3652 (long)(_locator->comm));
3653 #endif
3654
3655 /* Arrays indexed by rank */
3656 /*------------------------*/
3657
3658 if (_locator->intersect_rank != NULL) {
3659 for (i = 0; i < _locator->n_intersects; i++)
3660 ple_printf("\n"
3661 " Intersection %d with distant rank %d\n\n",
3662 i, _locator->intersect_rank[i]);
3663 }
3664 if (_locator->intersect_rank != NULL) {
3665 for (i = 0; i < _locator->n_intersects; i++)
3666 ple_printf("\n"
3667 " Communication ordering %d: %d\n\n",
3668 i, _locator->comm_order[i]);
3669 }
3670
3671 if (_locator->n_interior > 0) {
3672
3673 if (_locator->local_point_ids != NULL) {
3674
3675 ple_printf("\n Local point ids (for receiving):\n\n");
3676 idx = _locator->local_points_idx;
3677 index = _locator->local_point_ids;
3678 for (i = 0; i < _locator->n_intersects; i++) {
3679 if (idx[i+1] > idx[i]) {
3680 ple_printf("%6d (idx = %10d) %10d\n",
3681 i, idx[i], index[idx[i]]);
3682 for (j = idx[i] + 1; j < idx[i + 1]; j++)
3683 ple_printf(" %10d\n", index[j]);
3684 }
3685 else {
3686 ple_printf("%6d (idx = %10d)\n", i, idx[i]);
3687 }
3688 ple_printf(" end (idx = %10d)\n", idx[_locator->n_intersects]);
3689 }
3690
3691 }
3692
3693 }
3694
3695 if (_locator->distant_points_idx != NULL) {
3696
3697 idx = _locator->distant_points_idx;
3698 loc = _locator->distant_point_location;
3699 coords = _locator->distant_point_coords;
3700
3701 if (idx[_locator->n_intersects] > 0)
3702 ple_printf("\n Distant point location:\n\n");
3703
3704 for (i = 0; i < _locator->n_intersects; i++) {
3705
3706 if (idx[i+1] > idx[i]) {
3707
3708 if (_locator->dim == 1) {
3709 ple_printf("%6d (idx = %10d) %10d [%12.5e]\n",
3710 i, _locator->intersect_rank[i], idx[i],
3711 loc[idx[i]], coords[idx[i]]);
3712 for (j = idx[i] + 1; j < idx[i + 1]; j++)
3713 ple_printf(" %10d [%12.5e]\n",
3714 loc[j], coords[j]);
3715 }
3716 else if (_locator->dim == 2) {
3717 ple_printf("%6d (idx = %10d) %10d [%12.5e, %12.5e]\n",
3718 i, idx[i], loc[idx[i]],
3719 coords[2*idx[i]], coords[2*idx[i]+1]);
3720 for (j = idx[i] + 1; j < idx[i + 1]; j++)
3721 ple_printf(" %10d [%12.5e, %12.5e]\n",
3722 loc[j], coords[2*j], coords[2*j+1]);
3723 }
3724 else if (_locator->dim == 3) {
3725 ple_printf("%6d (idx = %10d) %10d [%12.5e, %12.5e, %12.5e]\n",
3726 i, idx[i], loc[idx[i]],
3727 coords[3*idx[i]], coords[3*idx[i]+1], coords[3*idx[i]+2]);
3728 for (j = idx[i] + 1; j < idx[i + 1]; j++)
3729 ple_printf(" "
3730 "%10d [%12.5e, %12.5e, %12.5e]\n",
3731 loc[j], coords[3*j], coords[3*j+1], coords[3*j+2]);
3732 }
3733
3734 } /* if (idx[i+1] > idx[i]) */
3735
3736 }
3737
3738 if (idx[_locator->n_intersects] > 0)
3739 ple_printf(" end (idx = %10d)\n", idx[_locator->n_intersects]);
3740 }
3741
3742 /* Local arrays */
3743 /*--------------*/
3744
3745 ple_printf("\n"
3746 " Number of local points successfully located: %d\n\n",
3747 _locator->n_interior);
3748
3749 for (j = 0; j < _locator->n_interior; j++)
3750 ple_printf(" %10d\n", _locator->interior_list[j]);
3751
3752 if (_locator->n_interior > 0)
3753 ple_printf("\n");
3754
3755 ple_printf(" Number of local points not located: %d\n",
3756 _locator->n_exterior);
3757
3758 for (j = 0; j < _locator->n_exterior; j++)
3759 ple_printf(" %10d\n", _locator->exterior_list[j]);
3760
3761 if (_locator->n_exterior > 0)
3762 ple_printf("\n");
3763
3764 /* Timing information */
3765 /*--------------------*/
3766
3767 ple_printf(" Location Wall-clock time: %12.5f (comm: %12.5f)\n",
3768 _locator->location_wtime[0], _locator->location_wtime[1]);
3769
3770 ple_printf(" Location CPU time: %12.5f (comm: %12.5f)\n",
3771 _locator->location_cpu_time[0], _locator->location_cpu_time[1]);
3772
3773 ple_printf(" Exchange Wall-clock time: %12.5f (comm: %12.5f)\n",
3774 _locator->exchange_wtime[0], _locator->exchange_wtime[1]);
3775
3776 ple_printf(" Exchange CPU time: %12.5f (comm: %12.5f)\n",
3777 _locator->exchange_cpu_time[0], _locator->exchange_cpu_time[1]);
3778
3779 }
3780
3781 #if defined(PLE_HAVE_MPI)
3782
3783 /*----------------------------------------------------------------------------*/
3784 /*!
3785 * \brief Get the maximum number of exchanging ranks for which we use
3786 * asynchronous MPI sends and receives instead of MPI_SendRecv.
3787 *
3788 * \return the maximum number of ranks allowing asynchronous exchanges
3789 */
3790 /*----------------------------------------------------------------------------*/
3791
3792 int
ple_locator_get_async_threshold(void)3793 ple_locator_get_async_threshold(void)
3794 {
3795 return _ple_locator_async_threshold;
3796 }
3797
3798 /*----------------------------------------------------------------------------*/
3799 /*!
3800 * \brief Set the maximum number of exchanging ranks for which we use
3801 * asynchronous MPI sends and receives instead of MPI_SendRecv.
3802 *
3803 * \param threshold maximum number of ranks allowing asynchronous exchanges
3804 */
3805 /*----------------------------------------------------------------------------*/
3806
3807 void
ple_locator_set_async_threshold(int threshold)3808 ple_locator_set_async_threshold(int threshold)
3809 {
3810 _ple_locator_async_threshold = threshold;
3811 }
3812
3813 /*----------------------------------------------------------------------------*/
3814 /*!
3815 * \brief Register communication logging functions for locator instrumentation.
3816 *
3817 * By default, locators are not instrumented.
3818 *
3819 * Functions using MPE may be defined and used, but other similar systems
3820 * may be used.
3821 *
3822 * \param[in] log_function pointer to logging function
3823 * \param[in] start_p_comm point to point communication start event number
3824 * \param[in] end_p_comm point to point communication end event number
3825 * \param[in] start_g_comm global communication start event number
3826 * \param[in] end_g_comm global communication end event number
3827 */
3828 /*----------------------------------------------------------------------------*/
3829
3830 void
ple_locator_set_comm_log(ple_locator_log_t * log_function,int start_p_comm,int end_p_comm,int start_g_comm,int end_g_comm)3831 ple_locator_set_comm_log(ple_locator_log_t *log_function,
3832 int start_p_comm,
3833 int end_p_comm,
3834 int start_g_comm,
3835 int end_g_comm)
3836 {
3837 _ple_locator_log_func = log_function;
3838
3839 _ple_locator_log_start_p_comm = start_p_comm;
3840 _ple_locator_log_end_p_comm = end_p_comm;
3841 _ple_locator_log_start_g_comm = start_g_comm;
3842 _ple_locator_log_end_g_comm = end_g_comm;
3843 }
3844
3845 #endif /* defined(PLE_HAVE_MPI) */
3846
3847 /*----------------------------------------------------------------------------*/
3848
3849 #ifdef __cplusplus
3850 }
3851 #endif /* __cplusplus */
3852