1 /* One-sided MPI implementation of Libcaf
2 *
3 * Copyright (c) 2012-2018, Sourcery, Inc.
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of the Sourcery, Inc., nor the
14 * names of its contributors may be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
27
28 /****l* mpi/mpi_caf.c
29 * NAME
30 * mpi_caf
31 * SYNOPSIS
32 * This program implements the LIBCAF_MPI transport layer.
33 ******
34 */
35
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h> /* For memcpy. */
39 #include <stdarg.h> /* For variadic arguments. */
40 #include <float.h> /* For type conversion of floating point numbers. */
41 #ifndef ALLOCA_MISSING
42 #include <alloca.h> /* Assume functionality provided elsewhere if missing */
43 #endif
44 #include <unistd.h>
45 #include <stdint.h> /* For int32_t. */
46 #include <mpi.h>
47 #include <pthread.h>
48 #include <signal.h> /* For raise */
49
50 #ifdef HAVE_MPI_EXT_H
51 #include <mpi-ext.h>
52 #endif
53 #ifdef USE_FAILED_IMAGES
54 #define WITH_FAILED_IMAGES 1
55 #endif
56
57 #include "libcaf.h"
58
59 /* Define GFC_CAF_CHECK to enable run-time checking. */
60 /* #define GFC_CAF_CHECK 1 */
61
62 /* Debug array referencing */
/* Debug names for the array-reference kinds (caf_array_ref_t), used only in
 * debug output.  Order must match the enum declared in libcaf.h.
 * Declared const: the table is read-only string data. */
static const char *caf_array_ref_str[] = {
  "CAF_ARR_REF_NONE",
  "CAF_ARR_REF_VECTOR",
  "CAF_ARR_REF_FULL",
  "CAF_ARR_REF_RANGE",
  "CAF_ARR_REF_SINGLE",
  "CAF_ARR_REF_OPEN_END",
  "CAF_ARR_REF_OPEN_START"
};
72
/* Debug names for the reference types (caf_ref_type_t), used only in debug
 * output.  Order must match the enum declared in libcaf.h.
 * Declared const: the table is read-only string data. */
static const char *caf_ref_type_str[] = {
  "CAF_REF_COMPONENT",
  "CAF_REF_ARRAY",
  "CAF_REF_STATIC_ARRAY",
};
78
/* Debug helpers.
 * dprint() writes a message to stderr prefixed with
 * "this_image/num_images: function(line)".  chk_err() decodes a
 * non-successful MPI return code into its error class and message string
 * and reports it via dprint().  Unless EXTRA_DEBUG_OUTPUT is defined both
 * macros expand to nothing, so release builds carry no overhead. */
#ifndef EXTRA_DEBUG_OUTPUT
#define dprint(...)
#define chk_err(...)
#else
#define dprint(format, ...) \
  fprintf(stderr, "%d/%d: %s(%d) " format, \
          caf_this_image, caf_num_images, \
          __FUNCTION__, __LINE__, ## __VA_ARGS__)
#define chk_err(ierr) \
  do \
    { \
      if (ierr != MPI_SUCCESS) \
        { \
          int err_class, err_len; \
          char err_str[MPI_MAX_ERROR_STRING]; \
          MPI_Error_class(ierr, &err_class); \
          MPI_Error_string(ierr, err_str, &err_len); \
          dprint("MPI-error: err_class=%d ierr=%d [%s]", \
                 err_class, ierr, err_str); \
        } \
    } while (0)
#endif
101
#ifdef GCC_GE_7
/* The caf-token of the mpi-library.
 * Objects of this data structure are owned by the library and are treated as a
 * black box by the compiler. In the coarray-program the tokens are opaque
 * pointers, i.e. black boxes.
 *
 * For each coarray (allocatable|save|pointer) (scalar|array|event|lock) a
 * token needs to be present. */
typedef struct mpi_caf_token_t
{
  /* The pointer to memory associated to this token's data on the local image.
   * The compiler uses the address for direct access to the memory of the
   * object this token is associated to, i.e., the memory pointed to by
   * memptr is the scalar or array.
   * When the library is responsible for deleting the memory, then this is
   * the one to free. */
  void *memptr;
  /* The MPI window associated to the object's data.
   * The window is used to access the data on other images. In pre GCC_GE_7
   * installations this was the token. */
  MPI_Win memptr_win;
  /* The pointer to the primary array, i.e., to coarrays that are arrays and
   * not a derived type. */
  gfc_descriptor_t *desc;
} mpi_caf_token_t;

/* For components of derived type coarrays a slave_token is needed when the
 * component has the allocatable or pointer attribute. The token is reduced in
 * size, because the other data is already accessible and has been read from
 * the remote to fulfill the request.
 *
 *   TYPE t
 *   +------------------+
 *   | comp *           |
 *   | comp_token *     |
 *   +------------------+
 *
 *   TYPE(t) : o                struct T // the mpi_caf_token to t
 *                              +----------------+
 *                              | ...            |
 *                              +----------------+
 *
 *   o[2]%.comp                 // using T to get o of [2]
 *
 *   +-o-on-image-2----+        "copy" of the required parts of o[2] on
 *   | 0x4711          |        current image: comp * in global_dynamic_window
 *   | 0x2424          |        comp_token * of type slave_token
 *   +-----------------+
 * now all required data is present on the current image to access the remote
 * components. This nests without limit. */
typedef struct mpi_caf_slave_token_t
{
  /* The pointer to the memory associated to this slave token's data on the
   * local image. When the library is responsible for deleting the memory,
   * then this is the one to free. And this is the only reason why it is
   * stored here. */
  void *memptr;
  /* The pointer to the descriptor or NULL for scalars.
   * When referencing a remote component array, then the extensions of the
   * array are needed. Usually the data pointer is at offset zero of the
   * descriptor_t structure, but we don't rely on it. So store the pointer
   * to the base address of the descriptor. The descriptor always is in the
   * window of the master data or the allocated component and is never
   * stored at an address not accessible by a window. */
  gfc_descriptor_t *desc;
} mpi_caf_slave_token_t;

/* Extract the address of the MPI window from an opaque caf_token_t. */
#define TOKEN(X) &(((mpi_caf_token_t *) (X))->memptr_win)
#else
/* Pre-GCC-7 ABI: the token is simply a pointer to the MPI window. */
typedef MPI_Win *mpi_caf_token_t;
#define TOKEN(X) ((mpi_caf_token_t) (X))
#endif
174
/* Forward declaration of prototypes. */

/* Terminate the program with the given stat/exit code; never returns. */
static void terminate_internal (int stat_code, int exit_code)
  __attribute__((noreturn));
/* Implements SYNC IMAGES; 'internal' marks calls issued by the library
 * itself rather than by user code. */
static void sync_images_internal (int count, int images[], int *stat,
                                  char *errmsg, size_t errmsg_len,
                                  bool internal);
/* Implements ERROR STOP with a character stop code; never returns. */
static void error_stop_str (const char *string, size_t len, bool quiet)
  __attribute__((noreturn));

/* Global variables. */
/* This image's number, 1-based per Fortran convention (MPI rank + 1). */
static int caf_this_image;
/* Total number of images; 0 until PREFIX(init) has run (used as the
 * "already initialized" flag there). */
static int caf_num_images = 0;
/* Non-zero once the runtime has been finalized. */
static int caf_is_finalized = 0;
/* Dynamic window to which images attach memory at runtime. */
static MPI_Win global_dynamic_win;

#if MPI_VERSION >= 3
/* Info object asserting all images create windows of the same size. */
MPI_Info mpi_info_same_size;
#endif // MPI_VERSION

/* The size of pointer on this platform. */
static const size_t stdptr_size = sizeof(void *);

/* Variables needed for syncing images. */

/* All image numbers except this image's (length caf_num_images - 1). */
static int *images_full;
MPI_Request *sync_handles;
/* Per-image arrival bookkeeping for SYNC IMAGES. */
static int *arrived;
/* Message tag reserved for SYNC IMAGES traffic. */
static const int MPI_TAG_CAF_SYNC_IMAGES = 424242;

/* Pending puts */
#if defined(NONBLOCKING_PUT) && !defined(CAF_MPI_LOCK_UNLOCK)
/* Node of the singly-linked list recording a window/image pair with an
 * outstanding non-blocking put that still needs an MPI_Win_flush. */
typedef struct win_sync {
  MPI_Win *win;
  int img;
  struct win_sync *next;
} win_sync;

/* Tail and head of the pending-puts list; drained by explicit_flush(). */
static win_sync *last_elem = NULL;
static win_sync *pending_puts = NULL;
#endif

/* Linked list of static coarrays registered. Do not expose to public in the
 * header, because it is implementation specific. */
struct caf_allocated_tokens_t
{
  caf_token_t token;
  struct caf_allocated_tokens_t *prev;
} *caf_allocated_tokens = NULL;

#ifdef GCC_GE_7
/* Linked list of slave coarrays registered. */
struct caf_allocated_slave_tokens_t
{
  mpi_caf_slave_token_t *token;
  struct caf_allocated_slave_tokens_t *prev;
} *caf_allocated_slave_tokens = NULL;
#endif
233
/* Image status variable */
/* This image's execution status, exposed to other images via stat_tok;
 * set to STAT_STOPPED_IMAGE or an error code during finalization. */
static int img_status = 0;
/* Window exposing img_status to all other images. */
static MPI_Win *stat_tok;

/* Active messages variables */
char **buff_am;          /* Per-image receive buffers for active messages. */
MPI_Status *s_am;        /* Per-image statuses for the message tests. */
MPI_Request *req_am;     /* Per-image outstanding receive requests. */
MPI_Datatype *dts;       /* Per-image datatypes for unpacking. */
char *msgbody;           /* NOTE(review): unused in this chunk; presumably
                          * a message payload buffer — confirm elsewhere. */
pthread_mutex_t lock_am; /* Serializes helper-thread state access. */
int done_am = 0;         /* Set to 1 when the helper thread has exited. */

char err_buffer[MPI_MAX_ERROR_STRING];

/* All CAF runtime calls should use this comm instead of MPI_COMM_WORLD for
 * interoperability purposes. */
MPI_Comm CAF_COMM_WORLD;

/* List of known teams and stack of teams currently in use; the initial
 * team is installed by PREFIX(init). */
static caf_teams_list *teams_list = NULL;
static caf_used_teams_list *used_teams = NULL;

/* Emitted when a theoretically unreachable part is reached. */
const char unreachable[] = "Fatal error: unreachable alternative found.\n";
258
#ifdef WITH_FAILED_IMAGES
/* The status of the other images.  image_stati is an array of size
 * caf_num_images; the status of each image is noted here, where the index
 * is the image number minus one. */
int *image_stati;

/* This gives the number of all images that are known to have failed. */
int num_images_failed = 0;

/* This is the number of all images that are known to have stopped. */
int num_images_stopped = 0;

/* The asynchronous request-handle to all participating images. */
MPI_Request alive_request;

/* This dummy is used for the alive request. Its content is arbitrary and
 * never read. It is just a memory location where one could put something,
 * which is never done. */
int alive_dummy;

/* The MPI error-handler object associated with CAF_COMM_WORLD. */
MPI_Errhandler failed_CAF_COMM_mpi_err_handler;

/* The monitor comm for detecting failed images. We cannot attach the
 * monitor to CAF_COMM_WORLD or the messages sent by sync images would be
 * caught by the monitor. */
MPI_Comm alive_comm;

/* Set when entering sync_images_internal, to prevent the error handler from
 * eating our messages. */
bool no_stopped_images_check_in_errhandler = 0;
#endif
291
/* For MPI interoperability, allow external initialization
 * (and thus finalization) of MPI. */
bool caf_owns_mpi = false;

/* Function pointers holding the user-supplied reduction operations for
 * co_reduce, one per supported type.
 * The handles when arguments are passed by reference. */
int (*int8_t_by_reference)(void *, void *);
int (*int16_t_by_reference)(void *, void *);
int (*int32_t_by_reference)(void *, void *);
int (*int64_t_by_reference)(void *, void *);
float (*float_by_reference)(void *, void *);
double (*double_by_reference)(void *, void *);
/* Strings are always passed by reference. */
void (*char_by_reference)(void *, int, void *, void *, int, int);
/* The handles when arguments are passed by value. */
int8_t (*int8_t_by_value)(int8_t, int8_t);
int16_t (*int16_t_by_value)(int16_t, int16_t);
int (*int32_t_by_value)(int32_t, int32_t);
int64_t (*int64_t_by_value)(int64_t, int64_t);
float (*float_by_value)(float, float);
double (*double_by_value)(double, double);
313
/* Define shortcuts for Win_lock and _unlock depending on whether the
 * primitives are available in the MPI implementation. When they are not
 * available the shortcut is expanded to nothing by the preprocessor else to
 * the API call. This prevents having #ifdef #else #endif constructs strewn
 * all over the code reducing its readability. */
#ifdef CAF_MPI_LOCK_UNLOCK
/* Per-access lock/unlock is available; the *_all variants are no-ops. */
#define CAF_Win_lock(type, img, win) MPI_Win_lock (type, img, 0, win)
#define CAF_Win_unlock(img, win) MPI_Win_unlock (img, win)
#define CAF_Win_lock_all(win)
#define CAF_Win_unlock_all(win)
#else // CAF_MPI_LOCK_UNLOCK
/* No per-access locking: windows stay lock_all'ed for their lifetime and
 * individual accesses are completed with MPI_Win_flush. */
#define CAF_Win_lock(type, img, win)
#define CAF_Win_unlock(img, win) MPI_Win_flush (img, win)
#if MPI_VERSION >= 3
#define CAF_Win_lock_all(win) MPI_Win_lock_all (MPI_MODE_NOCHECK, win)
#else
#define CAF_Win_lock_all(win)
#endif
#define CAF_Win_unlock_all(win) MPI_Win_unlock_all (win)
#endif // CAF_MPI_LOCK_UNLOCK

/* Minimum of two values.  Beware: evaluates its arguments twice. */
#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
336
#if defined(NONBLOCKING_PUT) && !defined(CAF_MPI_LOCK_UNLOCK)
/* Complete every outstanding non-blocking put by flushing the recorded
 * window/image pair, freeing the bookkeeping list as it is walked, and
 * finally reset the list head and tail. */
void explicit_flush()
{
  int ierr;
  win_sync *cur = pending_puts;

  while (cur != NULL)
    {
      win_sync *done = cur;
      ierr = MPI_Win_flush(done->img, *done->win); chk_err(ierr);
      cur = done->next;
      free(done);
    }

  pending_puts = NULL;
  last_elem = NULL;
}
#endif
355
#ifdef HELPER
/* Body of the active-message helper thread.
 * Allocates one status/request/datatype slot per image, posts an initial
 * receive from every image, then polls all requests under lock_am until
 * the runtime is finalized.  On finalization it sets done_am and exits.
 * Message id 2 was once used for strided transfers and is free for reuse. */
void helperFunction()
{
  int i = 0, flag = 0, msgid = 0, ierr;
  int ndim = 0, position = 0;

  s_am = calloc(caf_num_images, sizeof(MPI_Status));
  req_am = calloc(caf_num_images, sizeof(MPI_Request));
  dts = calloc(caf_num_images, sizeof(MPI_Datatype));

  /* Arm one receive per image; tag 1 carries active messages. */
  for (i = 0; i < caf_num_images; i++)
    {
      ierr = MPI_Irecv(buff_am[i], 1000, MPI_PACKED, i, 1, CAF_COMM_WORLD,
                       &req_am[i]); chk_err(ierr);
    }

  while (1)
    {
      pthread_mutex_lock(&lock_am);
      for (i = 0; i < caf_num_images; i++)
        {
          if (!caf_is_finalized)
            {
              ierr = MPI_Test(&req_am[i], &flag, &s_am[i]); chk_err(ierr);
              if (flag == 1)
                {
                  /* A message arrived: extract the message id. */
                  position = 0;
                  ierr = MPI_Unpack(buff_am[i], 1000, &position, &msgid, 1, MPI_INT,
                                    CAF_COMM_WORLD); chk_err(ierr);
                  /* msgid=2 was initially assigned to strided transfers,
                   * it can be reused
                   * Strided transfers Msgid=2
                   * You can add your own function */

                  if (msgid == 2)
                    {
                      msgid = 0; position = 0;
                    }
                  /* Re-arm the receive for the next message from image i. */
                  ierr = MPI_Irecv(buff_am[i], 1000, MPI_PACKED, i, 1, CAF_COMM_WORLD,
                                   &req_am[i]); chk_err(ierr);
                  flag = 0;
                }
            }
          else
            {
              /* Runtime finalized: signal completion and leave the thread. */
              done_am = 1;
              pthread_mutex_unlock(&lock_am);
              return;
            }
        }
      pthread_mutex_unlock(&lock_am);
    }
}
#endif
410
411
412 /* Keep in sync with single.c. */
413
414 static void
caf_runtime_error(const char * message,...)415 caf_runtime_error (const char *message, ...)
416 {
417 va_list ap;
418 fprintf(stderr, "Fortran runtime error on image %d: ", caf_this_image);
419 va_start(ap, message);
420 vfprintf(stderr, message, ap);
421 va_end(ap);
422 fprintf(stderr, "\n");
423
424 /* FIXME: Shutdown the Fortran RTL to flush the buffer. PR 43849.
425 * FIXME: Do some more effort than just to abort. */
426 // MPI_Finalize();
427
428 /* Should be unreachable, but to make sure also call exit. */
429 exit(EXIT_FAILURE);
430 }
431
/* Forward declaration of the feature unsupported message for failed images
 * functions.  Called by every stub that needs WITH_FAILED_IMAGES support. */
static void
unsupported_fail_images_message(const char * functionname);

/* Forward declaration of the feature unimplemented message for allocatable
 * components.  Called where allocatable-component handling is missing. */
static void
unimplemented_alloc_comps_message(const char * functionname);
441
442 static void
locking_atomic_op(MPI_Win win,int * value,int newval,int compare,int image_index,size_t index)443 locking_atomic_op(MPI_Win win, int *value, int newval,
444 int compare, int image_index, size_t index)
445 {
446 CAF_Win_lock(MPI_LOCK_EXCLUSIVE, image_index - 1, win);
447 int ierr = MPI_Compare_and_swap(&newval, &compare,value, MPI_INT,
448 image_index - 1, index * sizeof(int), win);
449 chk_err(ierr);
450 CAF_Win_unlock(image_index - 1, win);
451 }
452
453
/* Define a helper to check whether the image at the given index is healthy,
 * i.e., it hasn't failed.  When it has failed: with a non-NULL stat the
 * status is stored and the calling function returns; with a NULL stat the
 * program is terminated.  The expansion is wrapped in do { } while (0) so
 * the macro acts as a single statement and is safe inside unbraced
 * if/else bodies; arguments are parenthesized against operator-precedence
 * surprises at the call site. */
#ifdef WITH_FAILED_IMAGES
#define check_image_health(image_index, stat) \
  do \
    { \
      if (image_stati[(image_index) - 1] == STAT_FAILED_IMAGE) \
        { \
          if ((stat) == NULL) \
            terminate_internal (STAT_FAILED_IMAGE, 0); \
          *(stat) = STAT_FAILED_IMAGE; \
          return; \
        } \
    } \
  while (0)
#else
#define check_image_health(image_index, stat)
#endif
467
#ifdef WITH_FAILED_IMAGES
/* Handle failed image's errors and try to recover the remaining processes
 * to allow the user to detect an image fail and exit gracefully.
 * Installed as the MPI error handler on CAF_COMM_WORLD and alive_comm; on
 * return *perr is rewritten to STAT_STOPPED_IMAGE or STAT_FAILED_IMAGE and
 * *pcomm is replaced by a shrunk communicator without the failed ranks. */
static void
failed_stopped_errorhandler_function (MPI_Comm* pcomm, int* perr, ...)
{
  MPI_Comm comm, shrunk, newcomm;
  int num_failed_in_group, i, err, ierr;
  MPI_Group comm_world_group, failed_group;
  int *ranks_of_failed_in_comm_world, *ranks_failed;
  int ns, srank, crank, rc, flag, drank, newrank;
  bool stopped = false;

  comm = *pcomm;

  MPI_Error_class(*perr, &err);
  if (err != MPIX_ERR_PROC_FAILED && err != MPIX_ERR_REVOKED)
    {
      /* We can handle PROC_FAILED and REVOKED ones only. */
      char errstr[MPI_MAX_ERROR_STRING];
      int errlen;
      MPI_Error_string(err, errstr, &errlen);
      /* We can't use caf_runtime_error here, because that would exit, which
       * means only the one process will stop, but we need to stop MPI
       * completely, which can be done by calling MPI_Abort(). */
      fprintf(stderr,
              "Fortran runtime error on image #%d:\nMPI error: '%s'.\n",
              caf_this_image, errstr);
      MPI_Abort(*pcomm, err);
    }

  dprint("(error = %d)\n", err);

  /* Acknowledge the failure and get the group of failed processes (ULFM). */
  ierr = MPIX_Comm_failure_ack(comm); chk_err(ierr);
  ierr = MPIX_Comm_failure_get_acked(comm, &failed_group); chk_err(ierr);
  ierr = MPI_Group_size(failed_group, &num_failed_in_group); chk_err(ierr);

  dprint("%d images failed.\n", num_failed_in_group);
  /* Nothing to do without failures; more failures than images would be an
   * inconsistent state — bail out of the handler in both cases. */
  if (num_failed_in_group <= 0)
    {
      *perr = MPI_SUCCESS;
      return;
    }
  if (num_failed_in_group > caf_num_images)
    {
      *perr = MPI_SUCCESS;
      return;
    }

  ierr = MPI_Comm_group(comm, &comm_world_group); chk_err(ierr);
  /* Stack-allocated scratch; released automatically on return. */
  ranks_of_failed_in_comm_world =
    (int *) alloca(sizeof(int) * num_failed_in_group);
  ranks_failed = (int *) alloca(sizeof(int) * num_failed_in_group);
  for (i = 0; i < num_failed_in_group; ++i)
    {
      ranks_failed[i] = i;
    }
  /* Now translate the ranks of the failed images into communicator world. */
  ierr = MPI_Group_translate_ranks(failed_group, num_failed_in_group,
                                   ranks_failed, comm_world_group,
                                   ranks_of_failed_in_comm_world);
  chk_err(ierr);

  num_images_failed += num_failed_in_group;

  if (!no_stopped_images_check_in_errhandler)
    {
      int buffer, flag;
      MPI_Request req;
      MPI_Status request_status;
      dprint("Checking for stopped images.\n");
      /* A stopping image sends its status on the SYNC IMAGES tag; poll for
       * such a message to distinguish "stopped" from "failed". */
      ierr = MPI_Irecv(&buffer, 1, MPI_INT, MPI_ANY_SOURCE,
                       MPI_TAG_CAF_SYNC_IMAGES, CAF_COMM_WORLD, &req);
      chk_err(ierr);
      if (ierr == MPI_SUCCESS)
        {
          ierr = MPI_Test(&req, &flag, &request_status); chk_err(ierr);
          if (flag)
            {
              // Received a result
              if (buffer == STAT_STOPPED_IMAGE)
                {
                  dprint("Image #%d found stopped.\n", request_status.MPI_SOURCE);
                  stopped = true;
                  if (image_stati[request_status.MPI_SOURCE] == 0)
                    ++num_images_stopped;
                  image_stati[request_status.MPI_SOURCE] = STAT_STOPPED_IMAGE;
                }
            }
          else
            {
              /* Nothing received: drop the outstanding receive again. */
              dprint("No stopped images found.\n");
              ierr = MPI_Cancel(&req); chk_err(ierr);
            }
        }
      else
        {
          int err;
          MPI_Error_class(ierr, &err);
          dprint("Error on checking for stopped images %d.\n", err);
        }
    }

  /* TODO: Consider whether removing the failed image from images_full will be
   * necessary. This is more or less politics. */
  for (i = 0; i < num_failed_in_group; ++i)
    {
      if (ranks_of_failed_in_comm_world[i] >= 0
          && ranks_of_failed_in_comm_world[i] < caf_num_images)
        {
          if (image_stati[ranks_of_failed_in_comm_world[i]] == 0)
            image_stati[ranks_of_failed_in_comm_world[i]] = STAT_FAILED_IMAGE;
        }
      else
        {
          dprint("Rank of failed image %d out of range of images 0..%d.\n",
                 ranks_of_failed_in_comm_world[i], caf_num_images);
        }
    }

redo:
  /* Build a communicator containing only the surviving processes. */
  dprint("Before shrink. \n");
  ierr = MPIX_Comm_shrink(*pcomm, &shrunk);
  dprint("After shrink, rc = %d.\n", ierr);
  ierr = MPI_Comm_set_errhandler(shrunk, failed_CAF_COMM_mpi_err_handler);
  chk_err(ierr);
  ierr = MPI_Comm_size(shrunk, &ns); chk_err(ierr);
  ierr = MPI_Comm_rank(shrunk, &srank); chk_err(ierr);
  ierr = MPI_Comm_rank(*pcomm, &crank); chk_err(ierr);

  dprint("After getting ranks, ns = %d, srank = %d, crank = %d.\n",
         ns, srank, crank);

  /* Split does the magic: removing spare processes and reordering ranks
   * so that all surviving processes remain at their former place */
  rc = MPI_Comm_split(shrunk, (crank < 0) ? MPI_UNDEFINED : 1, crank, &newcomm);
  ierr = MPI_Comm_rank(newcomm, &newrank); chk_err(ierr);
  dprint("After split, rc = %d, rank = %d.\n", rc, newrank);
  flag = (rc == MPI_SUCCESS);
  /* Split or some of the communications above may have failed if
   * new failures have disrupted the process: we need to
   * make sure we succeeded at all ranks, or retry until it works. */
  flag = MPIX_Comm_agree(newcomm, &flag);
  dprint("After agree, flag = %d.\n", flag);

  ierr = MPI_Comm_rank(newcomm, &drank); chk_err(ierr);
  dprint("After rank, drank = %d.\n", drank);

  ierr = MPI_Comm_free(&shrunk); chk_err(ierr);
  if (MPI_SUCCESS != flag)
    {
      /* Another failure interfered: discard the new comm and retry. */
      if (MPI_SUCCESS == rc)
        {
          ierr = MPI_Comm_free(&newcomm); chk_err(ierr);
        }
      goto redo;
    }

  {
    int cmpres;
    ierr = MPI_Comm_compare(*pcomm, CAF_COMM_WORLD, &cmpres); chk_err(ierr);
    dprint("Comm_compare(*comm, CAF_COMM_WORLD, res = %d) = %d.\n",
           cmpres, ierr);
    ierr = MPI_Comm_compare(*pcomm, alive_comm, &cmpres); chk_err(ierr);
    dprint("Comm_compare(*comm, alive_comm, res = %d) = %d.\n",
           cmpres, ierr);
    if (cmpres == MPI_CONGRUENT)
      {
        /* Re-create the img_status window on the surviving communicator. */
        ierr = MPI_Win_detach(*stat_tok, &img_status); chk_err(ierr);
        dprint("detached win img_status.\n");
        ierr = MPI_Win_free(stat_tok); chk_err(ierr);
        dprint("freed win img_status.\n");
        ierr = MPI_Win_create(&img_status, sizeof(int), 1, mpi_info_same_size,
                              newcomm, stat_tok); chk_err(ierr);
        dprint("(re-)created win img_status.\n");
        CAF_Win_lock_all(*stat_tok);
        dprint("Win_lock_all on img_status.\n");
      }
  }
  /* Also free the old communicator before replacing it. */
  ierr = MPI_Comm_free(pcomm); chk_err(ierr);
  *pcomm = newcomm;

  *perr = stopped ? STAT_STOPPED_IMAGE : STAT_FAILED_IMAGE;
}
#endif
654
/* Acquire the lock at element 'index' of the lock variable in 'win' on
 * image 'image_index' (1-based) by compare-and-swapping this image's number
 * into it.  With a non-NULL acquired_lock this is a single non-blocking
 * attempt and *acquired_lock reports success; otherwise the call spins
 * (with growing usleep back-off) until the lock is free.  Re-acquiring a
 * lock this image already holds yields stat 99 / "Already locked". */
void mutex_lock(MPI_Win win, int image_index, size_t index, int *stat,
                int *acquired_lock, char *errmsg, size_t errmsg_len)
{
  const char msg[] = "Already locked";
#if MPI_VERSION >= 3
  int value = 0, compare = 0, newval = caf_this_image, ierr = 0, i = 0;
#ifdef WITH_FAILED_IMAGES
  int flag, check_failure = 100, zero = 0;
#endif

  if (stat != NULL)
    *stat = 0;

#ifdef WITH_FAILED_IMAGES
  /* Progress the alive request so image failures get detected. */
  ierr = MPI_Test(&alive_request, &flag, MPI_STATUS_IGNORE); chk_err(ierr);
#endif

  /* Try to install this image's number; value receives the prior holder
   * (0 when the lock was free). */
  locking_atomic_op(win, &value, newval, compare, image_index, index);

  if (value == caf_this_image && image_index == caf_this_image)
    goto stat_error;

  if (acquired_lock != NULL)
    {
      /* Non-blocking attempt: only report whether the lock was taken. */
      if (value == 0)
        *acquired_lock = 1;
      else
        *acquired_lock = 0;
      return;
    }

  while (value != 0)
    {
      ++i;
#ifdef WITH_FAILED_IMAGES
      /* Every check_failure-th retry, progress failure detection. */
      if (i == check_failure)
        {
          i = 1;
          ierr = MPI_Test(&alive_request, &flag, MPI_STATUS_IGNORE); chk_err(ierr);
        }
#endif

      locking_atomic_op(win, &value, newval, compare, image_index, index);
#ifdef WITH_FAILED_IMAGES
      /* NOTE(review): 'value' is the 1-based image number of the holder
       * while image_stati is indexed by image number minus one elsewhere;
       * confirm whether image_stati[value - 1] is intended here. */
      if (image_stati[value] == STAT_FAILED_IMAGE)
        {
          /* The holder failed: take over the lock for this image. */
          CAF_Win_lock(MPI_LOCK_EXCLUSIVE, image_index - 1, win);
          /* MPI_Fetch_and_op(&zero, &newval, MPI_INT, image_index - 1,
           * index * sizeof(int), MPI_REPLACE, win); */
          ierr = MPI_Compare_and_swap(&zero, &value, &newval, MPI_INT,
                                      image_index - 1, index * sizeof(int), win);
          chk_err(ierr);
          CAF_Win_unlock(image_index - 1, win);
          break;
        }
#else
      /* Back off proportionally to image number and retry count. */
      usleep(caf_this_image * i);
#endif
    }

  if (stat)
    *stat = ierr;
  else if (ierr == STAT_FAILED_IMAGE)
    terminate_internal(ierr, 0);

  return;

stat_error:
  if (errmsg != NULL)
    {
      /* Blank-pad the Fortran message buffer, then copy what fits. */
      memset(errmsg,' ',errmsg_len);
      memcpy(errmsg, msg, MIN(errmsg_len,strlen(msg)));
    }

  if (stat != NULL)
    *stat = 99;
  else
    terminate_internal(99, 1);
#else // MPI_VERSION
#warning Locking for MPI-2 is not implemented
  printf("Locking for MPI-2 is not supported, "
         "please update your MPI implementation\n");
#endif // MPI_VERSION
}
739
/* Release the lock at element 'index' of the lock variable in 'win' on
 * image 'image_index' (1-based) by atomically replacing it with zero. */
void mutex_unlock(MPI_Win win, int image_index, size_t index, int *stat,
                  char* errmsg, size_t errmsg_len)
{
  const char msg[] = "Variable is not locked";
  if (stat != NULL)
    *stat = 0;
#if MPI_VERSION >= 3
  int value = 1, ierr = 0, newval = 0, flag;
#ifdef WITH_FAILED_IMAGES
  /* Progress the alive request so image failures get detected. */
  ierr = MPI_Test(&alive_request, &flag, MPI_STATUS_IGNORE); chk_err(ierr);
#endif

  /* Swap zero in; value receives the previous holder (currently unused
   * while the not-locked check below stays disabled). */
  CAF_Win_lock(MPI_LOCK_EXCLUSIVE, image_index - 1, win);
  ierr = MPI_Fetch_and_op(&newval, &value, MPI_INT, image_index - 1,
                          index * sizeof(int), MPI_REPLACE, win); chk_err(ierr);
  ierr = CAF_Win_unlock(image_index - 1, win); chk_err(ierr);

  /* Temporarily commented */
  /* if (value == 0)
   * goto stat_error; */

  if (stat)
    *stat = ierr;
  else if (ierr == STAT_FAILED_IMAGE)
    terminate_internal(ierr, 0);

  return;

  /* Unreachable while the check above is commented out; kept for when the
   * "not locked" diagnostic is re-enabled. */
stat_error:
  if (errmsg != NULL)
    {
      /* Blank-pad the Fortran message buffer, then copy what fits. */
      memset(errmsg,' ',errmsg_len);
      memcpy(errmsg, msg, MIN(errmsg_len,strlen(msg)));
    }
  if (stat != NULL)
    *stat = 99;
  else
    terminate_internal(99, 1);
#else // MPI_VERSION
#warning Locking for MPI-2 is not implemented
  printf("Locking for MPI-2 is not supported, "
         "please update your MPI implementation\n");
#endif // MPI_VERSION
}
784
/* Initialize coarray program. This routine assumes that no other
 * MPI initialization happened before. */

void
PREFIX(init) (int *argc, char ***argv)
{
  int flag;
  /* caf_num_images != 0 means initialization already happened. */
  if (caf_num_images == 0)
    {
      int ierr = 0, i = 0, j = 0, rc, prov_lev = 0;
      int is_init = 0, prior_thread_level = MPI_THREAD_FUNNELED;
      /* MPI may have been initialized externally (interoperability). */
      ierr = MPI_Initialized(&is_init); chk_err(ierr);

      if (is_init)
        {
          ierr = MPI_Query_thread(&prior_thread_level); chk_err(ierr);
        }
#ifdef HELPER
      /* The active-message helper thread requires MPI_THREAD_MULTIPLE. */
      if (is_init)
        {
          prov_lev = prior_thread_level;
          caf_owns_mpi = false;
        }
      else
        {
          ierr = MPI_Init_thread(argc, argv, MPI_THREAD_MULTIPLE, &prov_lev);
          chk_err(ierr);
          caf_owns_mpi = true;
        }

      if (caf_this_image == 0 && MPI_THREAD_MULTIPLE != prov_lev)
        caf_runtime_error("MPI_THREAD_MULTIPLE is not supported: %d", prov_lev);
#else
      if (is_init)
        caf_owns_mpi = false;
      else
        {
          ierr = MPI_Init_thread(argc, argv, prior_thread_level, &prov_lev);
          chk_err(ierr);
          caf_owns_mpi = true;
          if (caf_this_image == 0 && MPI_THREAD_FUNNELED > prov_lev)
            caf_runtime_error("MPI_THREAD_FUNNELED is not supported: %d %d", MPI_THREAD_FUNNELED, prov_lev);
        }
#endif
      if (unlikely ((ierr != MPI_SUCCESS)))
        caf_runtime_error("Failure when initializing MPI: %d", ierr);

      /* Duplicate MPI_COMM_WORLD so that no CAF internal functions use it.
       * This is critical for MPI-interoperability. */
      rc = MPI_Comm_dup(MPI_COMM_WORLD, &CAF_COMM_WORLD);
#ifdef WITH_FAILED_IMAGES
      /* Agree across all images that the duplication succeeded. */
      flag = (MPI_SUCCESS == rc);
      rc = MPIX_Comm_agree(MPI_COMM_WORLD, &flag);
      if (rc != MPI_SUCCESS)
        {
          dprint("MPIX_Comm_agree(flag = %d) = %d.\n", flag, rc);
          fflush(stderr);
          MPI_Abort(MPI_COMM_WORLD, 10000);
        }
      MPI_Barrier(MPI_COMM_WORLD);
#endif

      ierr = MPI_Comm_size(CAF_COMM_WORLD, &caf_num_images); chk_err(ierr);
      ierr = MPI_Comm_rank(CAF_COMM_WORLD, &caf_this_image); chk_err(ierr);

      /* Fortran images are numbered from one; MPI ranks from zero. */
      ++caf_this_image;
      caf_is_finalized = 0;

      /* BEGIN SYNC IMAGE preparation
       * Prepare memory for syncing images. */
      images_full = (int *) calloc(caf_num_images - 1, sizeof(int));
      /* images_full holds every image number except this image's. */
      for (i = 1, j = 0; i <= caf_num_images; ++i)
        {
          if (i != caf_this_image)
            images_full[j++] = i;
        }

      arrived = calloc(caf_num_images, sizeof(int));
      sync_handles = malloc(caf_num_images * sizeof(MPI_Request));
      /* END SYNC IMAGE preparation. */

      stat_tok = malloc(sizeof(MPI_Win));

      /* Install the initial team (id -1) backed by CAF_COMM_WORLD. */
      teams_list = (caf_teams_list *)calloc(1, sizeof(caf_teams_list));
      teams_list->team_id = -1;
      MPI_Comm *tmp_comm = (MPI_Comm *)calloc(1, sizeof(MPI_Comm));
      *tmp_comm = CAF_COMM_WORLD;
      teams_list->team = tmp_comm;
      teams_list->prev = NULL;
      used_teams = (caf_used_teams_list *)calloc(1, sizeof(caf_used_teams_list));
      used_teams->team_list_elem = teams_list;
      used_teams->prev = NULL;

#ifdef WITH_FAILED_IMAGES
      MPI_Comm_dup(MPI_COMM_WORLD, &alive_comm);
      /* Handling of failed/stopped images is done by setting an error handler
       * on a asynchronous request to each other image. For a failing image
       * the request will trigger the call of the error handler thus allowing
       * each other image to handle the failed/stopped image. */
      ierr = MPI_Comm_create_errhandler(failed_stopped_errorhandler_function,
                                        &failed_CAF_COMM_mpi_err_handler);
      chk_err(ierr);
      ierr = MPI_Comm_set_errhandler(CAF_COMM_WORLD,
                                     failed_CAF_COMM_mpi_err_handler);
      chk_err(ierr);
      ierr = MPI_Comm_set_errhandler(alive_comm,
                                     failed_CAF_COMM_mpi_err_handler);
      chk_err(ierr);
      ierr = MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);
      chk_err(ierr);

      /* The never-completing receive whose error handler fires when a peer
       * fails; alive_dummy is never actually written. */
      ierr = MPI_Irecv(&alive_dummy, 1, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG,
                       alive_comm, &alive_request); chk_err(ierr);

      image_stati = (int *) calloc(caf_num_images, sizeof(int));
#endif

#if MPI_VERSION >= 3
      ierr = MPI_Info_create(&mpi_info_same_size); chk_err(ierr);
      ierr = MPI_Info_set(mpi_info_same_size, "same_size", "true"); chk_err(ierr);

      /* Setting img_status */
      ierr = MPI_Win_create(&img_status, sizeof(int), 1, mpi_info_same_size,
                            CAF_COMM_WORLD, stat_tok); chk_err(ierr);
      CAF_Win_lock_all(*stat_tok);
#else
      ierr = MPI_Win_create(&img_status, sizeof(int), 1, MPI_INFO_NULL,
                            CAF_COMM_WORLD, stat_tok); chk_err(ierr);
#endif // MPI_VERSION

      /* Create the dynamic window to allow images to asynchronously attach
       * memory. */
      ierr = MPI_Win_create_dynamic(MPI_INFO_NULL, CAF_COMM_WORLD,
                                    &global_dynamic_win); chk_err(ierr);
      CAF_Win_lock_all(global_dynamic_win);
    }
}
922
923
/* Internal finalize of coarray program.  Publishes this image's final
 * execution status, notifies all other images, tears down all MPI windows,
 * tokens and communicators, and - when the CAF runtime owns MPI - calls
 * MPI_Finalize().  For a non-zero status_code the routine returns right
 * after the notification phase, because entering the collective teardown
 * while other images keep running would hang. */

void
finalize_internal(int status_code)
{
  int ierr;
  dprint("(status_code = %d)\n", status_code);

#ifdef WITH_FAILED_IMAGES
  /* During teardown the error handler must no longer check for stopped
   * images, or it would trigger on our own shutdown traffic. */
  no_stopped_images_check_in_errhandler = true;
  ierr = MPI_Win_flush_all(*stat_tok); chk_err(ierr);
#endif
  /* For future security enclose setting img_status in a lock. */
  CAF_Win_lock(MPI_LOCK_EXCLUSIVE, caf_this_image - 1, *stat_tok);
  if (status_code == 0)
  {
    /* Normal termination: mark this image as stopped. */
    img_status = STAT_STOPPED_IMAGE;
#ifdef WITH_FAILED_IMAGES
    image_stati[caf_this_image - 1] = STAT_STOPPED_IMAGE;
#endif
  }
  else
  {
    /* Abnormal termination: propagate the given status code. */
    img_status = status_code;
#ifdef WITH_FAILED_IMAGES
    image_stati[caf_this_image - 1] = status_code;
#endif
  }
  CAF_Win_unlock(caf_this_image - 1, *stat_tok);

  /* Announce to all other images, that this one has changed its execution
   * status. */
  for (int i = 0; i < caf_num_images - 1; ++i)
  {
    ierr = MPI_Send(&img_status, 1, MPI_INT, images_full[i] - 1,
                    MPI_TAG_CAF_SYNC_IMAGES, CAF_COMM_WORLD); chk_err(ierr);
  }

#ifdef WITH_FAILED_IMAGES
  /* Terminate the async request before revoking the comm, or we will get
   * triggered by the errorhandler, which we don't want here anymore. */
  ierr = MPI_Cancel(&alive_request); chk_err(ierr);

  if (status_code == 0)
  {
    /* In finalization do not report stopped or failed images any more.
     * NOTE(review): MPI_Errhandler_set is deprecated in favor of
     * MPI_Comm_set_errhandler - kept as-is here. */
    ierr = MPI_Errhandler_set(CAF_COMM_WORLD, MPI_ERRORS_RETURN); chk_err(ierr);
    ierr = MPI_Errhandler_set(alive_comm, MPI_ERRORS_RETURN); chk_err(ierr);
    /* Only add a conventional barrier to prevent images from quitting too
     * early, when this image is not failing. */
    dprint("Before MPI_Barrier(CAF_COMM_WORLD)\n");
    ierr = MPI_Barrier(CAF_COMM_WORLD); chk_err(ierr);
    dprint("After MPI_Barrier(CAF_COMM_WORLD) = %d\n", ierr);
  }
  else
    return;
#else
  /* Add a conventional barrier to prevent images from quitting too early. */
  if (status_code == 0)
  {
    ierr = MPI_Barrier(CAF_COMM_WORLD); chk_err(ierr);
  }
  else
    /* Without failed images support, but a given status_code, we need to
     * return to the caller, or we will hang in the following instead of
     * terminating the program. */
    return;
#endif

#ifdef GCC_GE_7
  /* Walk the slave-token list (newest first), detach token and payload
   * from the dynamic window and release them. */
  struct caf_allocated_slave_tokens_t
    *cur_stok = caf_allocated_slave_tokens,
    *prev_stok = NULL;
  CAF_Win_unlock_all(global_dynamic_win);
  while (cur_stok)
  {
    prev_stok = cur_stok->prev;
    ierr = MPI_Win_detach(global_dynamic_win, cur_stok); chk_err(ierr);
    if (cur_stok->token->memptr)
    {
      ierr = MPI_Win_detach(global_dynamic_win, cur_stok->token->memptr);
      chk_err(ierr);
      free(cur_stok->token->memptr);
    }
    free(cur_stok->token);
    free(cur_stok);
    cur_stok = prev_stok;
  }
#else
  CAF_Win_unlock_all(global_dynamic_win);
#endif

  dprint("Freed all slave tokens.\n");
  /* Release all regular tokens and their MPI windows. */
  struct caf_allocated_tokens_t
    *cur_tok = caf_allocated_tokens,
    *prev = caf_allocated_tokens;
  MPI_Win *p;

  while (cur_tok)
  {
    prev = cur_tok->prev;
    p = TOKEN(cur_tok->token);
    if (p != NULL)
      CAF_Win_unlock_all(*p);
#ifdef GCC_GE_7
    /* Unregister the window to the descriptors when freeing the token. */
    dprint("MPI_Win_free(p);\n");
    ierr = MPI_Win_free(p); chk_err(ierr);
    free(cur_tok->token);
#else // GCC_GE_7
    ierr = MPI_Win_free(p); chk_err(ierr);
#endif // GCC_GE_7
    free(cur_tok);
    cur_tok = prev;
  }
#if MPI_VERSION >= 3
  ierr = MPI_Info_free(&mpi_info_same_size); chk_err(ierr);
#endif // MPI_VERSION

  /* Free the global dynamic window. */
  ierr = MPI_Win_free(&global_dynamic_win); chk_err(ierr);
#ifdef WITH_FAILED_IMAGES
  if (status_code == 0)
  {
    dprint("before Win_unlock_all.\n");
    CAF_Win_unlock_all(*stat_tok);
    dprint("before Win_free(stat_tok)\n");
    ierr = MPI_Win_free(stat_tok); chk_err(ierr);
    dprint("before Comm_free(CAF_COMM_WORLD)\n");
    ierr = MPI_Comm_free(&CAF_COMM_WORLD); chk_err(ierr);
    ierr = MPI_Comm_free(&alive_comm); chk_err(ierr);
    dprint("after Comm_free(CAF_COMM_WORLD)\n");
  }

  ierr = MPI_Errhandler_free(&failed_CAF_COMM_mpi_err_handler); chk_err(ierr);

  /* Only call Finalize if CAF runtime Initialized MPI. */
  if (caf_owns_mpi)
  {
    ierr = MPI_Finalize(); chk_err(ierr);
  }
#else
  ierr = MPI_Comm_free(&CAF_COMM_WORLD); chk_err(ierr);

  CAF_Win_unlock_all(*stat_tok);
  ierr = MPI_Win_free(stat_tok); chk_err(ierr);

  /* Only call Finalize if CAF runtime Initialized MPI. */
  if (caf_owns_mpi)
  {
    ierr = MPI_Finalize(); chk_err(ierr);
  }
#endif

  /* Mark the runtime as finalized under the active-message lock. */
  pthread_mutex_lock(&lock_am);
  caf_is_finalized = 1;
  pthread_mutex_unlock(&lock_am);
  free(sync_handles);
  dprint("Finalisation done!!!\n");
}
1084
1085
/* Finalize coarray program.  Public entry point: delegates to
 * finalize_internal() with status 0 (normal termination). */
void
PREFIX(finalize) (void)
{
  finalize_internal(0);
}
1092
/* Return the 1-based index of the invoking image; the distance argument
 * is ignored.
 * TODO: This interface is violating the F2015 standard, but not the
 * gfortran API. Fix it (the fortran API). */
int
PREFIX(this_image) (int distance __attribute__((unused)))
{
  return caf_this_image;
}
1100
/* Return the total number of images; distance and failed arguments are
 * ignored.
 * TODO: This interface is violating the F2015 standard, but not the
 * gfortran API. Fix it (the fortran API). */
int
PREFIX(num_images) (int distance __attribute__((unused)),
                    int failed __attribute__((unused)))
{
  return caf_num_images;
}
1109
1110 #ifdef GCC_GE_7
1111 /* Register an object with the coarray library creating a token where
1112 * necessary/requested.
1113 * See the ABI-documentation of gfortran for the expected behavior.
1114 * Contrary to this expected behavior is this routine not registering memory
1115 * in the descriptor, that is already present. I.e., when the compiler
1116 * expects the library to allocate the memory for an object in desc, then
1117 * its data_ptr is NULL. This is still missing here. At the moment the
1118 * compiler also does not make use of it, but it is contrary to the
1119 * documentation. */
void
PREFIX(register) (size_t size, caf_register_t type, caf_token_t *token,
                  gfc_descriptor_t *desc, int *stat, char *errmsg,
                  charlen_t errmsg_len)
{
  void *mem = NULL;
  size_t actual_size;
  int l_var = 0, *init_array = NULL, ierr;

  if (unlikely(caf_is_finalized))
    goto error;

  /* Start GASNET if not already started. */
  if (caf_num_images == 0)
    PREFIX(init) (NULL, NULL);

  /* Lock, event and critical coarrays store one int per element. */
  if (type == CAF_REGTYPE_LOCK_STATIC || type == CAF_REGTYPE_LOCK_ALLOC ||
      type == CAF_REGTYPE_CRITICAL || type == CAF_REGTYPE_EVENT_STATIC ||
      type == CAF_REGTYPE_EVENT_ALLOC)
  {
    actual_size = size * sizeof(int);
    l_var = 1;
  }
  else
    actual_size = size;

  switch (type)
  {
    case CAF_REGTYPE_COARRAY_ALLOC_REGISTER_ONLY:
    case CAF_REGTYPE_COARRAY_ALLOC_ALLOCATE_ONLY:
      {
        /* Create or allocate a slave token. */
        mpi_caf_slave_token_t *slave_token;
#ifdef EXTRA_DEBUG_OUTPUT
        MPI_Aint mpi_address;
#endif
        /* The dynamic window is unlocked while attaching memory to it. */
        CAF_Win_unlock_all(global_dynamic_win);
        if (type == CAF_REGTYPE_COARRAY_ALLOC_REGISTER_ONLY)
        {
          /* Register only: create the slave token and attach it to the
           * dynamic window; the payload is allocated in a later call. */
          *token = calloc(1, sizeof(mpi_caf_slave_token_t));
          slave_token = (mpi_caf_slave_token_t *)(*token);
          ierr = MPI_Win_attach(global_dynamic_win, *token,
                                sizeof(mpi_caf_slave_token_t)); chk_err(ierr);
#ifdef EXTRA_DEBUG_OUTPUT
          ierr = MPI_Get_address(*token, &mpi_address); chk_err(ierr);
#endif
          dprint("Attach slave token %p (mpi-address: %zd) to "
                 "global_dynamic_window = %d\n",
                 slave_token, mpi_address, global_dynamic_win);

          /* Register the memory for auto freeing. */
          struct caf_allocated_slave_tokens_t *tmp =
            malloc(sizeof(struct caf_allocated_slave_tokens_t));
          tmp->prev = caf_allocated_slave_tokens;
          tmp->token = *token;
          caf_allocated_slave_tokens = tmp;
        }
        else // (type == CAF_REGTYPE_COARRAY_ALLOC_ALLOCATE_ONLY)
        {
          /* Allocate only: the slave token already exists; allocate the
           * payload and attach it to the dynamic window. */
          int ierr;
          slave_token = (mpi_caf_slave_token_t *)(*token);
          mem = malloc(actual_size);
          slave_token->memptr = mem;
          ierr = MPI_Win_attach(global_dynamic_win, mem, actual_size);
          chk_err(ierr);
#ifdef EXTRA_DEBUG_OUTPUT
          ierr = MPI_Get_address(mem, &mpi_address); chk_err(ierr);
#endif
          dprint("Attach mem %p (mpi-address: %zd) to global_dynamic_window = "
                 "%d on slave_token %p, size %zd, ierr: %d\n",
                 mem, mpi_address, global_dynamic_win, slave_token,
                 actual_size, ierr);
          if (desc != NULL && GFC_DESCRIPTOR_RANK(desc) != 0)
          {
            /* Remember the descriptor for non-scalar coarrays. */
            slave_token->desc = desc;
#ifdef EXTRA_DEBUG_OUTPUT
            ierr = MPI_Get_address(desc, &mpi_address); chk_err(ierr);
#endif
            dprint("Attached descriptor %p (mpi-address: %zd) to "
                   "global_dynamic_window %d at address %p, ierr = %d.\n",
                   desc, mpi_address, global_dynamic_win, &slave_token->desc,
                   ierr);
          }
        }
        CAF_Win_lock_all(global_dynamic_win);
        dprint("Slave token %p on exit: mpi_caf_slave_token_t { desc: %p }\n",
               slave_token, slave_token->desc);
      }
      break;
    default:
      {
        /* Regular coarray: allocate the memory together with a dedicated
         * MPI window. */
        mpi_caf_token_t *mpi_token;
        MPI_Win *p;

        *token = calloc(1, sizeof(mpi_caf_token_t));
        mpi_token = (mpi_caf_token_t *) (*token);
        p = TOKEN(mpi_token);

#if MPI_VERSION >= 3
        ierr = MPI_Win_allocate(actual_size, 1, MPI_INFO_NULL, CAF_COMM_WORLD,
                                &mem, p); chk_err(ierr);
        CAF_Win_lock_all(*p);
#else // MPI_VERSION
        ierr = MPI_Alloc_mem(actual_size, MPI_INFO_NULL, &mem); chk_err(ierr);
        ierr = MPI_Win_create(mem, actual_size, 1, MPI_INFO_NULL,
                              CAF_COMM_WORLD, p); chk_err(ierr);
#endif // MPI_VERSION

#ifndef GCC_GE_8
        if (GFC_DESCRIPTOR_RANK(desc) != 0)
#endif
          mpi_token->desc = desc;

        if (l_var)
        {
          /* Locks and events start in the cleared (zero) state. */
          init_array = (int *)calloc(size, sizeof(int));
          CAF_Win_lock(MPI_LOCK_EXCLUSIVE, caf_this_image - 1, *p);
          ierr = MPI_Put(init_array, size, MPI_INT, caf_this_image - 1, 0, size,
                         MPI_INT, *p); chk_err(ierr);
          CAF_Win_unlock(caf_this_image - 1, *p);
          free(init_array);
        }

        /* Register the token for auto freeing at finalize. */
        struct caf_allocated_tokens_t *tmp =
          malloc(sizeof(struct caf_allocated_tokens_t));
        tmp->prev = caf_allocated_tokens;
        tmp->token = *token;
        caf_allocated_tokens = tmp;

        if (stat)
          *stat = 0;

        /* The descriptor will be initialized only after the call to
         * register. */
        mpi_token->memptr = mem;
        dprint("Token %p on exit: mpi_caf_token_t "
               "{ (local_)memptr: %p, memptr_win: %d }\n",
               mpi_token, mpi_token->memptr, mpi_token->memptr_win);
      } // default:
      break;
  } // switch

  desc->base_addr = mem;
  return;

error:
  {
    /* Report the failure via stat/errmsg when given, else abort. */
    char msg[80];
    strcpy(msg, "Failed to allocate coarray");
    if (caf_is_finalized)
      strcat(msg, " - there are stopped images");

    if (stat)
    {
      *stat = caf_is_finalized ? STAT_STOPPED_IMAGE : 1;
      if (errmsg_len > 0)
      {
        size_t len = (strlen(msg) > (size_t) errmsg_len) ?
                     (size_t) errmsg_len : strlen (msg);
        memcpy(errmsg, msg, len);
        if ((size_t) errmsg_len > len)
          memset(&errmsg[len], ' ', errmsg_len - len);
      }
    }
    else
      caf_runtime_error(msg);
  }
}
1288 #else // GCC_LT_7
/* Register a coarray (pre-GCC-7 ABI): allocates the memory and an MPI
 * window per coarray and returns the local base address.  Here the token
 * is simply a pointer to a single MPI_Win. */
void *
PREFIX(register) (size_t size, caf_register_t type, caf_token_t *token,
                  int *stat, char *errmsg, charlen_t errmsg_len)
{
  void *mem;
  size_t actual_size;
  int l_var = 0, *init_array = NULL, ierr;

  if (unlikely(caf_is_finalized))
    goto error;

  /* Start GASNET if not already started. */
  if (caf_num_images == 0)
#ifdef COMPILER_SUPPORTS_CAF_INTRINSICS
    _gfortran_caf_init(NULL, NULL);
#else
    PREFIX(init) (NULL, NULL);
#endif

  /* Token contains only a list of pointers. */
  *token = malloc(sizeof(MPI_Win));
  MPI_Win *p = *token;

  /* Lock, event and critical coarrays store one int per element. */
  if (type == CAF_REGTYPE_LOCK_STATIC || type == CAF_REGTYPE_LOCK_ALLOC ||
      type == CAF_REGTYPE_CRITICAL || type == CAF_REGTYPE_EVENT_STATIC ||
      type == CAF_REGTYPE_EVENT_ALLOC)
  {
    actual_size = size * sizeof(int);
    l_var = 1;
  }
  else
    actual_size = size;

#if MPI_VERSION >= 3
  /* NOTE(review): the "same_size" info hint asserts that every image
   * passes an identical size to this window allocation - confirm this
   * holds for all register types routed through here. */
  ierr = MPI_Win_allocate(actual_size, 1, mpi_info_same_size, CAF_COMM_WORLD,
                          &mem, p); chk_err(ierr);
  CAF_Win_lock_all(*p);
#else // MPI_VERSION
  ierr = MPI_Alloc_mem(actual_size, MPI_INFO_NULL, &mem); chk_err(ierr);
  ierr = MPI_Win_create(mem, actual_size, 1, MPI_INFO_NULL,
                        CAF_COMM_WORLD, p); chk_err(ierr);
#endif // MPI_VERSION

  if (l_var)
  {
    /* Locks and events start in the cleared (zero) state. */
    init_array = (int *)calloc(size, sizeof(int));
    CAF_Win_lock(MPI_LOCK_EXCLUSIVE, caf_this_image - 1, *p);
    ierr = MPI_Put(init_array, size, MPI_INT, caf_this_image - 1, 0, size,
                   MPI_INT, *p); chk_err(ierr);
    CAF_Win_unlock(caf_this_image - 1, *p);
    free(init_array);
  }

  PREFIX(sync_all) (NULL, NULL, 0);

  /* Register the token for auto freeing at finalize. */
  struct caf_allocated_tokens_t *tmp =
    malloc(sizeof(struct caf_allocated_tokens_t));
  tmp->prev = caf_allocated_tokens;
  tmp->token = *token;
  caf_allocated_tokens = tmp;

  if (stat)
    *stat = 0;
  return mem;

error:
  {
    /* Report the failure via stat/errmsg when given, else abort. */
    char msg[80];
    strcpy(msg, "Failed to allocate coarray");
    if (caf_is_finalized)
      strcat(msg, " - there are stopped images");

    if (stat)
    {
      *stat = caf_is_finalized ? STAT_STOPPED_IMAGE : 1;
      if (errmsg_len > 0)
      {
        size_t len = (strlen(msg) > (size_t) errmsg_len) ?
                     (size_t) errmsg_len : strlen (msg);
        memcpy(errmsg, msg, len);
        if (errmsg_len > len)
          memset(&errmsg[len], ' ', errmsg_len - len);
      }
    }
    else
      caf_runtime_error(msg);
  }
  return NULL;
}
1378 #endif // GCC_GE_7
1379
1380
1381 #ifdef GCC_GE_7
1382 void
PREFIX(deregister)1383 PREFIX(deregister) (caf_token_t *token, int type, int *stat, char *errmsg,
1384 charlen_t errmsg_len)
1385 #else
1386 void
1387 PREFIX(deregister) (caf_token_t *token, int *stat, char *errmsg,
1388 charlen_t errmsg_len)
1389 #endif
1390 {
1391 dprint("deregister(%p)\n", *token);
1392 int ierr;
1393
1394 if (unlikely(caf_is_finalized))
1395 {
1396 const char msg[] =
1397 "Failed to deallocate coarray - there are stopped images";
1398 if (stat)
1399 {
1400 *stat = STAT_STOPPED_IMAGE;
1401
1402 if (errmsg_len > 0)
1403 {
1404 size_t len = (sizeof(msg) - 1 > (size_t) errmsg_len) ?
1405 (size_t) errmsg_len : sizeof (msg) - 1;
1406 memcpy(errmsg, msg, len);
1407 if (errmsg_len > len)
1408 memset(&errmsg[len], ' ', errmsg_len - len);
1409 }
1410 return;
1411 }
1412 caf_runtime_error(msg);
1413 }
1414
1415 if (stat)
1416 *stat = 0;
1417
1418 #ifdef GCC_GE_7
1419 if (type != CAF_DEREGTYPE_COARRAY_DEALLOCATE_ONLY)
1420 {
1421 /* Sync all images only, when deregistering the token. Just freeing the
1422 * memory needs no sync. */
1423 #ifdef WITH_FAILED_IMAGES
1424 ierr = MPI_Barrier(CAF_COMM_WORLD); chk_err(ierr);
1425 #else
1426 PREFIX(sync_all) (NULL, NULL, 0);
1427 #endif
1428 }
1429 #endif // GCC_GE_7
1430 {
1431 struct caf_allocated_tokens_t
1432 *cur = caf_allocated_tokens,
1433 *next = caf_allocated_tokens,
1434 *prev;
1435 MPI_Win *p;
1436
1437 while (cur)
1438 {
1439 prev = cur->prev;
1440
1441 if (cur->token == *token)
1442 {
1443 p = TOKEN(*token);
1444 #ifdef GCC_GE_7
1445 dprint("Found regular token %p for memptr_win: %d.\n",
1446 *token, ((mpi_caf_token_t *)*token)->memptr_win);
1447 #endif
1448 CAF_Win_unlock_all(*p);
1449 ierr = MPI_Win_free(p); chk_err(ierr);
1450
1451 next->prev = prev ? prev->prev: NULL;
1452
1453 if (cur == caf_allocated_tokens)
1454 caf_allocated_tokens = prev;
1455
1456 free(cur);
1457 free(*token);
1458 return;
1459 }
1460 next = cur;
1461 cur = prev;
1462 }
1463 }
1464
1465 #ifdef GCC_GE_7
1466 /* Feel through: Has to be a component token. */
1467 {
1468 struct caf_allocated_slave_tokens_t
1469 *cur_stok = caf_allocated_slave_tokens,
1470 *next_stok = caf_allocated_slave_tokens,
1471 *prev_stok;
1472
1473 while (cur_stok)
1474 {
1475 prev_stok = cur_stok->prev;
1476
1477 if (cur_stok->token == *token)
1478 {
1479 dprint("Found sub token %p.\n", *token);
1480
1481 mpi_caf_slave_token_t *slave_token = *(mpi_caf_slave_token_t **)token;
1482 CAF_Win_unlock_all(global_dynamic_win);
1483
1484 if (slave_token->memptr)
1485 {
1486 ierr = MPI_Win_detach(global_dynamic_win, slave_token->memptr);
1487 chk_err(ierr);
1488 free(slave_token->memptr);
1489 slave_token->memptr = NULL;
1490 if (type == CAF_DEREGTYPE_COARRAY_DEALLOCATE_ONLY)
1491 {
1492 CAF_Win_lock_all(global_dynamic_win);
1493 return; // All done.
1494 }
1495 }
1496 ierr = MPI_Win_detach(global_dynamic_win, slave_token); chk_err(ierr);
1497 CAF_Win_lock_all(global_dynamic_win);
1498
1499 next_stok->prev = prev_stok ? prev_stok->prev: NULL;
1500
1501 if (cur_stok == caf_allocated_slave_tokens)
1502 caf_allocated_slave_tokens = prev_stok;
1503
1504 free(cur_stok);
1505 free(*token);
1506 return;
1507 }
1508
1509 next_stok = cur_stok;
1510 cur_stok = prev_stok;
1511 }
1512 }
1513 #endif // GCC_GE_7
1514 #ifdef EXTRA_DEBUG_OUTPUT
1515 fprintf(stderr,
1516 "Fortran runtime warning on image %d: "
1517 "Could not find token to free %p", caf_this_image, *token);
1518 #endif
1519 }
1520
/* SYNC MEMORY implementation.  stat/errmsg are accepted for API
 * conformance but ignored. */
void
PREFIX(sync_memory) (int *stat __attribute__((unused)),
                     char *errmsg __attribute__((unused)),
                     charlen_t errmsg_len __attribute__((unused)))
{
#if defined(NONBLOCKING_PUT) && !defined(CAF_MPI_LOCK_UNLOCK)
  /* Presumably completes pending non-blocking puts; with the lock/unlock
   * protocol transfers are already completed - confirm in explicit_flush. */
  explicit_flush();
#endif
}
1530
1531
1532 void
PREFIX(sync_all)1533 PREFIX(sync_all) (int *stat, char *errmsg, charlen_t errmsg_len)
1534 {
1535 int err = 0, ierr;
1536
1537 dprint("Entering sync all.\n");
1538 if (unlikely(caf_is_finalized))
1539 {
1540 err = STAT_STOPPED_IMAGE;
1541 }
1542 else
1543 {
1544 #if defined(NONBLOCKING_PUT) && !defined(CAF_MPI_LOCK_UNLOCK)
1545 explicit_flush();
1546 #endif
1547
1548 #ifdef WITH_FAILED_IMAGES
1549 ierr = MPI_Barrier(alive_comm); chk_err(ierr);
1550 #else
1551 ierr = MPI_Barrier(CAF_COMM_WORLD); chk_err(ierr);
1552 #endif
1553 dprint("MPI_Barrier = %d.\n", err);
1554 if (ierr == STAT_FAILED_IMAGE)
1555 err = STAT_FAILED_IMAGE;
1556 else if (ierr != 0)
1557 MPI_Error_class(ierr, &err);
1558 }
1559
1560 if (stat != NULL)
1561 *stat = err;
1562 #ifdef WITH_FAILED_IMAGES
1563 else if (err == STAT_FAILED_IMAGE)
1564 /* F2015 requests stat to be set for FAILED IMAGES, else error out. */
1565 terminate_internal(err, 0);
1566 #endif
1567
1568 if (err != 0 && err != STAT_FAILED_IMAGE)
1569 {
1570 char msg[80];
1571 strcpy(msg, "SYNC ALL failed");
1572 if (caf_is_finalized)
1573 strcat(msg, " - there are stopped images");
1574
1575 if (errmsg_len > 0)
1576 {
1577 size_t len = (strlen(msg) > (size_t) errmsg_len) ?
1578 (size_t) errmsg_len : strlen (msg);
1579 memcpy(errmsg, msg, len);
1580 if (errmsg_len > len)
1581 memset(&errmsg[len], ' ', errmsg_len - len);
1582 }
1583 else if (stat == NULL)
1584 caf_runtime_error(msg);
1585 }
1586 dprint("Leaving sync all.\n");
1587 }
1588
/* Widen a kind-1 (byte) character string into a kind-4 (UCS-4) buffer.
 * Copies min(dst_size, src_size) characters and blank-pads the remainder
 * of dst.  Adapted from gcc:libgfortran/caf/single.c. */
static void
assign_char4_from_char1(size_t dst_size, size_t src_size, uint32_t *dst,
                        unsigned char *src)
{
  const size_t copy_len = (src_size < dst_size) ? src_size : dst_size;
  size_t pos = 0;
  /* Widen each byte to a 32-bit code point. */
  while (pos < copy_len)
  {
    dst[pos] = (uint32_t) src[pos];
    ++pos;
  }
  /* Blank-pad the tail when the destination is longer. */
  while (pos < dst_size)
  {
    dst[pos] = (uint32_t) ' ';
    ++pos;
  }
}
1606
1607
/* Narrow a kind-4 (UCS-4) character string into a kind-1 (byte) buffer.
 * Code points above UINT8_MAX become '?'; the remainder of dst is
 * blank-padded.  Adapted from gcc:libgfortran/caf/single.c. */
static void
assign_char1_from_char4(size_t dst_size, size_t src_size, unsigned char *dst,
                        uint32_t *src)
{
  const size_t copy_len = (src_size < dst_size) ? src_size : dst_size;
  for (size_t idx = 0; idx < copy_len; ++idx)
  {
    const uint32_t code = src[idx];
    /* Non-representable code points degrade to '?'. */
    dst[idx] = (code > UINT8_MAX) ? (unsigned char) '?' : (unsigned char) code;
  }
  for (size_t idx = copy_len; idx < dst_size; ++idx)
    dst[idx] = ' ';
}
1623
1624 /* Convert convertable types.
1625 * Copied from the gcc:libgfortran/caf/single.c. Can't say much about it. */
1626 static void
convert_type(void * dst,int dst_type,int dst_kind,void * src,int src_type,int src_kind,int * stat)1627 convert_type(void *dst, int dst_type, int dst_kind, void *src, int src_type,
1628 int src_kind, int *stat)
1629 {
1630 #ifdef HAVE_GFC_INTEGER_16
1631 typedef __int128 int128t;
1632 #else
1633 typedef int64_t int128t;
1634 #endif
1635
1636 #if defined(GFC_REAL_16_IS_LONG_DOUBLE)
1637 typedef long double real128t;
1638 typedef _Complex long double complex128t;
1639 #elif defined(HAVE_GFC_REAL_16)
1640 typedef _Complex float __attribute__((mode(TC))) __complex128;
1641 typedef __float128 real128t;
1642 typedef __complex128 complex128t;
1643 #elif defined(HAVE_GFC_REAL_10)
1644 typedef long double real128t;
1645 typedef long double complex128t;
1646 #else
1647 typedef double real128t;
1648 typedef _Complex double complex128t;
1649 #endif
1650
1651 int128t int_val = 0;
1652 real128t real_val = 0;
1653 complex128t cmpx_val = 0;
1654
1655 switch (src_type)
1656 {
1657 case BT_INTEGER:
1658 if (src_kind == 1)
1659 int_val = *(int8_t*) src;
1660 else if (src_kind == 2)
1661 int_val = *(int16_t*) src;
1662 else if (src_kind == 4)
1663 int_val = *(int32_t*) src;
1664 else if (src_kind == 8)
1665 int_val = *(int64_t*) src;
1666 #ifdef HAVE_GFC_INTEGER_16
1667 else if (src_kind == 16)
1668 int_val = *(int128t*) src;
1669 #endif
1670 else
1671 goto error;
1672 break;
1673 case BT_REAL:
1674 if (src_kind == 4)
1675 real_val = *(float*) src;
1676 else if (src_kind == 8)
1677 real_val = *(double*) src;
1678 #ifdef HAVE_GFC_REAL_10
1679 else if (src_kind == 10)
1680 real_val = *(long double*) src;
1681 #endif
1682 #ifdef HAVE_GFC_REAL_16
1683 else if (src_kind == 16)
1684 real_val = *(real128t*) src;
1685 #endif
1686 else
1687 goto error;
1688 break;
1689 case BT_COMPLEX:
1690 if (src_kind == 4)
1691 cmpx_val = *(_Complex float*) src;
1692 else if (src_kind == 8)
1693 cmpx_val = *(_Complex double*) src;
1694 #ifdef HAVE_GFC_REAL_10
1695 else if (src_kind == 10)
1696 cmpx_val = *(_Complex long double*) src;
1697 #endif
1698 #ifdef HAVE_GFC_REAL_16
1699 else if (src_kind == 16)
1700 cmpx_val = *(complex128t*) src;
1701 #endif
1702 else
1703 goto error;
1704 break;
1705 default:
1706 goto error;
1707 }
1708
1709 switch (dst_type)
1710 {
1711 case BT_INTEGER:
1712 if (src_type == BT_INTEGER)
1713 {
1714 if (dst_kind == 1)
1715 *(int8_t*) dst = (int8_t) int_val;
1716 else if (dst_kind == 2)
1717 *(int16_t*) dst = (int16_t) int_val;
1718 else if (dst_kind == 4)
1719 *(int32_t*) dst = (int32_t) int_val;
1720 else if (dst_kind == 8)
1721 *(int64_t*) dst = (int64_t) int_val;
1722 #ifdef HAVE_GFC_INTEGER_16
1723 else if (dst_kind == 16)
1724 *(int128t*) dst = (int128t) int_val;
1725 #endif
1726 else
1727 goto error;
1728 }
1729 else if (src_type == BT_REAL)
1730 {
1731 if (dst_kind == 1)
1732 *(int8_t*) dst = (int8_t) real_val;
1733 else if (dst_kind == 2)
1734 *(int16_t*) dst = (int16_t) real_val;
1735 else if (dst_kind == 4)
1736 *(int32_t*) dst = (int32_t) real_val;
1737 else if (dst_kind == 8)
1738 *(int64_t*) dst = (int64_t) real_val;
1739 #ifdef HAVE_GFC_INTEGER_16
1740 else if (dst_kind == 16)
1741 *(int128t*) dst = (int128t) real_val;
1742 #endif
1743 else
1744 goto error;
1745 }
1746 else if (src_type == BT_COMPLEX)
1747 {
1748 if (dst_kind == 1)
1749 *(int8_t*) dst = (int8_t) cmpx_val;
1750 else if (dst_kind == 2)
1751 *(int16_t*) dst = (int16_t) cmpx_val;
1752 else if (dst_kind == 4)
1753 *(int32_t*) dst = (int32_t) cmpx_val;
1754 else if (dst_kind == 8)
1755 *(int64_t*) dst = (int64_t) cmpx_val;
1756 #ifdef HAVE_GFC_INTEGER_16
1757 else if (dst_kind == 16)
1758 *(int128t*) dst = (int128t) cmpx_val;
1759 #endif
1760 else
1761 goto error;
1762 }
1763 else
1764 goto error;
1765 return;
1766 case BT_REAL:
1767 if (src_type == BT_INTEGER)
1768 {
1769 if (dst_kind == 4)
1770 *(float*) dst = (float) int_val;
1771 else if (dst_kind == 8)
1772 *(double*) dst = (double) int_val;
1773 #ifdef HAVE_GFC_REAL_10
1774 else if (dst_kind == 10)
1775 *(long double*) dst = (long double) int_val;
1776 #endif
1777 #ifdef HAVE_GFC_REAL_16
1778 else if (dst_kind == 16)
1779 *(real128t*) dst = (real128t) int_val;
1780 #endif
1781 else
1782 goto error;
1783 }
1784 else if (src_type == BT_REAL)
1785 {
1786 if (dst_kind == 4)
1787 *(float*) dst = (float) real_val;
1788 else if (dst_kind == 8)
1789 *(double*) dst = (double) real_val;
1790 #ifdef HAVE_GFC_REAL_10
1791 else if (dst_kind == 10)
1792 *(long double*) dst = (long double) real_val;
1793 #endif
1794 #ifdef HAVE_GFC_REAL_16
1795 else if (dst_kind == 16)
1796 *(real128t*) dst = (real128t) real_val;
1797 #endif
1798 else
1799 goto error;
1800 }
1801 else if (src_type == BT_COMPLEX)
1802 {
1803 if (dst_kind == 4)
1804 *(float*) dst = (float) cmpx_val;
1805 else if (dst_kind == 8)
1806 *(double*) dst = (double) cmpx_val;
1807 #ifdef HAVE_GFC_REAL_10
1808 else if (dst_kind == 10)
1809 *(long double*) dst = (long double) cmpx_val;
1810 #endif
1811 #ifdef HAVE_GFC_REAL_16
1812 else if (dst_kind == 16)
1813 *(real128t*) dst = (real128t) cmpx_val;
1814 #endif
1815 else
1816 goto error;
1817 }
1818 return;
1819 case BT_COMPLEX:
1820 if (src_type == BT_INTEGER)
1821 {
1822 if (dst_kind == 4)
1823 *(_Complex float*) dst = (_Complex float) int_val;
1824 else if (dst_kind == 8)
1825 *(_Complex double*) dst = (_Complex double) int_val;
1826 #ifdef HAVE_GFC_REAL_10
1827 else if (dst_kind == 10)
1828 *(_Complex long double*) dst = (_Complex long double) int_val;
1829 #endif
1830 #ifdef HAVE_GFC_REAL_16
1831 else if (dst_kind == 16)
1832 *(complex128t*) dst = (complex128t) int_val;
1833 #endif
1834 else
1835 goto error;
1836 }
1837 else if (src_type == BT_REAL)
1838 {
1839 if (dst_kind == 4)
1840 *(_Complex float*) dst = (_Complex float) real_val;
1841 else if (dst_kind == 8)
1842 *(_Complex double*) dst = (_Complex double) real_val;
1843 #ifdef HAVE_GFC_REAL_10
1844 else if (dst_kind == 10)
1845 *(_Complex long double*) dst = (_Complex long double) real_val;
1846 #endif
1847 #ifdef HAVE_GFC_REAL_16
1848 else if (dst_kind == 16)
1849 *(complex128t*) dst = (complex128t) real_val;
1850 #endif
1851 else
1852 goto error;
1853 }
1854 else if (src_type == BT_COMPLEX)
1855 {
1856 if (dst_kind == 4)
1857 *(_Complex float*) dst = (_Complex float) cmpx_val;
1858 else if (dst_kind == 8)
1859 *(_Complex double*) dst = (_Complex double) cmpx_val;
1860 #ifdef HAVE_GFC_REAL_10
1861 else if (dst_kind == 10)
1862 *(_Complex long double*) dst = (_Complex long double) cmpx_val;
1863 #endif
1864 #ifdef HAVE_GFC_REAL_16
1865 else if (dst_kind == 16)
1866 *(complex128t*) dst = (complex128t) cmpx_val;
1867 #endif
1868 else
1869 goto error;
1870 }
1871 else
1872 goto error;
1873 return;
1874 default:
1875 goto error;
1876 }
1877
1878 error:
1879 fprintf(stderr,
1880 "libcaf_mpi RUNTIME ERROR: Cannot convert type %d kind %d "
1881 "to type %d kind %d\n", src_type, src_kind, dst_type, dst_kind);
1882 if (stat)
1883 *stat = 1;
1884 else
1885 abort();
1886 }
1887
/* Convert `num` scalars, advancing the destination/source cursors by the
 * given byte strides between items.  The stride is expected to be the one
 * of (or similar to) the array's stride, i.e. >= 1 to progress from one
 * item to the next; a zero source stride broadcasts a scalar.
 * Portability fix: step through char pointers instead of performing
 * arithmetic on void * (a GCC extension, not standard C). */
static void
convert_with_strides(void *dst, int dst_type, int dst_kind,
                     ptrdiff_t byte_dst_stride,
                     void *src, int src_type, int src_kind,
                     ptrdiff_t byte_src_stride, size_t num, int *stat)
{
  char *dst_cursor = dst;
  char *src_cursor = src;
  for (size_t i = 0; i < num; ++i)
  {
    convert_type(dst_cursor, dst_type, dst_kind, src_cursor, src_type,
                 src_kind, stat);
    dst_cursor += byte_dst_stride;
    src_cursor += byte_src_stride;
  }
}
1904
1905 static void
copy_char_to_self(void * src,int src_type,int src_size,int src_kind,void * dst,int dst_type,int dst_size,int dst_kind,size_t size,bool src_is_scalar)1906 copy_char_to_self(void *src, int src_type, int src_size, int src_kind,
1907 void *dst, int dst_type, int dst_size, int dst_kind,
1908 size_t size, bool src_is_scalar)
1909 {
1910 #ifdef GFC_CAF_CHECK
1911 if (dst_type != BT_CHARACTER || src_type != BT_CHARACTER)
1912 caf_runtime_error("internal error: copy_char_to_self() "
1913 "for non-char types called.");
1914 #endif
1915 const size_t
1916 dst_len = dst_size / dst_kind,
1917 src_len = src_size / src_kind;
1918 const size_t min_len = (src_len < dst_len) ? src_len : dst_len;
1919 /* The address of dest passed by the compiler points on the right memory
1920 * location. No offset summation is needed. */
1921 if (dst_kind == src_kind)
1922 {
1923 for (size_t c = 0; c < size; ++c)
1924 {
1925 memmove(dst, src, min_len * dst_kind);
1926 /* Fill dest when source is too short. */
1927 if (dst_len > src_len)
1928 {
1929 int32_t * dest_addr = (int32_t *)(dst + dst_kind * src_len);
1930 const size_t pad_num = dst_len - src_len;
1931 if (dst_kind == 1)
1932 memset(dest_addr, ' ', pad_num);
1933 else if (dst_kind == 4)
1934 {
1935 const void * end_addr = &(dest_addr[pad_num]);
1936 while (dest_addr != end_addr)
1937 *(dest_addr++) = (int32_t)' ';
1938 }
1939 else
1940 caf_runtime_error(unreachable);
1941 }
1942 dst = (void *)((ptrdiff_t)(dst) + dst_size);
1943 if (!src_is_scalar)
1944 src = (void *)((ptrdiff_t)(src) + src_size);
1945 }
1946 }
1947 else
1948 {
1949 /* Assign using kind-conversion. */
1950 if (dst_kind == 1 && src_kind == 4)
1951 for (size_t c = 0; c < size; ++c)
1952 {
1953 assign_char1_from_char4(dst_len, src_len, dst, src);
1954 dst = (void *)((ptrdiff_t)(dst) + dst_size);
1955 if (!src_is_scalar)
1956 src = (void *)((ptrdiff_t)(src) + src_size);
1957 }
1958 else if (dst_kind == 4 && src_kind == 1)
1959 for (size_t c = 0; c < size; ++c)
1960 {
1961 assign_char4_from_char1(dst_len, src_len, dst, src);
1962 dst = (void *)((ptrdiff_t)(dst) + dst_size);
1963 if (!src_is_scalar)
1964 src = (void *)((ptrdiff_t)(src) + src_size);
1965 }
1966 else
1967 caf_runtime_error("_caf_send(): Unsupported char kinds in same image "
1968 "assignment (kind(lhs)= %d, kind(rhs) = %d)",
1969 dst_kind, src_kind);
1970 }
1971 }
1972
1973 static void
copy_to_self(gfc_descriptor_t * src,int src_kind,gfc_descriptor_t * dest,int dst_kind,size_t size,int * stat)1974 copy_to_self(gfc_descriptor_t *src, int src_kind,
1975 gfc_descriptor_t *dest, int dst_kind, size_t size, int *stat)
1976 {
1977 #ifdef GFC_CAF_CHECK
1978 if (GFC_DESCRIPTOR_TYPE(dest) == BT_CHARACTER
1979 || GFC_DESCRIPTOR_TYPE(src) == BT_CHARACTER)
1980 caf_runtime_error("internal error: copy_to_self() for char types called.");
1981 #endif
1982 /* The address of dest passed by the compiler points on the right
1983 * memory location. No offset summation is needed. */
1984 if (dst_kind == src_kind)
1985 memmove(dest->base_addr, src->base_addr, size * GFC_DESCRIPTOR_SIZE(dest));
1986 else
1987 /* When the rank is 0 then a scalar is copied to a vector and the stride
1988 * is zero. */
1989 convert_with_strides(dest->base_addr, GFC_DESCRIPTOR_TYPE(dest), dst_kind,
1990 GFC_DESCRIPTOR_SIZE(dest), src->base_addr,
1991 GFC_DESCRIPTOR_TYPE(src), src_kind,
1992 (GFC_DESCRIPTOR_RANK(src) > 0)
1993 ? GFC_DESCRIPTOR_SIZE(src) : 0, size, stat);
1994 }
1995
1996 /* token: The token of the array to be written to.
1997 * offset: Difference between the coarray base address and the actual data,
1998 * used for caf(3)[2] = 8 or caf[4]%a(4)%b = 7.
1999 * image_index: Index of the coarray (typically remote,
2000 * though it can also be on this_image).
2001 * data: Pointer to the to-be-transferred data.
2002 * size: The number of bytes to be transferred.
2003 * asynchronous: Return before the data transfer has been complete */
2004
/* Pick an MPI datatype whose size matches the requested byte count.
 * Candidates are probed in a fixed order and the first match wins; *dt is
 * left untouched when no candidate matches. */
void selectType(int size, MPI_Datatype *dt)
{
  const MPI_Datatype candidates[] = {
    MPI_BYTE, MPI_SHORT, MPI_INT, MPI_DOUBLE,
    MPI_COMPLEX, MPI_DOUBLE_COMPLEX
  };
  const size_t num_candidates = sizeof(candidates) / sizeof(candidates[0]);

  for (size_t k = 0; k < num_candidates; ++k)
  {
    int type_size;
    MPI_Type_size(candidates[k], &type_size);
    if (type_size == size)
    {
      *dt = candidates[k];
      return;
    }
  }
}
2025
2026 void
PREFIX(sendget)2027 PREFIX(sendget) (caf_token_t token_s, size_t offset_s, int image_index_s,
2028 gfc_descriptor_t *dest, caf_vector_t *dst_vector,
2029 caf_token_t token_g, size_t offset_g, int image_index_g,
2030 gfc_descriptor_t *src , caf_vector_t *src_vector,
2031 int dst_kind, int src_kind, bool mrt, int *pstat)
2032 {
2033 int j, ierr = 0;
2034 size_t i, size;
2035 ptrdiff_t dimextent;
2036 const int
2037 src_rank = GFC_DESCRIPTOR_RANK(src),
2038 dst_rank = GFC_DESCRIPTOR_RANK(dest);
2039 const size_t
2040 src_size = GFC_DESCRIPTOR_SIZE(src),
2041 dst_size = GFC_DESCRIPTOR_SIZE(dest);
2042 const int
2043 src_type = GFC_DESCRIPTOR_TYPE(src),
2044 dst_type = GFC_DESCRIPTOR_TYPE(dest);
2045 const bool
2046 src_contiguous = PREFIX(is_contiguous) (src),
2047 dst_contiguous = PREFIX(is_contiguous) (dest);
2048 const bool
2049 src_same_image = caf_this_image == image_index_g,
2050 dst_same_image = caf_this_image == image_index_s,
2051 same_type_and_kind = dst_type == src_type && dst_kind == src_kind;
2052
2053 MPI_Win *p = TOKEN(token_g);
2054 ptrdiff_t src_offset = 0, dst_offset = 0;
2055 void *pad_str = NULL;
2056 bool free_pad_str = false;
2057 void *src_t_buff = NULL, *dst_t_buff = NULL;
2058 bool free_src_t_buff = false, free_dst_t_buff = false;
2059 const bool
2060 dest_char_array_is_longer = dst_type == BT_CHARACTER && dst_size > src_size;
2061 int
2062 src_remote_image = image_index_g - 1,
2063 dst_remote_image = image_index_s - 1;
2064
2065 if (!src_same_image)
2066 {
2067 MPI_Group current_team_group, win_group;
2068 ierr = MPI_Comm_group(CAF_COMM_WORLD, ¤t_team_group); chk_err(ierr);
2069 ierr = MPI_Win_get_group(*p, &win_group); chk_err(ierr);
2070 ierr = MPI_Group_translate_ranks(current_team_group, 1,
2071 (int[]){src_remote_image}, win_group,
2072 &src_remote_image); chk_err(ierr);
2073 ierr = MPI_Group_free(¤t_team_group); chk_err(ierr);
2074 ierr = MPI_Group_free(&win_group); chk_err(ierr);
2075 }
2076 if (!dst_same_image)
2077 {
2078 MPI_Group current_team_group, win_group;
2079 ierr = MPI_Comm_group(CAF_COMM_WORLD, ¤t_team_group); chk_err(ierr);
2080 ierr = MPI_Win_get_group(*p, &win_group); chk_err(ierr);
2081 ierr = MPI_Group_translate_ranks(current_team_group, 1,
2082 (int[]){dst_remote_image}, win_group,
2083 &dst_remote_image); chk_err(ierr);
2084 ierr = MPI_Group_free(¤t_team_group); chk_err(ierr);
2085 ierr = MPI_Group_free(&win_group); chk_err(ierr);
2086 }
2087
2088 /* Ensure stat is always set. */
2089 #ifdef GCC_GE_7
2090 int * stat = pstat;
2091 if (stat)
2092 *stat = 0;
2093 #else
2094 /* Gcc prior to 7.0 does not have stat here. */
2095 int * stat = NULL;
2096 #endif
2097
2098 size = 1;
2099 for (j = 0; j < dst_rank; ++j)
2100 {
2101 dimextent = dest->dim[j]._ubound - dest->dim[j].lower_bound + 1;
2102 if (dimextent < 0)
2103 dimextent = 0;
2104 size *= dimextent;
2105 }
2106
2107 if (size == 0)
2108 return;
2109
2110 dprint("src_vector = %p, dst_vector = %p, src_image_index = %d, "
2111 "dst_image_index = %d, offset_src = %zd, offset_dst = %zd.\n",
2112 src_vector, dst_vector, image_index_g, image_index_s,
2113 offset_g, offset_s);
2114 check_image_health(image_index_g, stat);
2115 check_image_health(image_index_s, stat);
2116
2117 /* For char arrays: create the padding array, when dst is longer than src. */
2118 if (dest_char_array_is_longer)
2119 {
2120 const size_t pad_num = (dst_size / dst_kind) - (src_size / src_kind);
2121 const size_t pad_sz = pad_num * dst_kind;
2122 /* For big arrays alloca() may not be able to get the memory on the stack.
2123 * Use a regular malloc then. */
2124 if ((free_pad_str = ((pad_str = alloca(pad_sz)) == NULL)))
2125 {
2126 pad_str = malloc(pad_sz);
2127 if (src_t_buff == NULL)
2128 caf_runtime_error("Unable to allocate memory "
2129 "for internal buffer in sendget().");
2130 }
2131 if (dst_kind == 1)
2132 {
2133 memset(pad_str, ' ', pad_num);
2134 }
2135 else /* dst_kind == 4. */
2136 {
2137 for (int32_t *it = (int32_t *) pad_str,
2138 *itEnd = ((int32_t *) pad_str) + pad_num; it < itEnd; ++it)
2139 {
2140 *it = (int32_t) ' ';
2141 }
2142 }
2143 }
2144
2145 if (src_contiguous && src_vector == NULL)
2146 {
2147 if (src_same_image)
2148 {
2149 dprint("in caf_this == image_index, size = %zd, "
2150 "dst_kind = %d, src_kind = %d, dst_size = %zd, src_size = %zd\n",
2151 size, dst_kind, src_kind, dst_size, src_size);
2152 src_t_buff = src->base_addr;
2153 if (same_type_and_kind && !dest_char_array_is_longer)
2154 {
2155 dst_t_buff = src_t_buff;
2156 }
2157 else
2158 {
2159 dprint("allocating %zd bytes for dst_t_buff.\n", dst_size * size);
2160 if ((free_dst_t_buff = ((dst_t_buff = alloca(dst_size * size)) == NULL)))
2161 {
2162 dst_t_buff = malloc(dst_size * size);
2163 if (dst_t_buff == NULL)
2164 caf_runtime_error("Unable to allocate memory "
2165 "for internal buffer in sendget().");
2166 }
2167 if (dst_type == BT_CHARACTER)
2168 {
2169 /* The size is encoded in the descriptor's type for char arrays. */
2170 copy_char_to_self(src->base_addr, src_type, src_size, src_kind,
2171 dst_t_buff, dst_type, dst_size, dst_kind,
2172 size, src_rank == 0);
2173 }
2174 else
2175 {
2176 convert_with_strides(dst_t_buff, dst_type, dst_kind, dst_size,
2177 src->base_addr, src_type, src_kind,
2178 (src_rank > 0) ? src_size : 0, size, stat);
2179 }
2180 }
2181 }
2182 else
2183 {
2184 /* When replication is needed, only access the scalar on the remote. */
2185 const size_t src_real_size = src_rank > 0 ?
2186 (src_size * size) : src_size;
2187 if ((free_dst_t_buff = ((dst_t_buff = alloca(dst_size * size)) == NULL)))
2188 {
2189 dst_t_buff = malloc(dst_size * size);
2190 if (dst_t_buff == NULL)
2191 caf_runtime_error("Unable to allocate memory "
2192 "for internal buffer in sendget().");
2193 }
2194
2195 if (dst_kind != src_kind || src_rank == 0 || dest_char_array_is_longer)
2196 {
2197 if ((free_src_t_buff = ((src_t_buff = alloca(src_size * size)) == NULL)))
2198 {
2199 src_t_buff = malloc(src_size * size);
2200 if (src_t_buff == NULL)
2201 caf_runtime_error("Unable to allocate memory "
2202 "for internal buffer in sendget().");
2203 }
2204 }
2205 else
2206 src_t_buff = dst_t_buff;
2207
2208 CAF_Win_lock(MPI_LOCK_SHARED, src_remote_image, *p);
2209 if ((same_type_and_kind && dst_rank == src_rank)
2210 || dst_type == BT_CHARACTER)
2211 {
2212 if (!dest_char_array_is_longer
2213 && (dst_kind == src_kind || dst_type != BT_CHARACTER))
2214 {
2215 const size_t trans_size =
2216 ((dst_size > src_size) ? src_size : dst_size) * size;
2217 ierr = MPI_Get(dst_t_buff, trans_size, MPI_BYTE, src_remote_image,
2218 offset_g, trans_size, MPI_BYTE, *p); chk_err(ierr);
2219 }
2220 else
2221 {
2222 ierr = MPI_Get(src_t_buff, src_real_size, MPI_BYTE, src_remote_image,
2223 offset_g, src_real_size, MPI_BYTE, *p); chk_err(ierr);
2224 dprint("copy_char_to_self(src_size = %zd, src_kind = %d, "
2225 "dst_size = %zd, dst_kind = %d, size = %zd)\n",
2226 src_size, src_kind, dst_size, dst_kind, size);
2227 copy_char_to_self(src_t_buff, src_type, src_size, src_kind,
2228 dst_t_buff, dst_type, dst_size, dst_kind,
2229 size, src_rank == 0);
2230 dprint("|%s|\n", (char *)dst_t_buff);
2231 }
2232 }
2233 else
2234 {
2235 ierr = MPI_Get(src_t_buff, src_real_size, MPI_BYTE, src_remote_image,
2236 offset_g, src_real_size, MPI_BYTE, *p); chk_err(ierr);
2237 convert_with_strides(dst_t_buff, dst_type, dst_kind, dst_size,
2238 src_t_buff, src_type, src_kind,
2239 (src_rank > 0) ? src_size: 0, size, stat);
2240 }
2241 CAF_Win_unlock(src_remote_image, *p);
2242 }
2243 }
2244 #ifdef STRIDED
2245 else if (!src_same_image && same_type_and_kind && dst_type != BT_CHARACTER)
2246 {
2247 /* For strided copy, no type and kind conversion, copy to self or
2248 * character arrays are supported. */
2249 MPI_Datatype dt_s, dt_d, base_type_src, base_type_dst;
2250 int *arr_bl;
2251 int *arr_dsp_s;
2252
2253 if ((free_dst_t_buff = ((dst_t_buff = alloca(dst_size * size)) == NULL)))
2254 {
2255 dst_t_buff = malloc(dst_size * size);
2256 if (dst_t_buff == NULL)
2257 caf_runtime_error("Unable to allocate memory "
2258 "for internal buffer in sendget().");
2259 }
2260
2261 selectType(src_size, &base_type_src);
2262 selectType(dst_size, &base_type_dst);
2263
2264 if (src_rank == 1)
2265 {
2266 if (src_vector == NULL)
2267 {
2268 ierr = MPI_Type_vector(size, 1, src->dim[0]._stride, base_type_src,
2269 &dt_s); chk_err(ierr);
2270 }
2271 else
2272 {
2273 arr_bl = calloc(size, sizeof(int));
2274 arr_dsp_s = calloc(size, sizeof(int));
2275
2276 dprint("Setting up strided vector index.\n");
2277 #define KINDCASE(kind, type) \
2278 case kind: \
2279 for (i = 0; i < size; ++i) \
2280 { \
2281 arr_dsp_s[i] = ((ptrdiff_t) \
2282 ((type *) src_vector->u.v.vector)[i] - src->dim[0].lower_bound); \
2283 arr_bl[i] = 1; \
2284 } \
2285 break
2286
2287 switch (src_vector->u.v.kind)
2288 {
2289 KINDCASE(1, int8_t);
2290 KINDCASE(2, int16_t);
2291 KINDCASE(4, int32_t);
2292 KINDCASE(8, int64_t);
2293 #ifdef HAVE_GFC_INTEGER_16
2294 KINDCASE(16, __int128);
2295 #endif
2296 default:
2297 caf_runtime_error(unreachable);
2298 return;
2299 }
2300 #undef KINDCASE
2301 ierr = MPI_Type_indexed(size, arr_bl, arr_dsp_s, base_type_src, &dt_s);
2302 chk_err(ierr);
2303 free(arr_bl);
2304 free(arr_dsp_s);
2305 }
2306 ierr = MPI_Type_vector(size, 1, 1, base_type_dst, &dt_d); chk_err(ierr);
2307 }
2308 else
2309 {
2310 arr_bl = calloc(size, sizeof(int));
2311 arr_dsp_s = calloc(size, sizeof(int));
2312
2313 for (i = 0; i < size; ++i)
2314 {
2315 arr_bl[i] = 1;
2316 }
2317
2318 for (i = 0; i < size; ++i)
2319 {
2320 ptrdiff_t array_offset_sr = 0, extent = 1, tot_ext = 1;
2321 if (src_vector == NULL)
2322 {
2323 for (j = 0; j < src_rank - 1; ++j)
2324 {
2325 extent = src->dim[j]._ubound - src->dim[j].lower_bound + 1;
2326 array_offset_sr += ((i / tot_ext) % extent) * src->dim[j]._stride;
2327 tot_ext *= extent;
2328 }
2329
2330 array_offset_sr += (i / tot_ext) * src->dim[src_rank - 1]._stride;
2331 }
2332 else
2333 {
2334 #define KINDCASE(kind, type) \
2335 case kind: \
2336 array_offset_sr = ((ptrdiff_t) \
2337 ((type *) src_vector->u.v.vector)[i] - src->dim[0].lower_bound); \
2338 break
2339 switch (src_vector->u.v.kind)
2340 {
2341 KINDCASE(1, int8_t);
2342 KINDCASE(2, int16_t);
2343 KINDCASE(4, int32_t);
2344 KINDCASE(8, int64_t);
2345 #ifdef HAVE_GFC_INTEGER_16
2346 KINDCASE(16, __int128);
2347 #endif
2348 default:
2349 caf_runtime_error(unreachable);
2350 return;
2351 }
2352 #undef KINDCASE
2353 }
2354 arr_dsp_s[i] = array_offset_sr;
2355 }
2356
2357 ierr = MPI_Type_indexed(size, arr_bl, arr_dsp_s, base_type_src, &dt_s);
2358 chk_err(ierr);
2359 ierr = MPI_Type_vector(size, 1, 1, base_type_dst, &dt_d); chk_err(ierr);
2360
2361 free(arr_bl);
2362 free(arr_dsp_s);
2363 }
2364
2365 ierr = MPI_Type_commit(&dt_s); chk_err(ierr);
2366 ierr = MPI_Type_commit(&dt_d); chk_err(ierr);
2367
2368 CAF_Win_lock(MPI_LOCK_SHARED, src_remote_image, *p);
2369 ierr = MPI_Get(dst_t_buff, 1, dt_d, src_remote_image, offset_g, 1,
2370 dt_s, *p); chk_err(ierr);
2371 CAF_Win_unlock(src_remote_image, *p);
2372
2373 #ifdef WITH_FAILED_IMAGES
2374 check_image_health(image_index_g, stat);
2375
2376 if (!stat && ierr == STAT_FAILED_IMAGE)
2377 terminate_internal(ierr, 1);
2378
2379 if (stat)
2380 *stat = ierr;
2381 #else
2382 if (ierr != 0)
2383 {
2384 terminate_internal(ierr, 1);
2385 return;
2386 }
2387 #endif
2388 ierr = MPI_Type_free(&dt_s); chk_err(ierr);
2389 ierr = MPI_Type_free(&dt_d); chk_err(ierr);
2390 }
2391 #endif // STRIDED
2392 else
2393 {
2394 if ((free_dst_t_buff = ((dst_t_buff = alloca(dst_size * size)) == NULL)))
2395 {
2396 dst_t_buff = malloc(dst_size * size);
2397 if (dst_t_buff == NULL)
2398 caf_runtime_error("Unable to allocate memory "
2399 "for internal buffer in sendget().");
2400 }
2401
2402 if (src_same_image)
2403 src_t_buff = src->base_addr;
2404 else if (!same_type_and_kind)
2405 {
2406 if ((free_src_t_buff = (((src_t_buff = alloca(src_size))) == NULL)))
2407 {
2408 src_t_buff = malloc(src_size);
2409 if (src_t_buff == NULL)
2410 caf_runtime_error("Unable to allocate memory "
2411 "for internal buffer in sendget().");
2412 }
2413 }
2414
2415 if (!src_same_image)
2416 CAF_Win_lock(MPI_LOCK_SHARED, src_remote_image, *p);
2417 for (i = 0; i < size; ++i)
2418 {
2419 ptrdiff_t array_offset_sr = 0, extent = 1, tot_ext = 1;
2420 if (src_vector == NULL)
2421 {
2422 for (j = 0; j < src_rank - 1; ++j)
2423 {
2424 extent = src->dim[j]._ubound - src->dim[j].lower_bound + 1;
2425 array_offset_sr += ((i / tot_ext) % extent) * src->dim[j]._stride;
2426 tot_ext *= extent;
2427 }
2428
2429 array_offset_sr += (i / tot_ext) * src->dim[src_rank - 1]._stride;
2430 }
2431 else
2432 {
2433 #define KINDCASE(kind, type) \
2434 case kind: \
2435 array_offset_sr = ((ptrdiff_t) \
2436 ((type *)src_vector->u.v.vector)[i] - src->dim[0].lower_bound); \
2437 break
2438 switch (src_vector->u.v.kind)
2439 {
2440 KINDCASE(1, int8_t);
2441 KINDCASE(2, int16_t);
2442 KINDCASE(4, int32_t);
2443 KINDCASE(8, int64_t);
2444 #ifdef HAVE_GFC_INTEGER_16
2445 KINDCASE(16, __int128);
2446 #endif
2447 default:
2448 caf_runtime_error(unreachable);
2449 return;
2450 }
2451 }
2452 #undef KINDCASE
2453
2454 src_offset = array_offset_sr * src_size;
2455 void *dst = (void *)((char *) dst_t_buff + i * dst_size);
2456
2457 if (!src_same_image)
2458 {
2459 // Do the more likely first.
2460 dprint("kind(dst) = %d, el_sz(dst) = %zd, "
2461 "kind(src) = %d, el_sz(src) = %zd, lb(dst) = %zd.\n",
2462 dst_kind, dst_size, src_kind, src_size, src->dim[0].lower_bound);
2463 if (same_type_and_kind)
2464 {
2465 const size_t trans_size = (src_size < dst_size) ? src_size : dst_size;
2466 ierr = MPI_Get(dst, trans_size, MPI_BYTE, src_remote_image,
2467 offset_g + src_offset, trans_size, MPI_BYTE, *p);
2468 chk_err(ierr);
2469 if (pad_str)
2470 memcpy((void *)((char *)dst + src_size), pad_str,
2471 dst_size - src_size);
2472 }
2473 else if (dst_type == BT_CHARACTER)
2474 {
2475 ierr = MPI_Get(src_t_buff, src_size, MPI_BYTE, src_remote_image,
2476 offset_g + src_offset, src_size, MPI_BYTE, *p);
2477 chk_err(ierr);
2478 copy_char_to_self(src_t_buff, src_type, src_size, src_kind,
2479 dst, dst_type, dst_size, dst_kind, 1, true);
2480 }
2481 else
2482 {
2483 ierr = MPI_Get(src_t_buff, src_size, MPI_BYTE, src_remote_image,
2484 offset_g + src_offset, src_size, MPI_BYTE, *p);
2485 chk_err(ierr);
2486 convert_type(dst, dst_type, dst_kind, src_t_buff, src_type,
2487 src_kind, stat);
2488 }
2489 }
2490 else
2491 {
2492 if (!mrt)
2493 {
2494 dprint("strided same_image, no temp, for i = %zd, "
2495 "src_offset = %zd, offset = %zd.\n",
2496 i, src_offset, offset_g);
2497 if (same_type_and_kind)
2498 memmove(dst, src->base_addr + src_offset, src_size);
2499 else
2500 convert_type(dst, dst_type, dst_kind,
2501 src->base_addr + src_offset, src_type, src_kind, stat);
2502 }
2503 else
2504 {
2505 dprint("strided same_image, *WITH* temp, for i = %zd.\n", i);
2506 if (same_type_and_kind)
2507 memmove(dst, src->base_addr + src_offset, src_size);
2508 else
2509 convert_type(dst, dst_type, dst_kind,
2510 src->base_addr + src_offset, src_type, src_kind, stat);
2511 }
2512 }
2513
2514 #ifndef WITH_FAILED_IMAGES
2515 if (ierr != 0)
2516 {
2517 caf_runtime_error("MPI Error: %d", ierr);
2518 return;
2519 }
2520 #endif
2521 }
2522 if (!src_same_image)
2523 CAF_Win_unlock(src_remote_image, *p);
2524 }
2525
2526 p = TOKEN(token_s);
2527 /* Now transfer data to the remote dest. */
2528 if (dst_contiguous && dst_vector == NULL)
2529 {
2530 if (dst_same_image)
2531 memmove(dest->base_addr, dst_t_buff, dst_size * size);
2532 else
2533 {
2534 CAF_Win_lock(MPI_LOCK_EXCLUSIVE, dst_remote_image, *p);
2535 const size_t trans_size = size * dst_size;
2536 ierr = MPI_Put(dst_t_buff, trans_size, MPI_BYTE, dst_remote_image,
2537 offset_s, trans_size, MPI_BYTE, *p); chk_err(ierr);
2538 #ifdef CAF_MPI_LOCK_UNLOCK
2539 MPI_Win_unlock(dst_remote_image, *p);
2540 #elif NONBLOCKING_PUT
2541 /* Pending puts init */
2542 if (pending_puts == NULL)
2543 {
2544 pending_puts = calloc(1,sizeof(win_sync));
2545 pending_puts->next = NULL;
2546 pending_puts->win = token_s;
2547 pending_puts->img = dst_remote_image;
2548 last_elem = pending_puts;
2549 last_elem->next = NULL;
2550 }
2551 else
2552 {
2553 last_elem->next = calloc(1,sizeof(win_sync));
2554 last_elem = last_elem->next;
2555 last_elem->win = token_s;
2556 last_elem->img = dst_remote_image;
2557 last_elem->next = NULL;
2558 }
2559 #else
2560 ierr = MPI_Win_flush(dst_remote_image, *p); chk_err(ierr);
2561 #endif // CAF_MPI_LOCK_UNLOCK
2562 }
2563 }
2564 #ifdef STRIDED
2565 else if (!dst_same_image && same_type_and_kind && dst_type != BT_CHARACTER)
2566 {
2567 /* For strided copy, no type and kind conversion, copy to self or
2568 * character arrays are supported. */
2569 MPI_Datatype dt_s, dt_d, base_type_dst;
2570 int *arr_bl, *arr_dsp_d;
2571
2572 selectType(dst_size, &base_type_dst);
2573
2574 if (dst_rank == 1)
2575 {
2576 if (dst_vector == NULL)
2577 {
2578 ierr = MPI_Type_vector(size, 1, dest->dim[0]._stride, base_type_dst,
2579 &dt_d); chk_err(ierr);
2580 }
2581 else
2582 {
2583 arr_bl = calloc(size, sizeof(int));
2584 arr_dsp_d = calloc(size, sizeof(int));
2585
2586 dprint("Setting up strided vector index.\n");
2587 #define KINDCASE(kind, type) \
2588 case kind: \
2589 for (i = 0; i < size; ++i) \
2590 { \
2591 arr_dsp_d[i] = ((ptrdiff_t) \
2592 ((type *) dst_vector->u.v.vector)[i] - dest->dim[0].lower_bound); \
2593 arr_bl[i] = 1; \
2594 } \
2595 break
2596 switch (dst_vector->u.v.kind)
2597 {
2598 KINDCASE(1, int8_t);
2599 KINDCASE(2, int16_t);
2600 KINDCASE(4, int32_t);
2601 KINDCASE(8, int64_t);
2602 #ifdef HAVE_GFC_INTEGER_16
2603 KINDCASE(16, __int128);
2604 #endif
2605 default:
2606 caf_runtime_error(unreachable);
2607 return;
2608 }
2609 #undef KINDCASE
2610 ierr = MPI_Type_indexed(size, arr_bl, arr_dsp_d, base_type_dst, &dt_d);
2611 chk_err(ierr);
2612 free(arr_bl);
2613 free(arr_dsp_d);
2614 }
2615 ierr = MPI_Type_vector(size, 1, 1, base_type_dst, &dt_s); chk_err(ierr);
2616 }
2617 else
2618 {
2619 arr_bl = calloc(size, sizeof(int));
2620 arr_dsp_d = calloc(size, sizeof(int));
2621
2622 for (i = 0; i < size; ++i)
2623 {
2624 arr_bl[i] = 1;
2625 }
2626
2627 for (i = 0; i < size; ++i)
2628 {
2629 ptrdiff_t array_offset_dst = 0, extent = 1, tot_ext = 1;
2630 if (dst_vector == NULL)
2631 {
2632 for (j = 0; j < dst_rank - 1; ++j)
2633 {
2634 extent = dest->dim[j]._ubound - dest->dim[j].lower_bound + 1;
2635 array_offset_dst += ((i / tot_ext) % extent) * dest->dim[j]._stride;
2636 tot_ext *= extent;
2637 }
2638
2639 array_offset_dst += (i / tot_ext) * dest->dim[dst_rank - 1]._stride;
2640 }
2641 else
2642 {
2643 #define KINDCASE(kind, type) \
2644 case kind: \
2645 array_offset_dst = ((ptrdiff_t) \
2646 ((type *) dst_vector->u.v.vector)[i] - dest->dim[0].lower_bound); \
2647 break
2648 switch (dst_vector->u.v.kind)
2649 {
2650 KINDCASE(1, int8_t);
2651 KINDCASE(2, int16_t);
2652 KINDCASE(4, int32_t);
2653 KINDCASE(8, int64_t);
2654 #ifdef HAVE_GFC_INTEGER_16
2655 KINDCASE(16, __int128);
2656 #endif
2657 default:
2658 caf_runtime_error(unreachable);
2659 return;
2660 }
2661 #undef KINDCASE
2662 }
2663 arr_dsp_d[i] = array_offset_dst;
2664 }
2665
2666 ierr = MPI_Type_vector(size, 1, 1, base_type_dst, &dt_s); chk_err(ierr);
2667 ierr = MPI_Type_indexed(size, arr_bl, arr_dsp_d, base_type_dst, &dt_d);
2668 chk_err(ierr);
2669
2670 free(arr_bl);
2671 free(arr_dsp_d);
2672 }
2673
2674 ierr = MPI_Type_commit(&dt_s); chk_err(ierr);
2675 ierr = MPI_Type_commit(&dt_d); chk_err(ierr);
2676
2677 CAF_Win_lock(MPI_LOCK_EXCLUSIVE, dst_remote_image, *p);
2678 ierr = MPI_Put(dst_t_buff, 1, dt_s, dst_remote_image, offset_s, 1,
2679 dt_d, *p); chk_err(ierr);
2680 CAF_Win_unlock(dst_remote_image, *p);
2681
2682 #ifdef WITH_FAILED_IMAGES
2683 check_image_health(image_index_s, stat);
2684
2685 if (!stat && ierr == STAT_FAILED_IMAGE)
2686 terminate_internal(ierr, 1);
2687
2688 if (stat)
2689 *stat = ierr;
2690 #else
2691 if (ierr != 0)
2692 {
2693 terminate_internal(ierr, 1);
2694 return;
2695 }
2696 #endif
2697 ierr = MPI_Type_free(&dt_s); chk_err(ierr);
2698 ierr = MPI_Type_free(&dt_d); chk_err(ierr);
2699 }
2700 #endif // STRIDED
2701 else
2702 {
2703 if (!dst_same_image)
2704 CAF_Win_lock(MPI_LOCK_EXCLUSIVE, dst_remote_image, *p);
2705 for (i = 0; i < size; ++i)
2706 {
2707 ptrdiff_t array_offset_dst = 0, extent = 1, tot_ext = 1;
2708 if (dst_vector == NULL)
2709 {
2710 for (j = 0; j < dst_rank - 1; ++j)
2711 {
2712 extent = dest->dim[j]._ubound - dest->dim[j].lower_bound + 1;
2713 array_offset_dst += ((i / tot_ext) % extent) * dest->dim[j]._stride;
2714 tot_ext *= extent;
2715 }
2716
2717 array_offset_dst += (i / tot_ext) * dest->dim[dst_rank - 1]._stride;
2718 }
2719 else
2720 {
2721 #define KINDCASE(kind, type) \
2722 case kind: \
2723 array_offset_dst = ((ptrdiff_t) \
2724 ((type *)dst_vector->u.v.vector)[i] - dest->dim[0].lower_bound); \
2725 break
2726 switch (dst_vector->u.v.kind)
2727 {
2728 KINDCASE(1, int8_t);
2729 KINDCASE(2, int16_t);
2730 KINDCASE(4, int32_t);
2731 KINDCASE(8, int64_t);
2732 #ifdef HAVE_GFC_INTEGER_16
2733 KINDCASE(16, __int128);
2734 #endif
2735 default:
2736 caf_runtime_error(unreachable);
2737 return;
2738 }
2739 }
2740 #undef KINDCASE
2741 dst_offset = array_offset_dst * dst_size;
2742
2743 void *sr = (void *)((char *)dst_t_buff + i * dst_size);
2744
2745 if (!dst_same_image)
2746 {
2747 // Do the more likely first.
2748 ierr = MPI_Put(sr, dst_size, MPI_BYTE, dst_remote_image,
2749 offset_s + dst_offset, dst_size, MPI_BYTE, *p);
2750 chk_err(ierr);
2751 }
2752 else
2753 memmove(dest->base_addr + dst_offset, sr, dst_size);
2754
2755 #ifndef WITH_FAILED_IMAGES
2756 if (ierr != 0)
2757 {
2758 caf_runtime_error("MPI Error: %d", ierr);
2759 return;
2760 }
2761 #endif
2762 } /* for */
2763 if (!dst_same_image)
2764 CAF_Win_unlock(dst_remote_image, *p);
2765 }
2766
2767 /* Free memory, when not allocated on stack. */
2768 if (free_src_t_buff)
2769 free(src_t_buff);
2770 if (free_dst_t_buff)
2771 free(dst_t_buff);
2772 if (free_pad_str)
2773 free(pad_str);
2774
2775 #ifdef WITH_FAILED_IMAGES
2776 /* Catch failed images, when failed image support is active. */
2777 check_image_health(image_index_g , stat);
2778 check_image_health(image_index_s , stat);
2779 #endif
2780
2781 if (ierr != MPI_SUCCESS)
2782 {
2783 int mpi_error;
2784 MPI_Error_class(ierr, &mpi_error);
2785 if (stat)
2786 *stat = mpi_error;
2787 else
2788 {
2789 int error_len = 2048;
2790 char error_str[error_len];
2791 strcpy(error_str, "MPI-error: ");
2792 MPI_Error_string(mpi_error, &error_str[11], &error_len);
2793 error_stop_str(error_str, error_len + 11, false);
2794 }
2795 }
2796 }
2797
2798
2799 /* Send array data from src to dest on a remote image.
2800 * The argument mrt means may_require_temporary */
2801
2802 void
PREFIX(send)2803 PREFIX(send) (caf_token_t token, size_t offset, int image_index,
2804 gfc_descriptor_t *dest, caf_vector_t *dst_vector,
2805 gfc_descriptor_t *src, int dst_kind, int src_kind,
2806 bool mrt, int *pstat)
2807 {
2808 int j, ierr = 0;
2809 size_t i, size;
2810 ptrdiff_t dimextent;
2811 const int
2812 src_rank = GFC_DESCRIPTOR_RANK(src),
2813 dst_rank = GFC_DESCRIPTOR_RANK(dest);
2814 const size_t
2815 src_size = GFC_DESCRIPTOR_SIZE(src),
2816 dst_size = GFC_DESCRIPTOR_SIZE(dest);
2817 const int
2818 src_type = GFC_DESCRIPTOR_TYPE(src),
2819 dst_type = GFC_DESCRIPTOR_TYPE(dest);
2820 const bool
2821 src_contiguous = PREFIX(is_contiguous) (src),
2822 dst_contiguous = PREFIX(is_contiguous) (dest);
2823 const bool
2824 same_image = caf_this_image == image_index,
2825 same_type_and_kind = dst_type == src_type && dst_kind == src_kind;
2826
2827 MPI_Win *p = TOKEN(token);
2828 ptrdiff_t dst_offset = 0;
2829 void *pad_str = NULL, *t_buff = NULL;
2830 bool free_pad_str = false, free_t_buff = false;
2831 const bool dest_char_array_is_longer
2832 = dst_type == BT_CHARACTER && dst_size > src_size && !same_image;
2833 int remote_image = image_index - 1;
2834 if (!same_image)
2835 {
2836 MPI_Group current_team_group, win_group;
2837 ierr = MPI_Comm_group(CAF_COMM_WORLD, ¤t_team_group); chk_err(ierr);
2838 ierr = MPI_Win_get_group(*p, &win_group); chk_err(ierr);
2839 ierr = MPI_Group_translate_ranks(current_team_group, 1,
2840 (int[]){remote_image}, win_group,
2841 &remote_image); chk_err(ierr);
2842 ierr = MPI_Group_free(¤t_team_group); chk_err(ierr);
2843 ierr = MPI_Group_free(&win_group); chk_err(ierr);
2844 }
2845
2846 /* Ensure stat is always set. */
2847 #ifdef GCC_GE_7
2848 int * stat = pstat;
2849 if (stat)
2850 *stat = 0;
2851 #else
2852 /* Gcc prior to 7.0 does not have stat here. */
2853 int * stat = NULL;
2854 #endif
2855
2856 size = 1;
2857 for (j = 0; j < dst_rank; ++j)
2858 {
2859 dimextent = dest->dim[j]._ubound - dest->dim[j].lower_bound + 1;
2860 if (dimextent < 0)
2861 dimextent = 0;
2862 size *= dimextent;
2863 }
2864
2865 if (size == 0)
2866 return;
2867
2868 dprint("dst_vector = %p, image_index = %d, offset = %zd.\n",
2869 dst_vector, image_index, offset);
2870 check_image_health(image_index, stat);
2871
2872 /* For char arrays: create the padding array, when dst is longer than src. */
2873 if (dest_char_array_is_longer)
2874 {
2875 const size_t pad_num = (dst_size / dst_kind) - (src_size / src_kind);
2876 const size_t pad_sz = pad_num * dst_kind;
2877 /* For big arrays alloca() may not be able to get the memory on the stack.
2878 * Use a regular malloc then. */
2879 if ((free_pad_str = ((pad_str = alloca(pad_sz)) == NULL)))
2880 {
2881 pad_str = malloc(pad_sz);
2882 if (t_buff == NULL)
2883 caf_runtime_error("Unable to allocate memory "
2884 "for internal buffer in send().");
2885 }
2886 if (dst_kind == 1)
2887 memset(pad_str, ' ', pad_num);
2888 else /* dst_kind == 4. */
2889 {
2890 for (int32_t *it = (int32_t *) pad_str,
2891 *itEnd = ((int32_t *) pad_str) + pad_num; it < itEnd; ++it)
2892 {
2893 *it = (int32_t) ' ';
2894 }
2895 }
2896 }
2897
2898 if (src_contiguous && dst_contiguous && dst_vector == NULL)
2899 {
2900 if (same_image)
2901 {
2902 dprint("in caf_this == image_index, size = %zd, dst_kind = %d, "
2903 "src_kind = %d\n", size, dst_kind, src_kind);
2904 if (dst_type == BT_CHARACTER)
2905 /* The size is encoded in the descriptor's type for char arrays. */
2906 copy_char_to_self(src->base_addr, src_type, src_size, src_kind,
2907 dest->base_addr, dst_type, dst_size, dst_kind,
2908 size, src_rank == 0);
2909 else
2910 copy_to_self(src, src_kind, dest, dst_kind, size, stat);
2911 return;
2912 }
2913 else
2914 {
2915 CAF_Win_lock(MPI_LOCK_EXCLUSIVE, remote_image, *p);
2916 if (dst_kind != src_kind || dest_char_array_is_longer || src_rank == 0)
2917 {
2918 if ((free_t_buff = ((t_buff = alloca(dst_size * size)) == NULL)))
2919 {
2920 t_buff = malloc(dst_size * size);
2921 if (t_buff == NULL)
2922 caf_runtime_error("Unable to allocate memory "
2923 "for internal buffer in send().");
2924 }
2925 }
2926
2927 if ((same_type_and_kind && dst_rank == src_rank)
2928 || dst_type == BT_CHARACTER)
2929 {
2930 if (dest_char_array_is_longer
2931 || (dst_kind != src_kind && dst_type == BT_CHARACTER))
2932 {
2933 copy_char_to_self(src->base_addr, src_type, src_size,
2934 src_kind, t_buff, dst_type, dst_size,
2935 dst_kind, size, src_rank == 0);
2936 ierr = MPI_Put(t_buff, dst_size, MPI_BYTE, remote_image,
2937 offset, dst_size, MPI_BYTE, *p); chk_err(ierr);
2938 }
2939 else
2940 {
2941 const size_t trans_size =
2942 ((dst_size > src_size) ? src_size : dst_size) * size;
2943 ierr = MPI_Put(src->base_addr, trans_size, MPI_BYTE, remote_image,
2944 offset, trans_size, MPI_BYTE, *p); chk_err(ierr);
2945 }
2946 }
2947 else
2948 {
2949 convert_with_strides(t_buff, dst_type, dst_kind, dst_size,
2950 src->base_addr, src_type, src_kind,
2951 (src_rank > 0) ? src_size: 0, size, stat);
2952 ierr = MPI_Put(t_buff, dst_size * size, MPI_BYTE, remote_image,
2953 offset, dst_size * size, MPI_BYTE, *p); chk_err(ierr);
2954 }
2955 #ifdef CAF_MPI_LOCK_UNLOCK
2956 ierr = MPI_Win_unlock(remote_image, *p); chk_err(ierr);
2957 #elif NONBLOCKING_PUT
2958 /* Pending puts init */
2959 if (pending_puts == NULL)
2960 {
2961 pending_puts = calloc(1,sizeof(win_sync));
2962 pending_puts->next = NULL;
2963 pending_puts->win = token;
2964 pending_puts->img = remote_image;
2965 last_elem = pending_puts;
2966 last_elem->next = NULL;
2967 }
2968 else
2969 {
2970 last_elem->next = calloc(1,sizeof(win_sync));
2971 last_elem = last_elem->next;
2972 last_elem->win = token;
2973 last_elem->img = remote_image;
2974 last_elem->next = NULL;
2975 }
2976 #else
2977 ierr = MPI_Win_flush(remote_image, *p); chk_err(ierr);
2978 #endif // CAF_MPI_LOCK_UNLOCK
2979 }
2980 }
2981
2982 #ifdef STRIDED
2983 else if (!same_image && same_type_and_kind && dst_type != BT_CHARACTER)
2984 {
2985 /* For strided copy, no type and kind conversion, copy to self or
2986 * character arrays are supported. */
2987 MPI_Datatype dt_s, dt_d, base_type_src, base_type_dst;
2988 int *arr_bl, *arr_dsp_s, *arr_dsp_d;
2989
2990 selectType(src_size, &base_type_src);
2991 selectType(dst_size, &base_type_dst);
2992
2993 if (dst_rank == 1)
2994 {
2995 if (dst_vector == NULL)
2996 {
2997 dprint("Setting up mpi datatype vector with "
2998 "stride %d, size %d and offset %d.\n",
2999 dest->dim[0]._stride, size, offset);
3000 ierr = MPI_Type_vector(size, 1, dest->dim[0]._stride, base_type_dst,
3001 &dt_d); chk_err(ierr);
3002 }
3003 else
3004 {
3005 arr_bl = calloc(size, sizeof(int));
3006 arr_dsp_d = calloc(size, sizeof(int));
3007
3008 dprint("Setting up strided vector index.\n");
3009 #define KINDCASE(kind, type) \
3010 case kind: \
3011 for (i = 0; i < size; ++i) \
3012 { \
3013 arr_dsp_d[i] = ((ptrdiff_t) \
3014 ((type *) dst_vector->u.v.vector)[i] - dest->dim[0].lower_bound); \
3015 arr_bl[i] = 1; \
3016 } \
3017 break
3018 switch (dst_vector->u.v.kind)
3019 {
3020 KINDCASE(1, int8_t);
3021 KINDCASE(2, int16_t);
3022 KINDCASE(4, int32_t);
3023 KINDCASE(8, int64_t);
3024 #ifdef HAVE_GFC_INTEGER_16
3025 KINDCASE(16, __int128);
3026 #endif
3027 default:
3028 caf_runtime_error(unreachable);
3029 return;
3030 }
3031 #undef KINDCASE
3032 ierr = MPI_Type_indexed(size, arr_bl, arr_dsp_d, base_type_dst, &dt_d);
3033 chk_err(ierr);
3034 free(arr_bl);
3035 free(arr_dsp_d);
3036 }
3037 MPI_Type_vector(size, 1, src->dim[0]._stride, base_type_src, &dt_s);
3038 }
3039 else
3040 {
3041 arr_bl = calloc(size, sizeof(int));
3042 arr_dsp_s = calloc(size, sizeof(int));
3043 arr_dsp_d = calloc(size, sizeof(int));
3044
3045 for (i = 0; i < size; ++i)
3046 {
3047 arr_bl[i] = 1;
3048 }
3049
3050 for (i = 0; i < size; ++i)
3051 {
3052 ptrdiff_t array_offset_dst = 0, extent = 1, tot_ext = 1;
3053 if (dst_vector == NULL)
3054 {
3055 for (j = 0; j < dst_rank - 1; ++j)
3056 {
3057 extent = dest->dim[j]._ubound - dest->dim[j].lower_bound + 1;
3058 array_offset_dst += ((i / tot_ext) % extent) * dest->dim[j]._stride;
3059 tot_ext *= extent;
3060 }
3061
3062 array_offset_dst += (i / tot_ext) * dest->dim[dst_rank - 1]._stride;
3063 }
3064 else
3065 {
3066 #define KINDCASE(kind, type) \
3067 case kind: \
3068 array_offset_dst = ((ptrdiff_t) \
3069 ((type *) dst_vector->u.v.vector)[i] - dest->dim[0].lower_bound); \
3070 break
3071 switch (dst_vector->u.v.kind)
3072 {
3073 KINDCASE(1, int8_t);
3074 KINDCASE(2, int16_t);
3075 KINDCASE(4, int32_t);
3076 KINDCASE(8, int64_t);
3077 #ifdef HAVE_GFC_INTEGER_16
3078 KINDCASE(16, __int128);
3079 #endif
3080 default:
3081 caf_runtime_error(unreachable);
3082 return;
3083 }
3084 #undef KINDCASE
3085 }
3086 arr_dsp_d[i] = array_offset_dst;
3087
3088 if (src_rank != 0)
3089 {
3090 ptrdiff_t array_offset_sr = 0;
3091 extent = 1;
3092 tot_ext = 1;
3093 for (j = 0; j < src_rank - 1; ++j)
3094 {
3095 extent = src->dim[j]._ubound - src->dim[j].lower_bound + 1;
3096 array_offset_sr += ((i / tot_ext) % extent) * src->dim[j]._stride;
3097 tot_ext *= extent;
3098 }
3099
3100 array_offset_sr += (i / tot_ext) * src->dim[src_rank - 1]._stride;
3101 arr_dsp_s[i] = array_offset_sr;
3102 }
3103 else
3104 arr_dsp_s[i] = 0;
3105 }
3106
3107 ierr = MPI_Type_indexed(size, arr_bl, arr_dsp_s, base_type_src, &dt_s);
3108 chk_err(ierr);
3109 ierr = MPI_Type_indexed(size, arr_bl, arr_dsp_d, base_type_dst, &dt_d);
3110 chk_err(ierr);
3111
3112 free(arr_bl);
3113 free(arr_dsp_s);
3114 free(arr_dsp_d);
3115 }
3116
3117 ierr = MPI_Type_commit(&dt_s); chk_err(ierr);
3118 ierr = MPI_Type_commit(&dt_d); chk_err(ierr);
3119
3120 CAF_Win_lock(MPI_LOCK_EXCLUSIVE, remote_image, *p);
3121 ierr = MPI_Put(src->base_addr, 1, dt_s, remote_image, offset, 1, dt_d, *p);
3122 chk_err(ierr);
3123 CAF_Win_unlock(remote_image, *p);
3124
3125 #ifdef WITH_FAILED_IMAGES
3126 check_image_health(image_index, stat);
3127
3128 if (!stat && ierr == STAT_FAILED_IMAGE)
3129 terminate_internal(ierr, 1);
3130
3131 if (stat)
3132 *stat = ierr;
3133 #else
3134 if (ierr != 0)
3135 {
3136 terminate_internal(ierr, 1);
3137 return;
3138 }
3139 #endif
3140 ierr = MPI_Type_free(&dt_s); chk_err(ierr);
3141 ierr = MPI_Type_free(&dt_d); chk_err(ierr);
3142 }
3143 #endif // STRIDED
3144 else
3145 {
3146 if (same_image && mrt)
3147 {
3148 if ((free_t_buff = (((t_buff = alloca(dst_size * size))) == NULL)))
3149 {
3150 t_buff = malloc(dst_size * size);
3151 if (t_buff == NULL)
3152 caf_runtime_error("Unable to allocate memory "
3153 "for internal buffer in send().");
3154 }
3155 }
3156 else if (!same_type_and_kind && !same_image)
3157 {
3158 if ((free_t_buff = (((t_buff = alloca(dst_size))) == NULL)))
3159 {
3160 t_buff = malloc(dst_size);
3161 if (t_buff == NULL)
3162 caf_runtime_error("Unable to allocate memory "
3163 "for internal buffer in send().");
3164 }
3165 }
3166
3167 if (!same_image)
3168 CAF_Win_lock(MPI_LOCK_EXCLUSIVE, remote_image, *p);
3169 for (i = 0; i < size; ++i)
3170 {
3171 ptrdiff_t array_offset_dst = 0, extent = 1, tot_ext = 1;
3172 if (!same_image || !mrt)
3173 {
3174 /* For same image and may require temp, the dst_offset is
3175 * computed on storage. */
3176 if (dst_vector == NULL)
3177 {
3178 for (j = 0; j < dst_rank - 1; ++j)
3179 {
3180 extent = dest->dim[j]._ubound - dest->dim[j].lower_bound + 1;
3181 array_offset_dst += ((i / tot_ext) % extent) * dest->dim[j]._stride;
3182 tot_ext *= extent;
3183 }
3184 array_offset_dst += (i / tot_ext) * dest->dim[dst_rank - 1]._stride;
3185 }
3186 else
3187 {
3188 #define KINDCASE(kind, type) \
3189 case kind: \
3190 array_offset_dst = ((ptrdiff_t) \
3191 ((type *)dst_vector->u.v.vector)[i] - dest->dim[0].lower_bound); \
3192 break
3193 switch (dst_vector->u.v.kind)
3194 {
3195 KINDCASE(1, int8_t);
3196 KINDCASE(2, int16_t);
3197 KINDCASE(4, int32_t);
3198 KINDCASE(8, int64_t);
3199 #ifdef HAVE_GFC_INTEGER_16
3200 KINDCASE(16, __int128);
3201 #endif
3202 default:
3203 caf_runtime_error(unreachable);
3204 return;
3205 }
3206 }
3207 dst_offset = array_offset_dst * dst_size;
3208 }
3209
3210 void *sr;
3211 if (src_rank != 0)
3212 {
3213 ptrdiff_t array_offset_sr = 0;
3214 extent = 1;
3215 tot_ext = 1;
3216 for (j = 0; j < src_rank - 1; ++j)
3217 {
3218 extent = src->dim[j]._ubound - src->dim[j].lower_bound + 1;
3219 array_offset_sr += ((i / tot_ext) % extent) * src->dim[j]._stride;
3220 tot_ext *= extent;
3221 }
3222
3223 array_offset_sr += (i / tot_ext) * src->dim[dst_rank - 1]._stride;
3224 sr = (void *)((char *)src->base_addr + array_offset_sr * src_size);
3225 }
3226 else
3227 sr = src->base_addr;
3228
3229 if (!same_image)
3230 {
3231 // Do the more likely first.
3232 dprint("kind(dst) = %d, el_sz(dst) = %zd, "
3233 "kind(src) = %d, el_sz(src) = %zd, lb(dst) = %zd.\n",
3234 dst_kind, dst_size, src_kind,
3235 src_size, dest->dim[0].lower_bound);
3236 if (same_type_and_kind)
3237 {
3238 const size_t trans_size = (src_size < dst_size) ? src_size : dst_size;
3239 ierr = MPI_Put(sr, trans_size, MPI_BYTE, remote_image,
3240 offset + dst_offset, trans_size, MPI_BYTE, *p);
3241 chk_err(ierr);
3242 if (pad_str)
3243 {
3244 ierr = MPI_Put(pad_str, dst_size - src_size, MPI_BYTE,
3245 remote_image, offset + dst_offset + src_size,
3246 dst_size - src_size, MPI_BYTE, *p); chk_err(ierr);
3247 }
3248 }
3249 else if (dst_type == BT_CHARACTER)
3250 {
3251 copy_char_to_self(sr, src_type, src_size, src_kind,
3252 t_buff, dst_type, dst_size, dst_kind, 1, true);
3253 ierr = MPI_Put(t_buff, dst_size, MPI_BYTE, remote_image,
3254 offset + dst_offset, dst_size, MPI_BYTE, *p);
3255 chk_err(ierr);
3256 }
3257 else
3258 {
3259 convert_type(t_buff, dst_type, dst_kind,
3260 sr, src_type, src_kind, stat);
3261 ierr = MPI_Put(t_buff, dst_size, MPI_BYTE, remote_image,
3262 offset + dst_offset, dst_size, MPI_BYTE, *p);
3263 chk_err(ierr);
3264 }
3265 }
3266 else
3267 {
3268 if (!mrt)
3269 {
3270 dprint("strided same_image, no temp, for i = %zd, "
3271 "dst_offset = %zd, offset = %zd.\n",
3272 i, dst_offset, offset);
3273 if (same_type_and_kind)
3274 memmove(dest->base_addr + dst_offset, sr, src_size);
3275 else
3276 convert_type(dest->base_addr + dst_offset, dst_type,
3277 dst_kind, sr, src_type, src_kind, stat);
3278 }
3279 else
3280 {
3281 dprint("strided same_image, *WITH* temp, for i = %zd.\n", i);
3282 if (same_type_and_kind)
3283 memmove(t_buff + i * dst_size, sr, src_size);
3284 else
3285 convert_type(t_buff + i * dst_size, dst_type, dst_kind,
3286 sr, src_type, src_kind, stat);
3287 }
3288 }
3289
3290 #ifndef WITH_FAILED_IMAGES
3291 if (ierr != 0)
3292 {
3293 caf_runtime_error("MPI Error: %d", ierr);
3294 return;
3295 }
3296 #endif
3297 }
3298 if (!same_image)
3299 CAF_Win_unlock(remote_image, *p);
3300
3301 if (same_image && mrt)
3302 {
3303 for (i = 0; i < size; ++i)
3304 {
3305 ptrdiff_t array_offset_dst = 0, extent = 1, tot_ext = 1;
3306 if (dst_vector == NULL)
3307 {
3308 for (j = 0; j < dst_rank - 1; ++j)
3309 {
3310 extent = dest->dim[j]._ubound - dest->dim[j].lower_bound + 1;
3311 array_offset_dst += ((i / tot_ext) % extent) * dest->dim[j]._stride;
3312 tot_ext *= extent;
3313 }
3314
3315 array_offset_dst += (i / tot_ext) * dest->dim[dst_rank - 1]._stride;
3316 }
3317 else
3318 {
3319 switch (dst_vector->u.v.kind)
3320 {
3321 // KINDCASE is defined above.
3322 KINDCASE(1, int8_t);
3323 KINDCASE(2, int16_t);
3324 KINDCASE(4, int32_t);
3325 KINDCASE(8, int64_t);
3326 #ifdef HAVE_GFC_INTEGER_16
3327 KINDCASE(16, __int128);
3328 #endif
3329 default:
3330 caf_runtime_error(unreachable);
3331 return;
3332 }
3333 #undef KINDCASE
3334 }
3335 dst_offset = array_offset_dst * dst_size;
3336 memmove(dest->base_addr + dst_offset, t_buff + i * dst_size, dst_size);
3337 }
3338 }
3339 }
3340
3341 /* Free memory, when not allocated on stack. */
3342 if (free_t_buff)
3343 free(t_buff);
3344 if (free_pad_str)
3345 free(pad_str);
3346
3347 #ifdef WITH_FAILED_IMAGES
3348 /* Catch failed images, when failed image support is active. */
3349 check_image_health(image_index , stat);
3350 #endif
3351
3352 if (ierr != MPI_SUCCESS)
3353 {
3354 int mpi_error;
3355 MPI_Error_class(ierr, &mpi_error);
3356 if (stat)
3357 *stat = mpi_error;
3358 else
3359 {
3360 int error_len = 2048;
3361 char error_str[error_len];
3362 strcpy(error_str, "MPI-error: ");
3363 MPI_Error_string(mpi_error, &error_str[11], &error_len);
3364 error_stop_str(error_str, error_len + 11, false);
3365 }
3366 }
3367 }
3368
3369
3370 /* Get array data from a remote src to a local dest. */
3371
3372 void
PREFIX(get)3373 PREFIX(get) (caf_token_t token, size_t offset, int image_index,
3374 gfc_descriptor_t *src, caf_vector_t *src_vector,
3375 gfc_descriptor_t *dest, int src_kind, int dst_kind,
3376 bool mrt, int *pstat)
3377 {
3378 int j, ierr = 0;
3379 size_t i, size;
3380 const int
3381 src_rank = GFC_DESCRIPTOR_RANK(src),
3382 dst_rank = GFC_DESCRIPTOR_RANK(dest);
3383 const size_t
3384 src_size = GFC_DESCRIPTOR_SIZE(src),
3385 dst_size = GFC_DESCRIPTOR_SIZE(dest);
3386 const int
3387 src_type = GFC_DESCRIPTOR_TYPE(src),
3388 dst_type = GFC_DESCRIPTOR_TYPE(dest);
3389 const bool
3390 src_contiguous = PREFIX(is_contiguous) (src),
3391 dst_contiguous = PREFIX(is_contiguous) (dest);
3392 const bool
3393 same_image = caf_this_image == image_index,
3394 same_type_and_kind = dst_type == src_type && dst_kind == src_kind;
3395
3396 MPI_Win *p = TOKEN(token);
3397 ptrdiff_t dimextent, src_offset = 0;
3398 void *pad_str = NULL, *t_buff = NULL;
3399 bool free_pad_str = false, free_t_buff = false;
3400 const bool dest_char_array_is_longer
3401 = dst_type == BT_CHARACTER && dst_size > src_size && !same_image;
3402 int remote_image = image_index - 1;
3403 if (!same_image)
3404 {
3405 MPI_Group current_team_group, win_group;
3406 ierr = MPI_Comm_group(CAF_COMM_WORLD, ¤t_team_group); chk_err(ierr);
3407 ierr = MPI_Win_get_group(*p, &win_group); chk_err(ierr);
3408 ierr = MPI_Group_translate_ranks(current_team_group, 1,
3409 (int[]){remote_image}, win_group,
3410 &remote_image); chk_err(ierr);
3411 ierr = MPI_Group_free(¤t_team_group); chk_err(ierr);
3412 ierr = MPI_Group_free(&win_group); chk_err(ierr);
3413 }
3414
3415 /* Ensure stat is always set. */
3416 #ifdef GCC_GE_7
3417 int * stat = pstat;
3418 if (stat)
3419 *stat = 0;
3420 #else
3421 /* Gcc prior to 7.0 does not have stat here. */
3422 int * stat = NULL;
3423 #endif
3424
3425 size = 1;
3426 for (j = 0; j < dst_rank; ++j)
3427 {
3428 dimextent = dest->dim[j]._ubound - dest->dim[j].lower_bound + 1;
3429 if (dimextent < 0)
3430 dimextent = 0;
3431 size *= dimextent;
3432 }
3433
3434 if (size == 0)
3435 return;
3436
3437 dprint("src_vector = %p, image_index = %d, offset = %zd.\n",
3438 src_vector, image_index, offset);
3439 check_image_health(image_index, stat);
3440
3441 /* For char arrays: create the padding array, when dst is longer than src. */
3442 if (dest_char_array_is_longer)
3443 {
3444 const size_t pad_num = (dst_size / dst_kind) - (src_size / src_kind);
3445 const size_t pad_sz = pad_num * dst_kind;
3446 /* For big arrays alloca() may not be able to get the memory on the stack.
3447 * Use a regular malloc then. */
3448 if ((free_pad_str = ((pad_str = alloca(pad_sz)) == NULL)))
3449 {
3450 pad_str = malloc(pad_sz);
3451 if (t_buff == NULL)
3452 caf_runtime_error("Unable to allocate memory "
3453 "for internal buffer in get().");
3454 }
3455 if (dst_kind == 1)
3456 memset(pad_str, ' ', pad_num);
3457 else /* dst_kind == 4. */
3458 {
3459 for (int32_t *it = (int32_t *) pad_str,
3460 *itEnd = ((int32_t *) pad_str) + pad_num; it < itEnd; ++it)
3461 {
3462 *it = (int32_t) ' ';
3463 }
3464 }
3465 }
3466
3467 if (src_contiguous && dst_contiguous && src_vector == NULL)
3468 {
3469 if (same_image)
3470 {
3471 dprint("in caf_this == image_index, size = %zd, "
3472 "dst_kind = %d, src_kind = %d\n",
3473 size, dst_kind, src_kind);
3474 if (dst_type == BT_CHARACTER)
3475 /* The size is encoded in the descriptor's type for char arrays. */
3476 copy_char_to_self(src->base_addr, src_type, src_size, src_kind,
3477 dest->base_addr, dst_type, dst_size, dst_kind,
3478 size, src_rank == 0);
3479 else
3480 copy_to_self(src, src_kind, dest, dst_kind, size, stat);
3481 return;
3482 }
3483 else
3484 {
3485 CAF_Win_lock(MPI_LOCK_SHARED, remote_image, *p);
3486 if (dst_kind != src_kind || dest_char_array_is_longer || src_rank == 0)
3487 {
3488 if ((free_t_buff = ((t_buff = alloca(src_size * size)) == NULL)))
3489 {
3490 t_buff = malloc(src_size * size);
3491 if (t_buff == NULL)
3492 caf_runtime_error("Unable to allocate memory "
3493 "for internal buffer in get().");
3494 }
3495 }
3496
3497 if ((same_type_and_kind && dst_rank == src_rank)
3498 || dst_type == BT_CHARACTER)
3499 {
3500 if (!dest_char_array_is_longer
3501 && (dst_kind == src_kind || dst_type != BT_CHARACTER))
3502 {
3503 const size_t trans_size =
3504 ((dst_size > src_size) ? src_size : dst_size) * size;
3505 ierr = MPI_Get(dest->base_addr, trans_size, MPI_BYTE, remote_image,
3506 offset, trans_size, MPI_BYTE, *p); chk_err(ierr);
3507 }
3508 else
3509 {
3510 ierr = MPI_Get(t_buff, src_size, MPI_BYTE, remote_image,
3511 offset, src_size, MPI_BYTE, *p); chk_err(ierr);
3512 copy_char_to_self(t_buff, src_type, src_size, src_kind,
3513 dest->base_addr, dst_type, dst_size,
3514 dst_kind, size, src_rank == 0);
3515 }
3516 }
3517 else
3518 {
3519 ierr = MPI_Get(t_buff, src_size * size, MPI_BYTE, remote_image, offset,
3520 src_size * size, MPI_BYTE, *p); chk_err(ierr);
3521 convert_with_strides(dest->base_addr, dst_type, dst_kind, dst_size,
3522 t_buff, src_type, src_kind,
3523 (src_rank > 0) ? src_size: 0, size, stat);
3524 }
3525 CAF_Win_unlock(remote_image, *p);
3526 }
3527 }
3528 #ifdef STRIDED
3529 else if (!same_image && same_type_and_kind && dst_type != BT_CHARACTER)
3530 {
3531 /* For strided copy, no type and kind conversion, copy to self or
3532 * character arrays are supported. */
3533 MPI_Datatype dt_s, dt_d, base_type_src, base_type_dst;
3534 int *arr_bl;
3535 int *arr_dsp_s, *arr_dsp_d;
3536
3537 selectType(src_size, &base_type_src);
3538 selectType(dst_size, &base_type_dst);
3539
3540 if (src_rank == 1)
3541 {
3542 if (src_vector == NULL)
3543 {
3544 dprint("Setting up mpi datatype vector with stride %d, "
3545 "size %d and offset %d.\n",
3546 src->dim[0]._stride, size, offset);
3547 ierr = MPI_Type_vector(size, 1, src->dim[0]._stride, base_type_src,
3548 &dt_s); chk_err(ierr);
3549 }
3550 else
3551 {
3552 arr_bl = calloc(size, sizeof(int));
3553 arr_dsp_s = calloc(size, sizeof(int));
3554
3555 dprint("Setting up strided vector index.\n",
3556 caf_this_image, caf_num_images, __FUNCTION__);
3557 #define KINDCASE(kind, type) \
3558 case kind: \
3559 for (i = 0; i < size; ++i) \
3560 { \
3561 arr_dsp_s[i] = ((ptrdiff_t) \
3562 ((type *) src_vector->u.v.vector)[i] - src->dim[0].lower_bound); \
3563 arr_bl[i] = 1; \
3564 } \
3565 break
3566 switch (src_vector->u.v.kind)
3567 {
3568 KINDCASE(1, int8_t);
3569 KINDCASE(2, int16_t);
3570 KINDCASE(4, int32_t);
3571 KINDCASE(8, int64_t);
3572 #ifdef HAVE_GFC_INTEGER_16
3573 KINDCASE(16, __int128);
3574 #endif
3575 default:
3576 caf_runtime_error(unreachable);
3577 return;
3578 }
3579 #undef KINDCASE
3580 ierr = MPI_Type_indexed(size, arr_bl, arr_dsp_s, base_type_src, &dt_s);
3581 chk_err(ierr);
3582 free(arr_bl);
3583 free(arr_dsp_s);
3584 }
3585 ierr = MPI_Type_vector(size, 1, dest->dim[0]._stride, base_type_dst,
3586 &dt_d); chk_err(ierr);
3587 }
3588 else
3589 {
3590 arr_bl = calloc(size, sizeof(int));
3591 arr_dsp_s = calloc(size, sizeof(int));
3592 arr_dsp_d = calloc(size, sizeof(int));
3593
3594 for (i = 0; i < size; ++i)
3595 {
3596 arr_bl[i] = 1;
3597 }
3598
3599 for (i = 0; i < size; ++i)
3600 {
3601 ptrdiff_t array_offset_sr = 0, extent = 1, tot_ext = 1;
3602 if (src_vector == NULL)
3603 {
3604 for (j = 0; j < src_rank - 1; ++j)
3605 {
3606 extent = src->dim[j]._ubound - src->dim[j].lower_bound + 1;
3607 array_offset_sr += ((i / tot_ext) % extent) * src->dim[j]._stride;
3608 tot_ext *= extent;
3609 }
3610
3611 array_offset_sr += (i / tot_ext) * src->dim[src_rank - 1]._stride;
3612 }
3613 else
3614 {
3615 #define KINDCASE(kind, type) \
3616 case kind: \
3617 array_offset_sr = ((ptrdiff_t) \
3618 ((type *) src_vector->u.v.vector)[i] - src->dim[0].lower_bound); \
3619 break
3620 switch (src_vector->u.v.kind)
3621 {
3622 KINDCASE(1, int8_t);
3623 KINDCASE(2, int16_t);
3624 KINDCASE(4, int32_t);
3625 KINDCASE(8, int64_t);
3626 #ifdef HAVE_GFC_INTEGER_16
3627 KINDCASE(16, __int128);
3628 #endif
3629 default:
3630 caf_runtime_error(unreachable);
3631 return;
3632 }
3633 #undef KINDCASE
3634 }
3635 arr_dsp_s[i] = array_offset_sr;
3636
3637 if (dst_rank != 0)
3638 {
3639 ptrdiff_t array_offset_dst = 0;
3640 extent = 1;
3641 tot_ext = 1;
3642 for (j = 0; j < dst_rank - 1; ++j)
3643 {
3644 extent = dest->dim[j]._ubound - dest->dim[j].lower_bound + 1;
3645 array_offset_dst += ((i / tot_ext) % extent) * dest->dim[j]._stride;
3646 tot_ext *= extent;
3647 }
3648
3649 array_offset_dst += (i / tot_ext) * dest->dim[src_rank - 1]._stride;
3650 arr_dsp_d[i] = array_offset_dst;
3651 }
3652 else
3653 arr_dsp_d[i] = 0;
3654 }
3655
3656 ierr = MPI_Type_indexed(size, arr_bl, arr_dsp_s, base_type_src, &dt_s);
3657 chk_err(ierr);
3658 ierr = MPI_Type_indexed(size, arr_bl, arr_dsp_d, base_type_dst, &dt_d);
3659 chk_err(ierr);
3660
3661 free(arr_bl);
3662 free(arr_dsp_s);
3663 free(arr_dsp_d);
3664 }
3665
3666 ierr = MPI_Type_commit(&dt_s); chk_err(ierr);
3667 ierr = MPI_Type_commit(&dt_d); chk_err(ierr);
3668
3669 CAF_Win_lock(MPI_LOCK_SHARED, remote_image, *p);
3670 ierr = MPI_Get(dest->base_addr, 1, dt_d, remote_image, offset, 1, dt_s, *p);
3671 chk_err(ierr);
3672 CAF_Win_unlock(remote_image, *p);
3673
3674 #ifdef WITH_FAILED_IMAGES
3675 check_image_health(image_index, stat);
3676
3677 if (!stat && ierr == STAT_FAILED_IMAGE)
3678 terminate_internal(ierr, 1);
3679
3680 if (stat)
3681 *stat = ierr;
3682 #else
3683 if (ierr != 0)
3684 {
3685 terminate_internal(ierr, 1);
3686 return;
3687 }
3688 #endif
3689 ierr = MPI_Type_free(&dt_s); chk_err(ierr);
3690 ierr = MPI_Type_free(&dt_d); chk_err(ierr);
3691 }
3692 #endif // STRIDED
3693 else
3694 {
3695 if (same_image && mrt)
3696 {
3697 if ((free_t_buff = (((t_buff = alloca(src_size * size))) == NULL)))
3698 {
3699 t_buff = malloc(src_size * size);
3700 if (t_buff == NULL)
3701 caf_runtime_error("Unable to allocate memory "
3702 "for internal buffer in get().");
3703 }
3704 }
3705 else if (!same_type_and_kind && !same_image)
3706 {
3707 if ((free_t_buff = (((t_buff = alloca(src_size))) == NULL)))
3708 {
3709 t_buff = malloc(src_size);
3710 if (t_buff == NULL)
3711 caf_runtime_error("Unable to allocate memory "
3712 "for internal buffer in get().");
3713 }
3714 }
3715
3716 if (!same_image)
3717 CAF_Win_lock(MPI_LOCK_SHARED, remote_image, *p);
3718 for (i = 0; i < size; ++i)
3719 {
3720 ptrdiff_t array_offset_sr = 0, extent = 1, tot_ext = 1;
3721 if (src_vector == NULL)
3722 {
3723 for (j = 0; j < src_rank - 1; ++j)
3724 {
3725 extent = src->dim[j]._ubound - src->dim[j].lower_bound + 1;
3726 array_offset_sr += ((i / tot_ext) % extent) * src->dim[j]._stride;
3727 tot_ext *= extent;
3728 }
3729
3730 array_offset_sr += (i / tot_ext) * src->dim[src_rank - 1]._stride;
3731 }
3732 else
3733 {
3734 #define KINDCASE(kind, type) \
3735 case kind: \
3736 array_offset_sr = ((ptrdiff_t) \
3737 ((type *)src_vector->u.v.vector)[i] - src->dim[0].lower_bound); \
3738 break
3739 switch (src_vector->u.v.kind)
3740 {
3741 KINDCASE(1, int8_t);
3742 KINDCASE(2, int16_t);
3743 KINDCASE(4, int32_t);
3744 KINDCASE(8, int64_t);
3745 #ifdef HAVE_GFC_INTEGER_16
3746 KINDCASE(16, __int128);
3747 #endif
3748 default:
3749 caf_runtime_error(unreachable);
3750 return;
3751 }
3752 }
3753 src_offset = array_offset_sr * src_size;
3754 #undef KINDCASE
3755
3756 void *dst;
3757 if (!same_image || !mrt)
3758 {
3759 if (dst_rank != 0)
3760 {
3761 ptrdiff_t array_offset_dst = 0;
3762 extent = 1;
3763 tot_ext = 1;
3764 for (j = 0; j < dst_rank - 1; ++j)
3765 {
3766 extent = dest->dim[j]._ubound - dest->dim[j].lower_bound + 1;
3767 array_offset_dst += ((i / tot_ext) % extent) * dest->dim[j]._stride;
3768 tot_ext *= extent;
3769 }
3770
3771 array_offset_dst += (i / tot_ext) * dest->dim[dst_rank - 1]._stride;
3772 dst = (void *)((char *)dest->base_addr + array_offset_dst * dst_size);
3773 }
3774 else
3775 dst = dest->base_addr;
3776 }
3777
3778 if (!same_image)
3779 {
3780 // Do the more likely first.
3781 dprint("kind(dst) = %d, el_sz(dst) = %zd, "
3782 "kind(src) = %d, el_sz(src) = %zd, lb(dst) = %zd.\n",
3783 dst_kind, dst_size, src_kind, src_size, src->dim[0].lower_bound);
3784 if (same_type_and_kind)
3785 {
3786 const size_t trans_size = (src_size < dst_size) ? src_size : dst_size;
3787 ierr = MPI_Get(dst, trans_size, MPI_BYTE, remote_image,
3788 offset + src_offset, trans_size, MPI_BYTE, *p);
3789 chk_err(ierr);
3790 if (pad_str)
3791 memcpy((void *)((char *)dst + src_size), pad_str,
3792 dst_size - src_size);
3793 }
3794 else if (dst_type == BT_CHARACTER)
3795 {
3796 ierr = MPI_Get(t_buff, src_size, MPI_BYTE, remote_image,
3797 offset + src_offset, src_size, MPI_BYTE, *p);
3798 chk_err(ierr);
3799 copy_char_to_self(t_buff, src_type, src_size, src_kind,
3800 dst, dst_type, dst_size, dst_kind, 1, true);
3801 }
3802 else
3803 {
3804 ierr = MPI_Get(t_buff, src_size, MPI_BYTE, remote_image,
3805 offset + src_offset, src_size, MPI_BYTE, *p);
3806 chk_err(ierr);
3807 convert_type(dst, dst_type, dst_kind, t_buff,
3808 src_type, src_kind, stat);
3809 }
3810 }
3811 else
3812 {
3813 if (!mrt)
3814 {
3815 dprint("strided same_image, no temp, for i = %zd, "
3816 "src_offset = %zd, offset = %zd.\n",
3817 i, src_offset, offset);
3818 if (same_type_and_kind)
3819 memmove(dst, src->base_addr + src_offset, src_size);
3820 else
3821 convert_type(dst, dst_type, dst_kind,
3822 src->base_addr + src_offset, src_type, src_kind, stat);
3823 }
3824 else
3825 {
3826 dprint("strided same_image, *WITH* temp, for i = %zd.\n", i);
3827 if (same_type_and_kind)
3828 memmove(t_buff + i * dst_size,
3829 src->base_addr + src_offset, src_size);
3830 else
3831 convert_type(t_buff + i * dst_size, dst_type, dst_kind,
3832 src->base_addr + src_offset, src_type, src_kind, stat);
3833 }
3834 }
3835
3836 #ifndef WITH_FAILED_IMAGES
3837 if (ierr != 0)
3838 {
3839 caf_runtime_error("MPI Error: %d", ierr);
3840 return;
3841 }
3842 #endif
3843 }
3844 if (!same_image)
3845 CAF_Win_unlock(remote_image, *p);
3846
3847 if (same_image && mrt)
3848 {
3849 dprint("Same image temporary move.\n");
3850 memmove(dest->base_addr, t_buff, size * dst_size);
3851 }
3852 }
3853
3854 /* Free memory, when not allocated on stack. */
3855 if (free_t_buff)
3856 free(t_buff);
3857 if (free_pad_str)
3858 free(pad_str);
3859
3860 #ifdef WITH_FAILED_IMAGES
3861 /* Catch failed images, when failed image support is active. */
3862 check_image_health(image_index , stat);
3863 #endif
3864
3865 if (ierr != MPI_SUCCESS)
3866 {
3867 int mpi_error;
3868 MPI_Error_class(ierr, &mpi_error);
3869 if (stat)
3870 *stat = mpi_error;
3871 else
3872 {
3873 int error_len = 2048 - 11;
3874 char error_str[error_len + 11];
3875 strcpy(error_str, "MPI-error: ");
3876 MPI_Error_string(mpi_error, &error_str[11], &error_len);
3877 error_stop_str(error_str, error_len + 11, false);
3878 }
3879 }
3880 }
3881
3882
3883 #ifdef GCC_GE_7
/* Get a chunk of data from one image to the current one, with type conversion.
 *
 * Copied from the gcc:libgfortran/caf/single.c. Can't say much about it.
 *
 * ds          local destination buffer (written).
 * token       when NULL, the transfer goes through global_dynamic_win;
 *             otherwise through token->memptr_win.
 * offset      displacement in the remote window.
 * dst/src_type, dst/src_kind, dst/src_size  element type/kind/size of the
 *             destination and the remote source.
 * num         number of elements to transfer.
 * stat        forwarded to convert_type() for conversion errors.
 * image_index rank in the window's group (NOT the 1-based Fortran image).
 *
 * NOTE(review): callers are assumed to hold an access epoch (lock) on the
 * window — the MPI_Win_flush calls below are only valid inside a passive-
 * target epoch; confirm at call sites. */
static void
get_data(void *ds, mpi_caf_token_t *token, MPI_Aint offset, int dst_type,
         int src_type, int dst_kind, int src_kind, size_t dst_size,
         size_t src_size, size_t num, int *stat, int image_index)
{
  size_t k;
  int ierr;
  /* Select the window: global dynamic window for token-less (component)
   * accesses, else the token's memory window. */
  MPI_Win win = (token == NULL) ? global_dynamic_win : token->memptr_win;
#ifdef EXTRA_DEBUG_OUTPUT
  if (token)
    dprint("%p = win(%d): %d -> offset: %zd of size %zd -> %zd, "
           "dst type %d(%d), src type %d(%d)\n",
           ds, win, image_index + 1, offset, src_size, dst_size,
           dst_type, dst_kind, src_type, src_kind);
  else
    dprint("%p = global_win(%d) offset: %zd (%zd) of size %zd -> %zd, "
           "dst type %d(%d), src type %d(%d)\n",
           ds, image_index + 1, offset, offset, src_size, dst_size,
           dst_type, dst_kind, src_type, src_kind);
#endif
  if (dst_type == src_type && dst_kind == src_kind)
  {
    /* Same type and kind: raw byte transfer of min(dst, src) element size. */
    size_t sz = ((dst_size > src_size) ? src_size : dst_size) * num;
    ierr = MPI_Get(ds, sz, MPI_BYTE, image_index, offset, sz, MPI_BYTE, win);
    chk_err(ierr);
    if ((dst_type == BT_CHARACTER || src_type == BT_CHARACTER)
        && dst_size > src_size)
    {
      /* Blank-pad the tail of a longer character destination. */
      if (dst_kind == 1)
      {
        memset((void*)(char*) ds + src_size, ' ', dst_size - src_size);
      }
      else /* dst_kind == 4. */
      {
        for (k = src_size / 4; k < dst_size / 4; k++)
          ((int32_t*) ds)[k] = (int32_t) ' ';
      }
    }
  }
  else if (dst_type == BT_CHARACTER && dst_kind == 1)
  {
    /* Character kind conversion (remote kind-4 -> local kind-1). */
    /* Get the required amount of memory on the stack. */
    void *srh = alloca(src_size);
    ierr = MPI_Get(srh, src_size, MPI_BYTE, image_index, offset, src_size,
                   MPI_BYTE, win); chk_err(ierr);
    /* Get of the data needs to be finished before converting the data. */
    ierr = MPI_Win_flush(image_index, win); chk_err(ierr);
    assign_char1_from_char4(dst_size, src_size, ds, srh);
  }
  else if (dst_type == BT_CHARACTER)
  {
    /* Character kind conversion (remote kind-1 -> local kind-4). */
    /* Get the required amount of memory on the stack. */
    void *srh = alloca(src_size);
    ierr = MPI_Get(srh, src_size, MPI_BYTE, image_index, offset, src_size,
                   MPI_BYTE, win); chk_err(ierr);
    /* Get of the data needs to be finished before converting the data. */
    ierr = MPI_Win_flush(image_index, win); chk_err(ierr);
    assign_char4_from_char1(dst_size, src_size, ds, srh);
  }
  else
  {
    /* Generic numeric type/kind conversion: fetch all elements into a
     * stack buffer, then convert element by element. */
    /* Get the required amount of memory on the stack. */
    void *srh = alloca(src_size * num);
    dprint("type/kind convert %zd items: "
           "type %d(%d) -> type %d(%d), local buffer: %p\n",
           num, src_type, src_kind, dst_type, dst_kind, srh);
    ierr = MPI_Get(srh, src_size * num, MPI_BYTE, image_index, offset,
                   src_size * num, MPI_BYTE, win); chk_err(ierr);
    /* Get of the data needs to be finished before converting the data. */
    ierr = MPI_Win_flush(image_index, win); chk_err(ierr);
    dprint("srh[0] = %d, ierr = %d\n", (int)((char *)srh)[0], ierr);
    for (k = 0; k < num; ++k)
    {
      convert_type(ds, dst_type, dst_kind, srh, src_type, src_kind, stat);
      /* void* arithmetic is a GCC extension (treated as char*). */
      ds += dst_size;
      srh += src_size;
    }
  }
}
3966
3967
/* Compute the number of items referenced.
 *
 * Computes the number of items between lower bound (lb) and upper bound (ub)
 * with respect to the stride taking corner cases into account.
 *
 * CAUTION: contains a hidden `return;` — when the range is empty or the
 * stride is zero this macro RETURNS from the enclosing (void) function.
 * Arguments are evaluated more than once; do not pass expressions with
 * side effects. */
#define COMPUTE_NUM_ITEMS(num, stride, lb, ub) \
do \
{ \
ptrdiff_t abs_stride = (stride) > 0 ? (stride) : -(stride); \
num = (stride) > 0 ? (ub) + 1 - (lb) : (lb) + 1 - (ub); \
if (num <= 0 || abs_stride < 1) return; \
num = (abs_stride > 1) ? (1 + (num - 1) / abs_stride) : num; \
} while (0)
3980
3981
/* Convenience macro to get the extent of a descriptor in a certain dimension
 *
 * Copied from gcc:libgfortran/libgfortran.h.
 * Evaluates both arguments twice; the result may be negative for an empty
 * range (callers clamp where needed). */
#define GFC_DESCRIPTOR_EXTENT(desc,i) \
((desc)->dim[i]._ubound + 1 - (desc)->dim[i].lower_bound)
3987
3988
/* Size in bytes of a gfc descriptor with `rank` trailing dimension
 * entries; used to fetch remote descriptors of known rank. */
#define sizeof_desc_for_rank(rank) \
(sizeof(gfc_descriptor_t) + (rank) * sizeof(descriptor_dimension))
3991
/* Define the descriptor of max rank.
 *
 * This typedef is made to allow storing a copy of a remote descriptor on the
 * stack without having to care about the rank. */
typedef struct gfc_max_dim_descriptor_t
{
  gfc_descriptor_t base;                      /* Rank-independent header. */
  descriptor_dimension dim[GFC_MAX_DIMENSIONS]; /* Room for every possible dim. */
} gfc_max_dim_descriptor_t;
4001
/* Descriptor with exactly one dimension entry, for rank-1 temporaries. */
typedef struct gfc_dim1_descriptor_t
{
  gfc_descriptor_t base;     /* Rank-independent header. */
  descriptor_dimension dim[1]; /* Single dimension triple (lb, ub, stride). */
} gfc_dim1_descriptor_t;
4007
4008 static void
get_for_ref(caf_reference_t * ref,size_t * i,size_t dst_index,mpi_caf_token_t * mpi_token,gfc_descriptor_t * dst,gfc_descriptor_t * src,void * ds,void * sr,ptrdiff_t sr_byte_offset,ptrdiff_t desc_byte_offset,int dst_kind,int src_kind,size_t dst_dim,size_t src_dim,size_t num,int * stat,int global_dynamic_win_rank,int memptr_win_rank,bool sr_global,bool desc_global,int src_type)4009 get_for_ref(caf_reference_t *ref, size_t *i, size_t dst_index,
4010 mpi_caf_token_t *mpi_token, gfc_descriptor_t *dst,
4011 gfc_descriptor_t *src, void *ds, void *sr,
4012 ptrdiff_t sr_byte_offset, ptrdiff_t desc_byte_offset,
4013 int dst_kind, int src_kind, size_t dst_dim, size_t src_dim,
4014 size_t num, int *stat,
4015 int global_dynamic_win_rank, int memptr_win_rank,
4016 bool sr_global, /* access sr through global_dynamic_win */
4017 bool desc_global /* access desc through global_dynamic_win */
4018 #ifdef GCC_GE_8
4019 , int src_type)
4020 {
4021 #else
4022 )
4023 {
4024 int src_type = -1;
4025 #endif
4026 ptrdiff_t extent_src = 1, array_offset_src = 0, stride_src;
4027 size_t next_dst_dim, ref_rank;
4028 gfc_max_dim_descriptor_t src_desc_data;
4029 int ierr;
4030
4031 if (unlikely(ref == NULL))
4032 {
4033 /* May be we should issue an error here, because this case should not
4034 * occur. */
4035 return;
4036 }
4037
4038 dprint("sr_offset = %zd, sr = %p, desc_offset = %zd, src = %p, "
4039 "sr_glb = %d, desc_glb = %d\n",
4040 sr_byte_offset, sr, desc_byte_offset, src, sr_global, desc_global);
4041
4042 if (ref->next == NULL)
4043 {
4044 size_t dst_size = GFC_DESCRIPTOR_SIZE(dst);
4045
4046 switch (ref->type)
4047 {
4048 case CAF_REF_COMPONENT:
4049 dprint("caf_offset = %zd\n", ref->u.c.caf_token_offset);
4050 if (ref->u.c.caf_token_offset > 0)
4051 {
4052 sr_byte_offset += ref->u.c.offset;
4053 if (sr_global)
4054 {
4055 ierr = MPI_Get(&sr, stdptr_size, MPI_BYTE, global_dynamic_win_rank,
4056 MPI_Aint_add((MPI_Aint)sr, sr_byte_offset),
4057 stdptr_size, MPI_BYTE, global_dynamic_win);
4058 chk_err(ierr);
4059 desc_global = true;
4060 }
4061 else
4062 {
4063 ierr = MPI_Get(&sr, stdptr_size, MPI_BYTE, memptr_win_rank,
4064 sr_byte_offset, stdptr_size, MPI_BYTE,
4065 mpi_token->memptr_win); chk_err(ierr);
4066 sr_global = true;
4067 }
4068 sr_byte_offset = 0;
4069 }
4070 else
4071 sr_byte_offset += ref->u.c.offset;
4072 if (sr_global)
4073 {
4074 get_data(ds, NULL, MPI_Aint_add((MPI_Aint)sr, sr_byte_offset),
4075 GFC_DESCRIPTOR_TYPE(dst),
4076 #ifdef GCC_GE_8
4077 (src_type != -1) ? src_type : GFC_DESCRIPTOR_TYPE (dst),
4078 #else
4079 GFC_DESCRIPTOR_TYPE(dst),
4080 #endif
4081 dst_kind, src_kind, dst_size, ref->item_size, 1, stat,
4082 global_dynamic_win_rank);
4083 }
4084 else
4085 {
4086 get_data(ds, mpi_token, sr_byte_offset, GFC_DESCRIPTOR_TYPE(dst),
4087 #ifdef GCC_GE_8
4088 src_type,
4089 #else
4090 GFC_DESCRIPTOR_TYPE(src),
4091 #endif
4092 dst_kind, src_kind, dst_size, ref->item_size, 1, stat,
4093 memptr_win_rank);
4094 }
4095 ++(*i);
4096 return;
4097 case CAF_REF_STATIC_ARRAY:
4098 src_type = ref->u.a.static_array_type;
4099 /* Intentionally fall through. */
4100 case CAF_REF_ARRAY:
4101 if (ref->u.a.mode[src_dim] == CAF_ARR_REF_NONE)
4102 {
4103 if (sr_global)
4104 {
4105 get_data(ds + dst_index * dst_size, NULL,
4106 MPI_Aint_add((MPI_Aint)sr, sr_byte_offset),
4107 GFC_DESCRIPTOR_TYPE(dst),
4108 #ifdef GCC_GE_8
4109 (src_type != -1) ? src_type : GFC_DESCRIPTOR_TYPE (src),
4110 #else
4111 (src_type == -1) ? GFC_DESCRIPTOR_TYPE(src) : src_type,
4112 #endif
4113 dst_kind, src_kind, dst_size, ref->item_size, num,
4114 stat, global_dynamic_win_rank);
4115 }
4116 else
4117 {
4118 get_data(ds + dst_index * dst_size, mpi_token,
4119 sr_byte_offset, GFC_DESCRIPTOR_TYPE(dst),
4120 #ifdef GCC_GE_8
4121 (src_type != -1) ? src_type : GFC_DESCRIPTOR_TYPE (src),
4122 #else
4123 (src_type == -1) ? GFC_DESCRIPTOR_TYPE(src) : src_type,
4124 #endif
4125 dst_kind, src_kind, dst_size, ref->item_size, num,
4126 stat, memptr_win_rank);
4127 }
4128 *i += num;
4129 return;
4130 }
4131 break;
4132 default:
4133 caf_runtime_error(unreachable);
4134 }
4135 }
4136
4137 switch (ref->type)
4138 {
4139 case CAF_REF_COMPONENT:
4140 if (ref->u.c.caf_token_offset > 0)
4141 {
4142 sr_byte_offset += ref->u.c.offset;
4143 desc_byte_offset = sr_byte_offset;
4144 if (sr_global)
4145 {
4146 ierr = MPI_Get(&sr, stdptr_size, MPI_BYTE, global_dynamic_win_rank,
4147 MPI_Aint_add((MPI_Aint)sr, sr_byte_offset),
4148 stdptr_size, MPI_BYTE, global_dynamic_win);
4149 chk_err(ierr);
4150 desc_global = true;
4151 }
4152 else
4153 {
4154 ierr = MPI_Get(&sr, stdptr_size, MPI_BYTE, memptr_win_rank,
4155 sr_byte_offset, stdptr_size, MPI_BYTE,
4156 mpi_token->memptr_win); chk_err(ierr);
4157 sr_global = true;
4158 }
4159 sr_byte_offset = 0;
4160 }
4161 else
4162 {
4163 sr_byte_offset += ref->u.c.offset;
4164 desc_byte_offset += ref->u.c.offset;
4165 }
4166 get_for_ref(ref->next, i, dst_index, mpi_token, dst, NULL, ds,
4167 sr, sr_byte_offset, desc_byte_offset, dst_kind, src_kind,
4168 dst_dim, 0, 1, stat, global_dynamic_win_rank, memptr_win_rank,
4169 sr_global, desc_global
4170 #ifdef GCC_GE_8
4171 , src_type
4172 #endif
4173 );
4174 return;
4175 case CAF_REF_ARRAY:
4176 if (ref->u.a.mode[src_dim] == CAF_ARR_REF_NONE)
4177 {
4178 get_for_ref(ref->next, i, dst_index, mpi_token, dst, src, ds, sr,
4179 sr_byte_offset, desc_byte_offset, dst_kind, src_kind,
4180 dst_dim, 0, 1, stat, global_dynamic_win_rank, memptr_win_rank,
4181 sr_global, desc_global
4182 #ifdef GCC_GE_8
4183 , src_type
4184 #endif
4185 );
4186 return;
4187 }
4188 /* Only when on the left most index switch the data pointer to the
4189 * array's data pointer. */
4190 if (src_dim == 0)
4191 {
4192 if (sr_global)
4193 {
4194 for (ref_rank = 0; ref->u.a.mode[ref_rank] != CAF_ARR_REF_NONE;
4195 ++ref_rank) ;
4196 /* Get the remote descriptor. */
4197 if (desc_global)
4198 {
4199 ierr = MPI_Get(&src_desc_data, sizeof_desc_for_rank(ref_rank),
4200 MPI_BYTE, global_dynamic_win_rank,
4201 MPI_Aint_add((MPI_Aint)sr, desc_byte_offset),
4202 sizeof_desc_for_rank(ref_rank), MPI_BYTE,
4203 global_dynamic_win); chk_err(ierr);
4204 }
4205 else
4206 {
4207 ierr = MPI_Get(&src_desc_data,
4208 sizeof_desc_for_rank(ref_rank), MPI_BYTE,
4209 memptr_win_rank, desc_byte_offset,
4210 sizeof_desc_for_rank(ref_rank),
4211 MPI_BYTE, mpi_token->memptr_win); chk_err(ierr);
4212 desc_global = true;
4213 }
4214 src = (gfc_descriptor_t *)&src_desc_data;
4215 }
4216 else
4217 src = mpi_token->desc;
4218 sr_byte_offset = 0;
4219 desc_byte_offset = 0;
4220 #ifdef EXTRA_DEBUG_OUTPUT
4221 dprint("remote desc rank: %zd (ref_rank: %zd)\n",
4222 GFC_DESCRIPTOR_RANK(src), ref_rank);
4223 for (int r = 0; r < GFC_DESCRIPTOR_RANK(src); ++r)
4224 {
4225 dprint("remote desc dim[%d] = (lb=%zd, ub=%zd, stride=%zd)\n",
4226 r, src->dim[r].lower_bound, src->dim[r]._ubound,
4227 src->dim[r]._stride);
4228 }
4229 #endif
4230 }
4231 switch (ref->u.a.mode[src_dim])
4232 {
4233 case CAF_ARR_REF_VECTOR:
4234 extent_src = GFC_DESCRIPTOR_EXTENT(src, src_dim);
4235 array_offset_src = 0;
4236 for (size_t idx = 0; idx < ref->u.a.dim[src_dim].v.nvec; ++idx)
4237 {
4238 #define KINDCASE(kind, type) \
4239 case kind: \
4240 array_offset_src = (((ptrdiff_t) \
4241 ((type *)ref->u.a.dim[src_dim].v.vector)[idx]) \
4242 - src->dim[src_dim].lower_bound * src->dim[src_dim]._stride); \
4243 break
4244
4245 switch (ref->u.a.dim[src_dim].v.kind)
4246 {
4247 KINDCASE(1, int8_t);
4248 KINDCASE(2, int16_t);
4249 KINDCASE(4, int32_t);
4250 KINDCASE(8, int64_t);
4251 #ifdef HAVE_GFC_INTEGER_16
4252 KINDCASE(16, __int128);
4253 #endif
4254 default:
4255 caf_runtime_error(unreachable);
4256 return;
4257 }
4258 #undef KINDCASE
4259
4260 dprint("vector-index computed to: %zd\n", array_offset_src);
4261 get_for_ref(ref, i, dst_index, mpi_token, dst, src, ds, sr,
4262 sr_byte_offset + array_offset_src * ref->item_size,
4263 desc_byte_offset + array_offset_src * ref->item_size,
4264 dst_kind, src_kind, dst_dim + 1, src_dim + 1,
4265 1, stat, global_dynamic_win_rank, memptr_win_rank,
4266 sr_global, desc_global
4267 #ifdef GCC_GE_8
4268 , src_type
4269 #endif
4270 );
4271 dst_index += dst->dim[dst_dim]._stride;
4272 }
4273 return;
4274 case CAF_ARR_REF_FULL:
4275 COMPUTE_NUM_ITEMS(extent_src,
4276 ref->u.a.dim[src_dim].s.stride,
4277 src->dim[src_dim].lower_bound,
4278 src->dim[src_dim]._ubound);
4279 stride_src =
4280 src->dim[src_dim]._stride * ref->u.a.dim[src_dim].s.stride;
4281 array_offset_src = 0;
4282 for (ptrdiff_t idx = 0; idx < extent_src;
4283 ++idx, array_offset_src += stride_src)
4284 {
4285 get_for_ref(ref, i, dst_index, mpi_token, dst, src, ds, sr,
4286 sr_byte_offset + array_offset_src * ref->item_size,
4287 desc_byte_offset + array_offset_src * ref->item_size,
4288 dst_kind, src_kind, dst_dim + 1, src_dim + 1,
4289 1, stat, global_dynamic_win_rank, memptr_win_rank,
4290 sr_global, desc_global
4291 #ifdef GCC_GE_8
4292 , src_type
4293 #endif
4294 );
4295 dst_index += dst->dim[dst_dim]._stride;
4296 }
4297 return;
4298 case CAF_ARR_REF_RANGE:
4299 COMPUTE_NUM_ITEMS(extent_src,
4300 ref->u.a.dim[src_dim].s.stride,
4301 ref->u.a.dim[src_dim].s.start,
4302 ref->u.a.dim[src_dim].s.end);
4303 array_offset_src =
4304 (ref->u.a.dim[src_dim].s.start - src->dim[src_dim].lower_bound)
4305 * src->dim[src_dim]._stride;
4306 stride_src =
4307 src->dim[src_dim]._stride * ref->u.a.dim[src_dim].s.stride;
4308 /* Increase the dst_dim only, when the src_extent is greater than one
4309 * or src and dst extent are both one. Don't increase when the scalar
4310 * source is not present in the dst. */
4311 next_dst_dim = (
4312 (extent_src > 1) ||
4313 (GFC_DESCRIPTOR_EXTENT(dst, dst_dim) == 1 && extent_src == 1)
4314 ) ? (dst_dim + 1) : dst_dim;
4315 for (ptrdiff_t idx = 0; idx < extent_src; ++idx)
4316 {
4317 get_for_ref(ref, i, dst_index, mpi_token, dst, src, ds, sr,
4318 sr_byte_offset + array_offset_src * ref->item_size,
4319 desc_byte_offset + array_offset_src * ref->item_size,
4320 dst_kind, src_kind, next_dst_dim, src_dim + 1,
4321 1, stat, global_dynamic_win_rank, memptr_win_rank,
4322 sr_global, desc_global
4323 #ifdef GCC_GE_8
4324 , src_type
4325 #endif
4326 );
4327 dst_index += dst->dim[dst_dim]._stride;
4328 array_offset_src += stride_src;
4329 }
4330 return;
4331 case CAF_ARR_REF_SINGLE:
4332 array_offset_src =
4333 (ref->u.a.dim[src_dim].s.start - src->dim[src_dim].lower_bound)
4334 * src->dim[src_dim]._stride;
4335 get_for_ref(ref, i, dst_index, mpi_token, dst, src, ds, sr,
4336 sr_byte_offset + array_offset_src * ref->item_size,
4337 desc_byte_offset + array_offset_src * ref->item_size,
4338 dst_kind, src_kind, dst_dim, src_dim + 1, 1,
4339 stat, global_dynamic_win_rank, memptr_win_rank,
4340 sr_global, desc_global
4341 #ifdef GCC_GE_8
4342 , src_type
4343 #endif
4344 );
4345 return;
4346 case CAF_ARR_REF_OPEN_END:
4347 COMPUTE_NUM_ITEMS(extent_src,
4348 ref->u.a.dim[src_dim].s.stride,
4349 ref->u.a.dim[src_dim].s.start,
4350 src->dim[src_dim]._ubound);
4351 stride_src =
4352 src->dim[src_dim]._stride * ref->u.a.dim[src_dim].s.stride;
4353 array_offset_src = (ref->u.a.dim[src_dim].s.start
4354 - src->dim[src_dim].lower_bound)
4355 * src->dim[src_dim]._stride;
4356 for (ptrdiff_t idx = 0; idx < extent_src; ++idx)
4357 {
4358 get_for_ref(ref, i, dst_index, mpi_token, dst, src, ds, sr,
4359 sr_byte_offset + array_offset_src * ref->item_size,
4360 desc_byte_offset + array_offset_src * ref->item_size,
4361 dst_kind, src_kind, dst_dim + 1, src_dim + 1,
4362 1, stat, global_dynamic_win_rank, memptr_win_rank,
4363 sr_global, desc_global
4364 #ifdef GCC_GE_8
4365 , src_type
4366 #endif
4367 );
4368 dst_index += dst->dim[dst_dim]._stride;
4369 array_offset_src += stride_src;
4370 }
4371 return;
4372 case CAF_ARR_REF_OPEN_START:
4373 COMPUTE_NUM_ITEMS(extent_src,
4374 ref->u.a.dim[src_dim].s.stride,
4375 src->dim[src_dim].lower_bound,
4376 ref->u.a.dim[src_dim].s.end);
4377 stride_src =
4378 src->dim[src_dim]._stride * ref->u.a.dim[src_dim].s.stride;
4379 array_offset_src = 0;
4380 for (ptrdiff_t idx = 0; idx < extent_src; ++idx)
4381 {
4382 get_for_ref(ref, i, dst_index, mpi_token, dst, src, ds, sr,
4383 sr_byte_offset + array_offset_src * ref->item_size,
4384 desc_byte_offset + array_offset_src * ref->item_size,
4385 dst_kind, src_kind, dst_dim + 1, src_dim + 1,
4386 1, stat, global_dynamic_win_rank, memptr_win_rank,
4387 sr_global, desc_global
4388 #ifdef GCC_GE_8
4389 , src_type
4390 #endif
4391 );
4392 dst_index += dst->dim[dst_dim]._stride;
4393 array_offset_src += stride_src;
4394 }
4395 return;
4396 default:
4397 caf_runtime_error(unreachable);
4398 }
4399 return;
4400 case CAF_REF_STATIC_ARRAY:
4401 if (ref->u.a.mode[src_dim] == CAF_ARR_REF_NONE)
4402 {
4403 get_for_ref(ref->next, i, dst_index, mpi_token, dst, NULL, ds, sr,
4404 sr_byte_offset, desc_byte_offset, dst_kind, src_kind,
4405 dst_dim, 0, 1, stat, global_dynamic_win_rank, memptr_win_rank,
4406 sr_global, desc_global
4407 #ifdef GCC_GE_8
4408 , src_type
4409 #endif
4410 );
4411 return;
4412 }
4413 switch (ref->u.a.mode[src_dim])
4414 {
4415 case CAF_ARR_REF_VECTOR:
4416 array_offset_src = 0;
4417 for (size_t idx = 0; idx < ref->u.a.dim[src_dim].v.nvec; ++idx)
4418 {
4419 #define KINDCASE(kind, type) \
4420 case kind: \
4421 array_offset_src = ((type *)ref->u.a.dim[src_dim].v.vector)[idx]; \
4422 break
4423
4424 switch (ref->u.a.dim[src_dim].v.kind)
4425 {
4426 KINDCASE(1, int8_t);
4427 KINDCASE(2, int16_t);
4428 KINDCASE(4, int32_t);
4429 KINDCASE(8, int64_t);
4430 #ifdef HAVE_GFC_INTEGER_16
4431 KINDCASE(16, __int128);
4432 #endif
4433 default:
4434 caf_runtime_error(unreachable);
4435 return;
4436 }
4437 #undef KINDCASE
4438
4439 get_for_ref(ref, i, dst_index, mpi_token, dst, NULL, ds, sr,
4440 sr_byte_offset + array_offset_src * ref->item_size,
4441 desc_byte_offset + array_offset_src * ref->item_size,
4442 dst_kind, src_kind, dst_dim + 1, src_dim + 1,
4443 1, stat, global_dynamic_win_rank, memptr_win_rank,
4444 sr_global, desc_global
4445 #ifdef GCC_GE_8
4446 , src_type
4447 #endif
4448 );
4449 dst_index += dst->dim[dst_dim]._stride;
4450 }
4451 return;
4452 case CAF_ARR_REF_FULL:
4453 for (array_offset_src = 0 ;
4454 array_offset_src <= ref->u.a.dim[src_dim].s.end;
4455 array_offset_src += ref->u.a.dim[src_dim].s.stride)
4456 {
4457 get_for_ref(ref, i, dst_index, mpi_token, dst, NULL, ds, sr,
4458 sr_byte_offset + array_offset_src * ref->item_size,
4459 desc_byte_offset + array_offset_src * ref->item_size,
4460 dst_kind, src_kind, dst_dim + 1, src_dim + 1,
4461 1, stat, global_dynamic_win_rank, memptr_win_rank,
4462 sr_global, desc_global
4463 #ifdef GCC_GE_8
4464 , src_type
4465 #endif
4466 );
4467 dst_index += dst->dim[dst_dim]._stride;
4468 }
4469 return;
4470 case CAF_ARR_REF_RANGE:
4471 COMPUTE_NUM_ITEMS(extent_src,
4472 ref->u.a.dim[src_dim].s.stride,
4473 ref->u.a.dim[src_dim].s.start,
4474 ref->u.a.dim[src_dim].s.end);
4475 array_offset_src = ref->u.a.dim[src_dim].s.start;
4476 for (ptrdiff_t idx = 0; idx < extent_src; ++idx)
4477 {
4478 get_for_ref(ref, i, dst_index, mpi_token, dst, NULL, ds, sr,
4479 sr_byte_offset + array_offset_src * ref->item_size,
4480 desc_byte_offset + array_offset_src * ref->item_size,
4481 dst_kind, src_kind, dst_dim + 1, src_dim + 1,
4482 1, stat, global_dynamic_win_rank, memptr_win_rank,
4483 sr_global, desc_global
4484 #ifdef GCC_GE_8
4485 , src_type
4486 #endif
4487 );
4488 dst_index += dst->dim[dst_dim]._stride;
4489 array_offset_src += ref->u.a.dim[src_dim].s.stride;
4490 }
4491 return;
4492 case CAF_ARR_REF_SINGLE:
4493 array_offset_src = ref->u.a.dim[src_dim].s.start;
4494 get_for_ref(ref, i, dst_index, mpi_token, dst, NULL, ds, sr,
4495 sr_byte_offset + array_offset_src * ref->item_size,
4496 desc_byte_offset + array_offset_src * ref->item_size,
4497 dst_kind, src_kind, dst_dim, src_dim + 1, 1,
4498 stat, global_dynamic_win_rank, memptr_win_rank,
4499 sr_global, desc_global
4500 #ifdef GCC_GE_8
4501 , src_type
4502 #endif
4503 );
4504 return;
4505 /* The OPEN_* are mapped to a RANGE and therefore can not occur. */
4506 case CAF_ARR_REF_OPEN_END:
4507 case CAF_ARR_REF_OPEN_START:
4508 default:
4509 caf_runtime_error(unreachable);
4510 }
4511 return;
4512 default:
4513 caf_runtime_error(unreachable);
4514 }
4515 }
4516
/* Entry point for getting data from a remote image where the remote object
 * is addressed by a chain of references (component refs, array refs,
 * static-array refs).
 *
 * token             opaque mpi_caf_token_t identifying the remote coarray
 * image_index       1-based index of the remote image
 * dst               local destination descriptor; may be (re)allocated when
 *                   dst_reallocatable is set
 * refs              linked list describing the path to the remote data
 * dst_kind/src_kind Fortran kinds (byte sizes) of destination and source
 * may_require_tmp   unused in this implementation
 * dst_reallocatable whether dst's data/bounds may be (re)set here
 * stat              optional Fortran stat variable; cleared to 0 on entry
 * src_type          (GCC >= 8 only) Fortran type of the source object
 *
 * The routine makes two passes over the reference chain:
 *  1. the while-loop below walks the refs to compute the total number of
 *     elements (size), validate the destination, and fix up dst's bounds,
 *     fetching remote pointers/descriptors via one-sided MPI_Get where a
 *     component ref crosses an allocatable/pointer boundary;
 *  2. get_for_ref() then performs the actual data transfer.
 * Both MPI windows are share-locked around the whole operation. */
void
PREFIX(get_by_ref) (caf_token_t token, int image_index,
                    gfc_descriptor_t *dst, caf_reference_t *refs,
                    int dst_kind, int src_kind,
                    bool may_require_tmp __attribute__((unused)),
                    bool dst_reallocatable, int *stat
#ifdef GCC_GE_8
                    , int src_type
#endif
                    )
{
  const char vecrefunknownkind[] =
    "libcaf_mpi::caf_get_by_ref(): unknown kind in vector-ref.\n";
  const char unknownreftype[] =
    "libcaf_mpi::caf_get_by_ref(): unknown reference type.\n";
  const char unknownarrreftype[] =
    "libcaf_mpi::caf_get_by_ref(): unknown array reference type.\n";
  const char rankoutofrange[] =
    "libcaf_mpi::caf_get_by_ref(): rank out of range.\n";
  const char extentoutofrange[] =
    "libcaf_mpi::caf_get_by_ref(): extent out of range.\n";
  const char cannotallocdst[] =
    "libcaf_mpi::caf_get_by_ref(): can not allocate %d bytes of memory.\n";
  const char nonallocextentmismatch[] =
    "libcaf_mpi::caf_get_by_ref(): extent of non-allocatable arrays "
    "mismatch (%lu != %lu).\n";
  const char doublearrayref[] =
    "libcaf_mpi::caf_get_by_ref(): two or more array part references "
    "are not supported.\n";
  size_t size, i, ref_rank, dst_index, src_size;
  int ierr, dst_rank = GFC_DESCRIPTOR_RANK(dst), dst_cur_dim = 0;
  mpi_caf_token_t *mpi_token = (mpi_caf_token_t *) token;
  void *remote_memptr = mpi_token->memptr, *remote_base_memptr = NULL;
  /* Stack buffer large enough for a descriptor of maximal rank; src points
   * into it when a remote descriptor has to be fetched. */
  gfc_max_dim_descriptor_t src_desc;
  gfc_descriptor_t *src = (gfc_descriptor_t *)&src_desc;
  caf_reference_t *riter = refs;
  long delta;
  ptrdiff_t data_offset = 0, desc_offset = 0;
  /* Reallocation of dst.data is needed (e.g., array too small). */
  bool realloc_needed;
  /* Reallocation of dst.data is required, because data is not alloced at
   * all. */
  bool realloc_required, extent_mismatch = false;
  /* Set when the first non-scalar array reference is encountered. */
  bool in_array_ref = false, array_extent_fixed = false;
  /* Set when remote data is to be accessed through the
   * global dynamic window. */
  bool access_data_through_global_win = false;
  /* Set when the remote descriptor is to be accessed through the global
   * window. */
  bool access_desc_through_global_win = false;
  caf_array_ref_t array_ref;

  realloc_needed = realloc_required = dst->base_addr == NULL;

  if (stat)
    *stat = 0;

  /* Translate the communicator rank of the remote image into the rank used
   * by each of the two windows (global dynamic window and the token's
   * memptr window). */
  MPI_Group current_team_group, win_group;
  int global_dynamic_win_rank, memptr_win_rank;
  ierr = MPI_Comm_group(CAF_COMM_WORLD, &current_team_group); chk_err(ierr);
  ierr = MPI_Win_get_group(global_dynamic_win, &win_group); chk_err(ierr);
  ierr = MPI_Group_translate_ranks(current_team_group, 1,
                                   (int[]){image_index - 1}, win_group,
                                   &global_dynamic_win_rank); chk_err(ierr);
  ierr = MPI_Group_free(&win_group); chk_err(ierr);
  ierr = MPI_Win_get_group(mpi_token->memptr_win, &win_group); chk_err(ierr);
  ierr = MPI_Group_translate_ranks(current_team_group, 1,
                                   (int[]){image_index - 1}, win_group,
                                   &memptr_win_rank); chk_err(ierr);
  ierr = MPI_Group_free(&current_team_group); chk_err(ierr);
  ierr = MPI_Group_free(&win_group); chk_err(ierr);

  check_image_health(global_dynamic_win_rank, stat);

  dprint("Entering get_by_ref(may_require_tmp = %d).\n", may_require_tmp);

  /* Compute the size of the result. In the beginning size just counts the
   * number of elements. */
  size = 1;
  /* Shared lock both windows to prevent bother in the sub-routines. */
  CAF_Win_lock(MPI_LOCK_SHARED, global_dynamic_win_rank, global_dynamic_win);
  CAF_Win_lock(MPI_LOCK_SHARED, memptr_win_rank, mpi_token->memptr_win);
  /* Pass 1: walk the reference chain, computing size and fixing up dst. */
  while (riter)
    {
      dprint("offset = %zd, remote_mem = %p, access_data(global_win) = %d\n",
             data_offset, remote_memptr, access_data_through_global_win);
      switch (riter->type)
        {
        case CAF_REF_COMPONENT:
          /* A caf_token_offset > 0 marks an allocatable/pointer component:
           * the remote pointer must be dereferenced on the remote image. */
          if (riter->u.c.caf_token_offset > 0)
            {
              if (access_data_through_global_win)
                {
                  data_offset += riter->u.c.offset;
                  remote_base_memptr = remote_memptr;
                  ierr = MPI_Get(&remote_memptr, stdptr_size, MPI_BYTE, global_dynamic_win_rank,
                                 MPI_Aint_add((MPI_Aint)remote_memptr, data_offset),
                                 stdptr_size, MPI_BYTE, global_dynamic_win);
                  chk_err(ierr);
                  /* On the second indirection access also the remote descriptor
                   * using the global window. */
                  access_desc_through_global_win = true;
                }
              else
                {
                  data_offset += riter->u.c.offset;
                  ierr = MPI_Get(&remote_memptr, stdptr_size, MPI_BYTE, memptr_win_rank,
                                 data_offset, stdptr_size, MPI_BYTE,
                                 mpi_token->memptr_win); chk_err(ierr);
                  dprint("get(custom_token %d), offset = %zd, res. remote_mem = %p\n",
                         mpi_token->memptr_win, data_offset, remote_memptr);
                  /* All future access is through the global dynamic window. */
                  access_data_through_global_win = true;
                }
              /* The descriptor lives at the old offset; data now starts at
               * the freshly fetched pointer. */
              desc_offset = data_offset;
              data_offset = 0;
            }
          else
            {
              data_offset += riter->u.c.offset;
              desc_offset += riter->u.c.offset;
            }
          break;
        case CAF_REF_ARRAY:
          /* When there has been no CAF_REF_COMP beforehand, then the
           * descriptor is stored in the token and the extents are the same on
           * all images, which is taken care of in the else part. */
          if (access_data_through_global_win)
            {
              /* Count the dimensions of this array reference. */
              for (ref_rank = 0; riter->u.a.mode[ref_rank] != CAF_ARR_REF_NONE;
                   ++ref_rank) ;
              /* Get the remote descriptor and use the stack to store it. Note,
               * src may be pointing to mpi_token->desc therefore it needs to be
               * reset here. */
              src = (gfc_descriptor_t *)&src_desc;
              if (access_desc_through_global_win)
                {
                  dprint("remote desc fetch from %p, offset = %zd\n",
                         remote_base_memptr, desc_offset);
                  MPI_Get(src, sizeof_desc_for_rank(ref_rank), MPI_BYTE, global_dynamic_win_rank,
                          MPI_Aint_add((MPI_Aint)remote_base_memptr, desc_offset),
                          sizeof_desc_for_rank(ref_rank), MPI_BYTE,
                          global_dynamic_win);
                }
              else
                {
                  dprint("remote desc fetch from win %d, offset = %zd\n",
                         mpi_token->memptr_win, desc_offset);
                  MPI_Get(src, sizeof_desc_for_rank(ref_rank), MPI_BYTE, memptr_win_rank,
                          desc_offset, sizeof_desc_for_rank(ref_rank), MPI_BYTE,
                          mpi_token->memptr_win);
                  access_desc_through_global_win = true;
                }
            }
          else
            src = mpi_token->desc;

#ifdef EXTRA_DEBUG_OUTPUT
          dprint("remote desc rank: %zd (ref_rank: %zd)\n",
                 GFC_DESCRIPTOR_RANK(src), ref_rank);
          for (i = 0; i < GFC_DESCRIPTOR_RANK(src); ++i)
            {
              dprint("remote desc dim[%zd] = (lb=%zd, ub=%zd, stride=%zd)\n",
                     i, src->dim[i].lower_bound, src->dim[i]._ubound,
                     src->dim[i]._stride);
            }
#endif
          /* For each dimension of this array reference: compute the number
           * of referenced elements (delta) and advance remote_memptr to the
           * first one. */
          for (i = 0; riter->u.a.mode[i] != CAF_ARR_REF_NONE; ++i)
            {
              array_ref = riter->u.a.mode[i];
              switch (array_ref)
                {
                case CAF_ARR_REF_VECTOR:
                  delta = riter->u.a.dim[i].v.nvec;
/* Advance remote_memptr to the element selected by the first vector index
 * of the given integer kind. */
#define KINDCASE(kind, type) \
case kind: \
  remote_memptr += (((ptrdiff_t) \
    ((type *)riter->u.a.dim[i].v.vector)[0]) - src->dim[i].lower_bound) \
    * src->dim[i]._stride * riter->item_size; \
  break

                  switch (riter->u.a.dim[i].v.kind)
                    {
                    KINDCASE(1, int8_t);
                    KINDCASE(2, int16_t);
                    KINDCASE(4, int32_t);
                    KINDCASE(8, int64_t);
#ifdef HAVE_GFC_INTEGER_16
                    KINDCASE(16, __int128);
#endif
                    default:
                      caf_runtime_error(vecrefunknownkind, stat, NULL, 0);
                      return;
                    }
#undef KINDCASE
                  break;
                case CAF_ARR_REF_FULL:
                  COMPUTE_NUM_ITEMS(delta,
                                    riter->u.a.dim[i].s.stride,
                                    src->dim[i].lower_bound,
                                    src->dim[i]._ubound);
                  /* The memptr stays unchanged when ref'ing the first element
                   * in a dimension. */
                  break;
                case CAF_ARR_REF_RANGE:
                  COMPUTE_NUM_ITEMS(delta,
                                    riter->u.a.dim[i].s.stride,
                                    riter->u.a.dim[i].s.start,
                                    riter->u.a.dim[i].s.end);
                  remote_memptr +=
                    (riter->u.a.dim[i].s.start - src->dim[i].lower_bound)
                    * src->dim[i]._stride * riter->item_size;
                  break;
                case CAF_ARR_REF_SINGLE:
                  delta = 1;
                  remote_memptr +=
                    (riter->u.a.dim[i].s.start - src->dim[i].lower_bound)
                    * src->dim[i]._stride * riter->item_size;
                  break;
                case CAF_ARR_REF_OPEN_END:
                  COMPUTE_NUM_ITEMS(delta,
                                    riter->u.a.dim[i].s.stride,
                                    riter->u.a.dim[i].s.start,
                                    src->dim[i]._ubound);
                  remote_memptr +=
                    (riter->u.a.dim[i].s.start - src->dim[i].lower_bound)
                    * src->dim[i]._stride * riter->item_size;
                  break;
                case CAF_ARR_REF_OPEN_START:
                  COMPUTE_NUM_ITEMS(delta,
                                    riter->u.a.dim[i].s.stride,
                                    src->dim[i].lower_bound,
                                    riter->u.a.dim[i].s.end);
                  /* The memptr stays unchanged when ref'ing the first element
                   * in a dimension. */
                  break;
                default:
                  caf_runtime_error(unknownarrreftype, stat, NULL, 0);
                  return;
                }
              dprint("i = %zd, array_ref = %s, delta = %ld\n", i,
                     caf_array_ref_str[array_ref], delta);
              if (delta <= 0)
                return;
              /* Check the various properties of the destination array.
               * Is an array expected and present? */
              if (delta > 1 && dst_rank == 0)
                {
                  /* No, an array is required, but not provided. */
                  caf_runtime_error(extentoutofrange, stat, NULL, 0);
                  return;
                }
              /* When dst is an array. */
              if (dst_rank > 0)
                {
                  /* Check that dst_cur_dim is valid for dst. Can be superseded
                   * only by scalar data. */
                  if (dst_cur_dim >= dst_rank && delta != 1)
                    {
                      caf_runtime_error(rankoutofrange, stat, NULL, 0);
                      return;
                    }
                  /* Do further checks, when the source is not scalar. */
                  else if (delta != 1 || realloc_required)
                    {
                      /* Check that the extent is not scalar and we are not in an array
                       * ref for the dst side. */
                      if (!in_array_ref)
                        {
                          /* Check that this is the non-scalar extent. */
                          if (!array_extent_fixed)
                            {
                              /* In an array extent now. */
                              in_array_ref = true;
                              /* Check that we haven't skipped any scalar dimensions yet
                               * and that the dst is compatible. */
                              if (i > 0 && dst_rank == GFC_DESCRIPTOR_RANK(src))
                                {
                                  if (dst_reallocatable)
                                    {
                                      /* Dst is reallocatable, which means that the bounds are
                                       * not set. Set them. */
                                      for (dst_cur_dim = 0; dst_cur_dim < (int)i; ++dst_cur_dim)
                                        {
                                          dst->dim[dst_cur_dim].lower_bound = 1;
                                          dst->dim[dst_cur_dim]._ubound = 1;
                                          dst->dim[dst_cur_dim]._stride = 1;
                                        }
                                    }
                                  else
                                    dst_cur_dim = i;
                                }
                              /* Else press thumbs, that there are enough dimensional refs
                               * to come. Checked below. */
                            }
                          else
                            {
                              caf_runtime_error(doublearrayref, stat, NULL, 0);
                              return;
                            }
                        }
                      /* When the realloc is required, then no extent may have
                       * been set. */
                      extent_mismatch = realloc_required ||
                        GFC_DESCRIPTOR_EXTENT(dst, dst_cur_dim) != delta;
                      /* When it already known, that a realloc is needed or the extent
                       * does not match the needed one. */
                      if (realloc_needed || extent_mismatch)
                        {
                          /* Check whether dst is reallocatable. */
                          if (unlikely(!dst_reallocatable))
                            {
                              caf_runtime_error(nonallocextentmismatch, stat,
                                                NULL, 0, delta,
                                                GFC_DESCRIPTOR_EXTENT(dst, dst_cur_dim));
                              return;
                            }
                          /* Only report an error, when the extent needs to be modified,
                           * which is not allowed. */
                          else if (!dst_reallocatable && extent_mismatch)
                            {
                              caf_runtime_error(extentoutofrange, stat, NULL, 0);
                              return;
                            }
                          realloc_needed = true;
                        }
                      /* Only change the extent when it does not match. This is to
                       * prevent resetting given array bounds. */
                      if (extent_mismatch)
                        {
                          dst->dim[dst_cur_dim].lower_bound = 1;
                          dst->dim[dst_cur_dim]._ubound = delta;
                          dst->dim[dst_cur_dim]._stride = size;
                          if (realloc_required)
                            dst->offset = -1;
                        }
                    }

                  /* Only increase the dim counter, when in an array ref */
                  if (in_array_ref && dst_cur_dim < dst_rank)
                    {
                      /* Mode != CAF_ARR_REF_SINGLE(delta == 1), and no rank
                       * reduction */
                      if (!(delta == 1 && dst_rank != GFC_DESCRIPTOR_RANK(src)))
                        ++dst_cur_dim;
                    }
                }
              size *= (ptrdiff_t)delta;
            }
          if (in_array_ref)
            {
              array_extent_fixed = true;
              in_array_ref = false;
            }
          break;
        case CAF_REF_STATIC_ARRAY:
          /* Like CAF_REF_ARRAY, but the bounds are compile-time constants in
           * the reference itself; no remote descriptor is involved. */
          for (i = 0; riter->u.a.mode[i] != CAF_ARR_REF_NONE; ++i)
            {
              array_ref = riter->u.a.mode[i];
              switch (array_ref)
                {
                case CAF_ARR_REF_VECTOR:
                  delta = riter->u.a.dim[i].v.nvec;
/* Advance remote_memptr to the element selected by the first vector index;
 * static arrays use zero-based offsets directly. */
#define KINDCASE(kind, type) \
case kind: \
  remote_memptr += \
    ((type *)riter->u.a.dim[i].v.vector)[0] * riter->item_size; \
  break

                  switch (riter->u.a.dim[i].v.kind)
                    {
                    KINDCASE(1, int8_t);
                    KINDCASE(2, int16_t);
                    KINDCASE(4, int32_t);
                    KINDCASE(8, int64_t);
#ifdef HAVE_GFC_INTEGER_16
                    KINDCASE(16, __int128);
#endif
                    default:
                      caf_runtime_error(vecrefunknownkind, stat, NULL, 0);
                      return;
                    }
#undef KINDCASE
                  break;
                case CAF_ARR_REF_FULL:
                  delta = riter->u.a.dim[i].s.end / riter->u.a.dim[i].s.stride
                          + 1;
                  /* The memptr stays unchanged when ref'ing the first element in a
                   * dimension. */
                  break;
                case CAF_ARR_REF_RANGE:
                  COMPUTE_NUM_ITEMS(delta,
                                    riter->u.a.dim[i].s.stride,
                                    riter->u.a.dim[i].s.start,
                                    riter->u.a.dim[i].s.end);
                  remote_memptr += riter->u.a.dim[i].s.start
                                   * riter->u.a.dim[i].s.stride
                                   * riter->item_size;
                  break;
                case CAF_ARR_REF_SINGLE:
                  delta = 1;
                  remote_memptr += riter->u.a.dim[i].s.start
                                   * riter->u.a.dim[i].s.stride
                                   * riter->item_size;
                  break;
                case CAF_ARR_REF_OPEN_END:
                  /* This and OPEN_START are mapped to a RANGE and therefore can
                   * not occur here. */
                case CAF_ARR_REF_OPEN_START:
                default:
                  caf_runtime_error(unknownarrreftype, stat, NULL, 0);
                  return;
                }
              dprint("i = %zd, array_ref = %s, delta = %ld\n",
                     i, caf_array_ref_str[array_ref], delta);
              if (delta <= 0)
                return;
              /* Check the various properties of the destination array.
               * Is an array expected and present? */
              if (delta > 1 && dst_rank == 0)
                {
                  /* No, an array is required, but not provided. */
                  caf_runtime_error(extentoutofrange, stat, NULL, 0);
                  return;
                }
              /* When dst is an array. */
              if (dst_rank > 0)
                {
                  /* Check that dst_cur_dim is valid for dst. Can be superseded
                   * only by scalar data. */
                  if (dst_cur_dim >= dst_rank && delta != 1)
                    {
                      caf_runtime_error(rankoutofrange, stat, NULL, 0);
                      return;
                    }
                  /* Do further checks, when the source is not scalar. */
                  else if (delta != 1 || realloc_required)
                    {
                      /* Check that the extent is not scalar and we are not in an array
                       * ref for the dst side. */
                      if (!in_array_ref)
                        {
                          /* Check that this is the non-scalar extent. */
                          if (!array_extent_fixed)
                            {
                              /* In an array extent now. */
                              in_array_ref = true;
                              /* The dst is not reallocatable, so nothing more to do,
                               * then correct the dim counter. */
                              dst_cur_dim = i;
                            }
                          else
                            {
                              caf_runtime_error(doublearrayref, stat, NULL, 0);
                              return;
                            }
                        }
                      /* When the realloc is required, then no extent may have
                       * been set. */
                      extent_mismatch = realloc_required ||
                        GFC_DESCRIPTOR_EXTENT(dst, dst_cur_dim) != delta;
                      /* When it is already known, that a realloc is needed or
                       * the extent does not match the needed one. */
                      if (realloc_needed || extent_mismatch)
                        {
                          /* Check whether dst is reallocatable. */
                          if (unlikely(!dst_reallocatable))
                            {
                              caf_runtime_error(nonallocextentmismatch, stat,
                                                NULL, 0, delta,
                                                GFC_DESCRIPTOR_EXTENT(dst, dst_cur_dim));
                              return;
                            }
                          /* Only report an error, when the extent needs to be modified,
                           * which is not allowed. */
                          else if (!dst_reallocatable && extent_mismatch)
                            {
                              caf_runtime_error(extentoutofrange, stat, NULL, 0);
                              return;
                            }
                          realloc_needed = true;
                        }
                      /* Only change the extent when it does not match. This is to
                       * prevent resetting given array bounds. */
                      if (extent_mismatch)
                        {
                          dst->dim[dst_cur_dim].lower_bound = 1;
                          dst->dim[dst_cur_dim]._ubound = delta;
                          dst->dim[dst_cur_dim]._stride = size;
                          if (realloc_required)
                            dst->offset = -1;
                        }
                    }
                  /* Only increase the dim counter, when in an array ref */
                  if (in_array_ref && dst_cur_dim < dst_rank)
                    {
                      /* Mode != CAF_ARR_REF_SINGLE(delta == 1), and no rank
                       * reduction */
                      if (!(delta == 1 && dst_rank != GFC_DESCRIPTOR_RANK(src)))
                        ++dst_cur_dim;
                    }
                }
              size *= (ptrdiff_t)delta;
            }
          if (in_array_ref)
            {
              array_extent_fixed = true;
              in_array_ref = false;
            }
          break;
        default:
          caf_runtime_error(unknownreftype, stat, NULL, 0);
          return;
        }
      src_size = riter->item_size;
      riter = riter->next;
    }
  if (size == 0 || src_size == 0)
    return;
  /* Postcondition:
   * - size contains the number of elements to store in the destination array,
   * - src_size gives the size in bytes of each item in the destination array.
   */

  if (realloc_needed)
    {
      if (!array_extent_fixed)
        {
          /* This can happen only, when the result is scalar. */
          for (dst_cur_dim = 0; dst_cur_dim < dst_rank; ++dst_cur_dim)
            {
              dst->dim[dst_cur_dim].lower_bound = 1;
              dst->dim[dst_cur_dim]._ubound = 1;
              dst->dim[dst_cur_dim]._stride = 1;
            }
        }
      dst->base_addr = malloc(size * GFC_DESCRIPTOR_SIZE(dst));
      if (unlikely(dst->base_addr == NULL))
        {
          caf_runtime_error(cannotallocdst, stat, size * GFC_DESCRIPTOR_SIZE(dst));
          return;
        }
    }

  /* Reset the token for pass 2: the walk starts again from the coarray's
   * base pointer. */
  mpi_token = (mpi_caf_token_t *) token;
  remote_memptr = mpi_token->memptr;
  dst_index = 0;
#ifdef EXTRA_DEBUG_OUTPUT
  dprint("dst_rank: %zd\n", dst_rank);
  for (i = 0; i < dst_rank; ++i)
    {
      dprint("dst_dim[%zd] = (%zd, %zd)\n",
             i, dst->dim[i].lower_bound, dst->dim[i]._ubound);
    }
#endif
  i = 0;
  dprint("get_by_ref() calling get_for_ref.\n");
  /* Pass 2: do the actual transfer. */
  get_for_ref(refs, &i, dst_index, mpi_token, dst, mpi_token->desc,
              dst->base_addr, remote_memptr, 0, 0, dst_kind, src_kind, 0, 0,
              1, stat, global_dynamic_win_rank, memptr_win_rank, false, false
#ifdef GCC_GE_8
              , src_type
#endif
              );
  CAF_Win_unlock(global_dynamic_win_rank, global_dynamic_win);
  CAF_Win_unlock(memptr_win_rank, mpi_token->memptr_win);
}
5085
/* Write num elements from the local buffer sr to a remote image via
 * one-sided MPI_Put, converting type/kind in a local scratch buffer first
 * when source and destination differ.
 *
 * token       target coarray token; when NULL the transfer goes through the
 *             global dynamic window instead of the token's memptr window
 * offset      byte displacement in the target window
 * sr          local source buffer
 * dst_type/src_type, dst_kind/src_kind  Fortran type/kind of both sides
 * dst_size/src_size  byte size of one destination / source element
 * num         number of elements to transfer
 * stat        passed through to convert_type() for conversion errors
 * image_index rank of the target in the window's group (NOT 1-based
 *             image number)
 *
 * The final MPI_Win_flush completes the put(s) at the target before
 * returning. */
static void
put_data(mpi_caf_token_t *token, MPI_Aint offset, void *sr, int dst_type,
         int src_type, int dst_kind, int src_kind, size_t dst_size,
         size_t src_size, size_t num, int *stat, int image_index)
{
  size_t k;
  int ierr;
  MPI_Win win = (token == NULL) ? global_dynamic_win : token->memptr_win;
#ifdef EXTRA_DEBUG_OUTPUT
  if (token)
    dprint("(win: %d, image: %d, offset: %zd) <- %p, "
           "num: %zd, size %zd -> %zd, dst type %d(%d), src type %d(%d)\n",
           win, image_index + 1, offset, sr, num, src_size, dst_size,
           dst_type, dst_kind, src_type, src_kind);
  else
    dprint("(global_win: %x, image: %d, offset: %zd (%zd)) <- %p, "
           "num: %zd, size %zd -> %zd, dst type %d(%d), src type %d(%d)\n",
           win, image_index + 1, offset, offset, sr, num, src_size,
           dst_size, dst_type, dst_kind, src_type, src_kind);
#endif
  if (dst_type == src_type && dst_kind == src_kind)
    {
      /* Same type and kind: raw byte copy of min(dst_size, src_size) per
       * element. */
      size_t sz = (dst_size > src_size ? src_size : dst_size) * num;
      ierr = MPI_Put(sr, sz, MPI_BYTE, image_index, offset, sz, MPI_BYTE, win);
      chk_err(ierr);
      dprint("sr[] = %d, num = %zd, num bytes = %zd\n",
             (int)((char*)sr)[0], num, sz);
      /* A longer destination character needs to be blank-padded behind the
       * copied source characters. */
      if ((dst_type == BT_CHARACTER || src_type == BT_CHARACTER)
          && dst_size > src_size)
        {
          /* Number of characters (not bytes) to pad. */
          const size_t trans_size = dst_size / dst_kind - src_size / src_kind;
          void *pad = alloca(trans_size * dst_kind);
          if (dst_kind == 1)
            {
              memset((void*)(char*) pad, ' ', trans_size);
            }
          else /* dst_kind == 4. */
            {
              for (k = 0; k < trans_size; ++k)
                {
                  ((int32_t*) pad)[k] = (int32_t) ' ';
                }
            }
          ierr = MPI_Put(pad, trans_size * dst_kind, MPI_BYTE, image_index,
                         offset + (src_size / src_kind) * dst_kind,
                         trans_size * dst_kind, MPI_BYTE, win); chk_err(ierr);
        }
    }
  else if (dst_type == BT_CHARACTER && dst_kind == 1)
    {
      /* Convert kind-4 characters to kind-1 in a stack buffer, then put. */
      void *dsh = alloca(dst_size);
      assign_char1_from_char4(dst_size, src_size, dsh, sr);
      ierr = MPI_Put(dsh, dst_size, MPI_BYTE, image_index, offset, dst_size,
                     MPI_BYTE, win); chk_err(ierr);
    }
  else if (dst_type == BT_CHARACTER)
    {
      /* Convert kind-1 characters to kind-4 in a stack buffer, then put. */
      void *dsh = alloca(dst_size);
      assign_char4_from_char1(dst_size, src_size, dsh, sr);
      ierr = MPI_Put(dsh, dst_size, MPI_BYTE, image_index, offset, dst_size,
                     MPI_BYTE, win); chk_err(ierr);
    }
  else
    {
      /* General numeric type/kind conversion: convert each element into a
       * stack buffer, then put the whole buffer in one call.
       * NOTE(review): arithmetic on void* here relies on the GNU extension
       * treating sizeof(void) as 1. */
      void *dsh = alloca(dst_size * num), *dsh_iter = dsh;
      dprint("type/kind convert %zd items: "
             "type %d(%d) -> type %d(%d), local buffer: %p\n",
             num, src_type, src_kind, dst_type, dst_kind, dsh);
      for (k = 0; k < num; ++k)
        {
          convert_type(dsh_iter, dst_type, dst_kind, sr, src_type, src_kind, stat);
          dsh_iter += dst_size;
          sr += src_size;
        }
      // dprint("dsh[0] = %d\n", ((int *)dsh)[0]);
      ierr = MPI_Put(dsh, dst_size * num, MPI_BYTE, image_index, offset,
                     dst_size * num, MPI_BYTE, win); chk_err(ierr);
    }
  /* Complete all puts at the target before returning. */
  ierr = MPI_Win_flush(image_index, win); chk_err(ierr);
}
5169
5170
5171 static void
5172 send_for_ref(caf_reference_t *ref, size_t *i, size_t src_index,
5173 mpi_caf_token_t *mpi_token, gfc_descriptor_t *dst,
5174 gfc_descriptor_t *src, void *ds, void *sr,
5175 ptrdiff_t dst_byte_offset, ptrdiff_t desc_byte_offset,
5176 int dst_kind, int src_kind, size_t dst_dim, size_t src_dim,
5177 size_t num, int *stat, int global_dynamic_win_rank, int memptr_win_rank,
5178 bool ds_global, /* access ds through global_dynamic_win */
5179 bool desc_global /* access desc through global_dynamic_win */
5180 #ifdef GCC_GE_8
5181 , int dst_type)
5182 {
5183 #else
5184 )
5185 {
5186 int dst_type = -1;
5187 #endif
5188 ptrdiff_t extent_dst = 1, array_offset_dst = 0, dst_stride, src_stride;
5189 size_t next_dst_dim, ref_rank;
5190 gfc_max_dim_descriptor_t dst_desc_data;
5191 caf_ref_type_t ref_type = ref->type;
5192 caf_array_ref_t array_ref_src = ref->u.a.mode[src_dim];
5193 int ierr;
5194
5195 if (unlikely(ref == NULL))
5196 {
5197 /* May be we should issue an error here, because this case should not
5198 * occur. */
5199 return;
5200 }
5201
5202 dprint("Entering send_for_ref: [i = %zd] src_index = %zd, "
5203 "dst_offset = %zd, desc_offset = %zd, ds_glb = %d, desc_glb = %d\n",
5204 *i, src_index, dst_byte_offset, desc_byte_offset,
5205 ds_global, desc_global);
5206
5207 if (ref->next == NULL)
5208 {
5209 size_t src_size = GFC_DESCRIPTOR_SIZE(src);
5210 dprint("[next == NULL]: src_size = %zd, ref_type = %s\n",
5211 src_size, caf_ref_type_str[ref_type]);
5212
5213 switch (ref_type)
5214 {
5215 case CAF_REF_COMPONENT:
5216 dst_byte_offset += ref->u.c.offset;
5217 if (ref->u.c.caf_token_offset > 0)
5218 {
5219 if (ds_global)
5220 {
5221 ierr = MPI_Get(&ds, stdptr_size, MPI_BYTE, global_dynamic_win_rank,
5222 MPI_Aint_add((MPI_Aint)ds, dst_byte_offset),
5223 stdptr_size, MPI_BYTE, global_dynamic_win);
5224 chk_err(ierr);
5225 desc_global = true;
5226 }
5227 else
5228 {
5229 ierr = MPI_Get(&ds, stdptr_size, MPI_BYTE, memptr_win_rank,
5230 dst_byte_offset, stdptr_size, MPI_BYTE,
5231 mpi_token->memptr_win); chk_err(ierr);
5232 ds_global = true;
5233 }
5234 dst_byte_offset = 0;
5235 }
5236
5237 if (ds_global)
5238 {
5239 put_data(NULL, MPI_Aint_add((MPI_Aint)ds, dst_byte_offset), sr,
5240 #ifdef GCC_GE_8
5241 dst_type,
5242 #else
5243 GFC_DESCRIPTOR_TYPE(src),
5244 #endif
5245 GFC_DESCRIPTOR_TYPE(src), dst_kind, src_kind,
5246 ref->item_size, src_size, 1, stat, global_dynamic_win_rank);
5247 }
5248 else
5249 {
5250 put_data(mpi_token, dst_byte_offset, sr,
5251 #ifdef GCC_GE_8
5252 dst_type,
5253 #else
5254 GFC_DESCRIPTOR_TYPE(dst),
5255 #endif
5256 GFC_DESCRIPTOR_TYPE(src), dst_kind, src_kind,
5257 ref->item_size, src_size, 1, stat, memptr_win_rank);
5258 }
5259 ++(*i);
5260 return;
5261 case CAF_REF_STATIC_ARRAY:
5262 dst_type = ref->u.a.static_array_type;
5263 /* Intentionally fall through. */
5264 case CAF_REF_ARRAY:
5265 if (array_ref_src == CAF_ARR_REF_NONE)
5266 {
5267 if (ds_global)
5268 {
5269 put_data(NULL, MPI_Aint_add((MPI_Aint)ds, dst_byte_offset),
5270 sr + src_index * src_size,
5271 #ifdef GCC_GE_8
5272 dst_type, GFC_DESCRIPTOR_TYPE(src),
5273 #else
5274 GFC_DESCRIPTOR_TYPE(dst),
5275 (dst_type == -1) ? GFC_DESCRIPTOR_TYPE(src) : dst_type,
5276 #endif
5277 dst_kind, src_kind, ref->item_size, src_size, num,
5278 stat, global_dynamic_win_rank);
5279 }
5280 else
5281 {
5282 put_data(mpi_token, dst_byte_offset, sr + src_index * src_size,
5283 #ifdef GCC_GE_8
5284 dst_type, GFC_DESCRIPTOR_TYPE(src),
5285 #else
5286 GFC_DESCRIPTOR_TYPE(dst),
5287 (dst_type == -1) ? GFC_DESCRIPTOR_TYPE(src) : dst_type,
5288 #endif
5289 dst_kind, src_kind, ref->item_size, src_size, num,
5290 stat, memptr_win_rank);
5291 }
5292 *i += num;
5293 return;
5294 }
5295 break;
5296 default:
5297 caf_runtime_error(unreachable);
5298 }
5299 }
5300 caf_array_ref_t array_ref_dst = ref->u.a.mode[dst_dim];
5301
5302 #if 0
5303 dprint("image_index = %d, num = %zd, src_dim = %zd, dst_dim = %zd, "
5304 "ref_type = %s, array_ref_src = %s\n",
5305 image_index, num, src_dim, dst_dim,
5306 caf_ref_type_str[ref_type],
5307 caf_array_ref_str[array_ref_src]);
5308 #endif
5309
5310 switch (ref_type)
5311 {
5312 case CAF_REF_COMPONENT:
5313 if (ref->u.c.caf_token_offset > 0)
5314 {
5315 dst_byte_offset += ref->u.c.offset;
5316 desc_byte_offset = dst_byte_offset;
5317 if (ds_global)
5318 {
5319 ierr = MPI_Get(&ds, stdptr_size, MPI_BYTE, global_dynamic_win_rank,
5320 MPI_Aint_add((MPI_Aint)ds, dst_byte_offset),
5321 stdptr_size, MPI_BYTE, global_dynamic_win);
5322 chk_err(ierr);
5323 desc_global = true;
5324 }
5325 else
5326 {
5327 ierr = MPI_Get(&ds, stdptr_size, MPI_BYTE, memptr_win_rank,
5328 dst_byte_offset, stdptr_size, MPI_BYTE,
5329 mpi_token->memptr_win); chk_err(ierr);
5330 ds_global = true;
5331 }
5332 dst_byte_offset = 0;
5333 }
5334 else
5335 {
5336 dst_byte_offset += ref->u.c.offset;
5337 desc_byte_offset += ref->u.c.offset;
5338 }
5339 send_for_ref(ref->next, i, src_index, mpi_token, dst, src, ds,
5340 sr, dst_byte_offset, desc_byte_offset, dst_kind, src_kind,
5341 dst_dim, 0, 1, stat, global_dynamic_win_rank, memptr_win_rank,
5342 ds_global, desc_global
5343 #ifdef GCC_GE_8
5344 , dst_type
5345 #endif
5346 );
5347 return;
5348 case CAF_REF_ARRAY:
5349 if (array_ref_src == CAF_ARR_REF_NONE)
5350 {
5351 send_for_ref(ref->next, i, src_index, mpi_token, dst, src, ds, sr,
5352 dst_byte_offset, desc_byte_offset, dst_kind, src_kind,
5353 dst_dim, 0, 1, stat, global_dynamic_win_rank, memptr_win_rank,
5354 ds_global, desc_global
5355 #ifdef GCC_GE_8
5356 , dst_type
5357 #endif
5358 );
5359 return;
5360 }
5361 /* Only when on the left most index switch the data pointer to
5362 * the array's data pointer. */
5363 if (src_dim == 0)
5364 {
5365 if (ds_global)
5366 {
5367 for (ref_rank = 0; ref->u.a.mode[ref_rank] != CAF_ARR_REF_NONE;
5368 ++ref_rank) ;
5369 /* Get the remote descriptor. */
5370 if (desc_global)
5371 {
5372 ierr = MPI_Get(&dst_desc_data, sizeof_desc_for_rank(ref_rank),
5373 MPI_BYTE, global_dynamic_win_rank,
5374 MPI_Aint_add((MPI_Aint)ds, desc_byte_offset),
5375 sizeof_desc_for_rank(ref_rank), MPI_BYTE,
5376 global_dynamic_win); chk_err(ierr);
5377 }
5378 else
5379 {
5380 ierr = MPI_Get(&dst_desc_data, sizeof_desc_for_rank(ref_rank),
5381 MPI_BYTE, memptr_win_rank, desc_byte_offset,
5382 sizeof_desc_for_rank(ref_rank),
5383 MPI_BYTE, mpi_token->memptr_win); chk_err(ierr);
5384 desc_global = true;
5385 }
5386 dst = (gfc_descriptor_t *)&dst_desc_data;
5387 }
5388 else
5389 {
5390 dst = mpi_token->desc;
5391 }
5392 dst_byte_offset = 0;
5393 desc_byte_offset = 0;
5394 #ifdef EXTRA_DEBUG_OUTPUT
5395 dprint("remote desc rank: %zd (ref_rank: %zd)\n",
5396 GFC_DESCRIPTOR_RANK(src), ref_rank);
5397 for (int r = 0; r < GFC_DESCRIPTOR_RANK(src); ++r)
5398 {
5399 dprint("remote desc dim[%d] = (lb=%zd, ub=%zd, stride=%zd)\n",
5400 r, src->dim[r].lower_bound, src->dim[r]._ubound,
5401 src->dim[r]._stride);
5402 }
5403 #endif
5404 }
5405 dprint("array_ref_dst[%zd] = %s := array_ref_src[%zd] = %s",
5406 dst_dim, caf_array_ref_str[array_ref_dst],
5407 src_dim, caf_array_ref_str[array_ref_src]);
5408 switch (array_ref_dst)
5409 {
5410 case CAF_ARR_REF_VECTOR:
5411 extent_dst = GFC_DESCRIPTOR_EXTENT(dst, dst_dim);
5412 array_offset_dst = 0;
5413 for (size_t idx = 0; idx < ref->u.a.dim[dst_dim].v.nvec; ++idx)
5414 {
5415 #define KINDCASE(kind, type) \
5416 case kind: \
5417 array_offset_dst = (((ptrdiff_t) \
5418 ((type *)ref->u.a.dim[dst_dim].v.vector)[idx]) \
5419 - dst->dim[dst_dim].lower_bound \
5420 * dst->dim[dst_dim]._stride); \
5421 break
5422
5423 switch (ref->u.a.dim[dst_dim].v.kind)
5424 {
5425 KINDCASE(1, int8_t);
5426 KINDCASE(2, int16_t);
5427 KINDCASE(4, int32_t);
5428 KINDCASE(8, int64_t);
5429 #ifdef HAVE_GFC_INTEGER_16
5430 KINDCASE(16, __int128);
5431 #endif
5432 default:
5433 caf_runtime_error(unreachable);
5434 return;
5435 }
5436 #undef KINDCASE
5437
5438 dprint("vector-index computed to: %zd\n", array_offset_dst);
5439 send_for_ref(ref, i, src_index, mpi_token, dst, src, ds, sr,
5440 dst_byte_offset + array_offset_dst * ref->item_size,
5441 desc_byte_offset + array_offset_dst * ref->item_size,
5442 dst_kind, src_kind, dst_dim + 1, src_dim + 1,
5443 1, stat, global_dynamic_win_rank, memptr_win_rank,
5444 ds_global, desc_global
5445 #ifdef GCC_GE_8
5446 , dst_type
5447 #endif
5448 );
5449 src_index += dst->dim[dst_dim]._stride;
5450 }
5451 return;
5452 case CAF_ARR_REF_FULL:
5453 COMPUTE_NUM_ITEMS(extent_dst,
5454 ref->u.a.dim[dst_dim].s.stride,
5455 dst->dim[dst_dim].lower_bound,
5456 dst->dim[dst_dim]._ubound);
5457 dst_stride = dst->dim[dst_dim]._stride
5458 * ref->u.a.dim[dst_dim].s.stride;
5459 array_offset_dst = 0;
5460 src_stride = (GFC_DESCRIPTOR_RANK(src) > 0) ?
5461 src->dim[src_dim]._stride : 0;
5462 dprint("CAF_ARR_REF_FULL: src_stride = %zd, dst_stride = %zd\n",
5463 src_stride, dst_stride);
5464 for (ptrdiff_t idx = 0; idx < extent_dst;
5465 ++idx, array_offset_dst += dst_stride)
5466 {
5467 send_for_ref(ref, i, src_index, mpi_token, dst, src, ds, sr,
5468 dst_byte_offset + array_offset_dst * ref->item_size,
5469 desc_byte_offset + array_offset_dst * ref->item_size,
5470 dst_kind, src_kind, dst_dim + 1, src_dim + 1,
5471 1, stat, global_dynamic_win_rank, memptr_win_rank,
5472 ds_global, desc_global
5473 #ifdef GCC_GE_8
5474 , dst_type
5475 #endif
5476 );
5477 src_index += src_stride;
5478 }
5479 // dprint("CAF_ARR_REF_FULL: return, i = %zd\n", *i);
5480 return;
5481
5482 case CAF_ARR_REF_RANGE:
5483 COMPUTE_NUM_ITEMS(extent_dst,
5484 ref->u.a.dim[dst_dim].s.stride,
5485 ref->u.a.dim[dst_dim].s.start,
5486 ref->u.a.dim[dst_dim].s.end);
5487 array_offset_dst =
5488 (ref->u.a.dim[dst_dim].s.start - dst->dim[dst_dim].lower_bound)
5489 * dst->dim[dst_dim]._stride;
5490 dst_stride = dst->dim[dst_dim]._stride
5491 * ref->u.a.dim[dst_dim].s.stride;
5492 src_stride = (GFC_DESCRIPTOR_RANK(src) > 0) ?
5493 src->dim[src_dim]._stride : 0;
5494 /* Increase the dst_dim only, when the src_extent is greater than one
5495 * or src and dst extent are both one. Don't increase when the
5496 * scalar source is not present in the dst. */
5497 next_dst_dim = (
5498 (extent_dst > 1) ||
5499 (GFC_DESCRIPTOR_EXTENT(src, src_dim) == 1 && extent_dst == 1)
5500 ) ? (dst_dim + 1) : dst_dim;
5501 for (ptrdiff_t idx = 0; idx < extent_dst; ++idx)
5502 {
5503 send_for_ref(ref, i, src_index, mpi_token, dst, src, ds, sr,
5504 dst_byte_offset + array_offset_dst * ref->item_size,
5505 desc_byte_offset + array_offset_dst * ref->item_size,
5506 dst_kind, src_kind, next_dst_dim, src_dim + 1,
5507 1, stat, global_dynamic_win_rank, memptr_win_rank,
5508 ds_global, desc_global
5509 #ifdef GCC_GE_8
5510 , dst_type
5511 #endif
5512 );
5513 src_index += src_stride;
5514 array_offset_dst += dst_stride;
5515 }
5516 // dprint("CAF_ARR_REF_RANGE: return, i = %zd\n", *i);
5517 return;
5518
5519 case CAF_ARR_REF_SINGLE:
5520 array_offset_dst =
5521 (ref->u.a.dim[dst_dim].s.start - dst->dim[dst_dim].lower_bound)
5522 * dst->dim[dst_dim]._stride;
5523 // FIXME: issue #552
5524 // next_dst_dim = (
5525 // (extent_dst > 1) ||
5526 // (GFC_DESCRIPTOR_EXTENT(src, src_dim) == 1 && extent_dst == 1)
5527 // ) ? (dst_dim + 1) : dst_dim;
5528 next_dst_dim = dst_dim;
5529 send_for_ref(ref, i, src_index, mpi_token, dst, src, ds, sr,
5530 dst_byte_offset + array_offset_dst * ref->item_size,
5531 desc_byte_offset + array_offset_dst * ref->item_size,
5532 dst_kind, src_kind, next_dst_dim, src_dim + 1,
5533 1, stat, global_dynamic_win_rank, memptr_win_rank,
5534 ds_global, desc_global
5535 #ifdef GCC_GE_8
5536 , dst_type
5537 #endif
5538 );
5539
5540 // dprint("CAF_ARR_REF_SINGLE: return, i = %zd\n", *i);
5541 return;
5542 case CAF_ARR_REF_OPEN_END:
5543 COMPUTE_NUM_ITEMS(extent_dst,
5544 ref->u.a.dim[dst_dim].s.stride,
5545 ref->u.a.dim[dst_dim].s.start,
5546 dst->dim[dst_dim]._ubound);
5547 dst_stride = dst->dim[dst_dim]._stride
5548 * ref->u.a.dim[dst_dim].s.stride;
5549 src_stride = (GFC_DESCRIPTOR_RANK(src) > 0) ?
5550 src->dim[src_dim]._stride : 0;
5551 array_offset_dst =
5552 (ref->u.a.dim[dst_dim].s.start - dst->dim[dst_dim].lower_bound)
5553 * dst->dim[dst_dim]._stride;
5554 for (ptrdiff_t idx = 0; idx < extent_dst; ++idx)
5555 {
5556 send_for_ref(ref, i, src_index, mpi_token, dst, src, ds, sr,
5557 dst_byte_offset + array_offset_dst * ref->item_size,
5558 desc_byte_offset + array_offset_dst * ref->item_size,
5559 dst_kind, src_kind, dst_dim + 1, src_dim + 1,
5560 1, stat, global_dynamic_win_rank, memptr_win_rank,
5561 ds_global, desc_global
5562 #ifdef GCC_GE_8
5563 , dst_type
5564 #endif
5565 );
5566 src_index += src_stride;
5567 array_offset_dst += dst_stride;
5568 }
5569 return;
5570 case CAF_ARR_REF_OPEN_START:
5571 COMPUTE_NUM_ITEMS(extent_dst,
5572 ref->u.a.dim[dst_dim].s.stride,
5573 dst->dim[dst_dim].lower_bound,
5574 ref->u.a.dim[dst_dim].s.end);
5575 dst_stride =
5576 dst->dim[dst_dim]._stride * ref->u.a.dim[dst_dim].s.stride;
5577 src_stride = (GFC_DESCRIPTOR_RANK(src) > 0) ?
5578 src->dim[src_dim]._stride : 0;
5579 array_offset_dst = 0;
5580 for (ptrdiff_t idx = 0; idx < extent_dst; ++idx)
5581 {
5582 send_for_ref(ref, i, src_index, mpi_token, dst, src, ds, sr,
5583 dst_byte_offset + array_offset_dst * ref->item_size,
5584 desc_byte_offset + array_offset_dst * ref->item_size,
5585 dst_kind, src_kind, dst_dim + 1, src_dim + 1,
5586 1, stat, global_dynamic_win_rank, memptr_win_rank,
5587 ds_global, desc_global
5588 #ifdef GCC_GE_8
5589 , dst_type
5590 #endif
5591 );
5592 src_index += src_stride;
5593 array_offset_dst += dst_stride;
5594 }
5595 return;
5596 default:
5597 caf_runtime_error(unreachable);
5598 }
5599 return;
5600 case CAF_REF_STATIC_ARRAY:
5601 if (array_ref_dst == CAF_ARR_REF_NONE)
5602 {
5603 send_for_ref(ref->next, i, src_index, mpi_token, dst, NULL, ds, sr,
5604 dst_byte_offset, desc_byte_offset, dst_kind, src_kind,
5605 dst_dim, 0, 1, stat, global_dynamic_win_rank, memptr_win_rank,
5606 ds_global, desc_global
5607 #ifdef GCC_GE_8
5608 , dst_type
5609 #endif
5610 );
5611 return;
5612 }
5613 switch (array_ref_dst)
5614 {
5615 case CAF_ARR_REF_VECTOR:
5616 array_offset_dst = 0;
5617 for (size_t idx = 0; idx < ref->u.a.dim[dst_dim].v.nvec; ++idx)
5618 {
5619 #define KINDCASE(kind, type) \
5620 case kind: \
5621 array_offset_dst = ((type *)ref->u.a.dim[dst_dim].v.vector)[idx]; \
5622 break
5623
5624 switch (ref->u.a.dim[dst_dim].v.kind)
5625 {
5626 KINDCASE(1, int8_t);
5627 KINDCASE(2, int16_t);
5628 KINDCASE(4, int32_t);
5629 KINDCASE(8, int64_t);
5630 #ifdef HAVE_GFC_INTEGER_16
5631 KINDCASE(16, __int128);
5632 #endif
5633 default:
5634 caf_runtime_error(unreachable);
5635 return;
5636 }
5637 #undef KINDCASE
5638
5639 send_for_ref(ref, i, src_index, mpi_token, dst, NULL, ds, sr,
5640 dst_byte_offset + array_offset_dst * ref->item_size,
5641 desc_byte_offset + array_offset_dst * ref->item_size,
5642 dst_kind, src_kind, dst_dim + 1, src_dim + 1,
5643 1, stat, global_dynamic_win_rank, memptr_win_rank,
5644 ds_global, desc_global
5645 #ifdef GCC_GE_8
5646 , dst_type
5647 #endif
5648 );
5649 src_index += src->dim[src_dim]._stride;
5650 }
5651 return;
5652 case CAF_ARR_REF_FULL:
5653 src_stride = (GFC_DESCRIPTOR_RANK(src) > 0) ?
5654 src->dim[src_dim]._stride : 0;
5655 for (array_offset_dst = 0 ;
5656 array_offset_dst <= ref->u.a.dim[dst_dim].s.end;
5657 array_offset_dst += ref->u.a.dim[dst_dim].s.stride)
5658 {
5659 send_for_ref(ref, i, src_index, mpi_token, dst, NULL, ds, sr,
5660 dst_byte_offset + array_offset_dst * ref->item_size,
5661 desc_byte_offset + array_offset_dst * ref->item_size,
5662 dst_kind, src_kind, dst_dim + 1, src_dim + 1,
5663 1, stat, global_dynamic_win_rank, memptr_win_rank,
5664 ds_global, desc_global
5665 #ifdef GCC_GE_8
5666 , dst_type
5667 #endif
5668 );
5669 src_index += src_stride;
5670 }
5671 return;
5672 case CAF_ARR_REF_RANGE:
5673 COMPUTE_NUM_ITEMS(extent_dst,
5674 ref->u.a.dim[dst_dim].s.stride,
5675 ref->u.a.dim[dst_dim].s.start,
5676 ref->u.a.dim[dst_dim].s.end);
5677 src_stride = (GFC_DESCRIPTOR_RANK (src) > 0) ?
5678 src->dim[src_dim]._stride : 0;
5679 array_offset_dst = ref->u.a.dim[dst_dim].s.start;
5680 for (ptrdiff_t idx = 0; idx < extent_dst; ++idx)
5681 {
5682 send_for_ref(ref, i, src_index, mpi_token, dst, NULL, ds, sr,
5683 dst_byte_offset + array_offset_dst * ref->item_size,
5684 desc_byte_offset + array_offset_dst * ref->item_size,
5685 dst_kind, src_kind, dst_dim + 1, src_dim + 1,
5686 1, stat, global_dynamic_win_rank, memptr_win_rank,
5687 ds_global, desc_global
5688 #ifdef GCC_GE_8
5689 , dst_type
5690 #endif
5691 );
5692 src_index += src_stride;
5693 array_offset_dst += ref->u.a.dim[dst_dim].s.stride;
5694 }
5695 return;
5696 case CAF_ARR_REF_SINGLE:
5697 array_offset_dst = ref->u.a.dim[dst_dim].s.start;
5698 send_for_ref(ref, i, src_index, mpi_token, dst, NULL, ds, sr,
5699 dst_byte_offset + array_offset_dst * ref->item_size,
5700 desc_byte_offset + array_offset_dst * ref->item_size,
5701 dst_kind, src_kind, dst_dim, src_dim + 1,
5702 1, stat, global_dynamic_win_rank, memptr_win_rank,
5703 ds_global, desc_global
5704 #ifdef GCC_GE_8
5705 , dst_type
5706 #endif
5707 );
5708 return;
5709 /* The OPEN_* are mapped to a RANGE and therefore can not occur. */
5710 case CAF_ARR_REF_OPEN_END:
5711 case CAF_ARR_REF_OPEN_START:
5712 default:
5713 caf_runtime_error(unreachable);
5714 }
5715 return;
5716 default:
5717 caf_runtime_error(unreachable);
5718 }
5719 }
5720
5721
5722 void
5723 PREFIX(send_by_ref) (caf_token_t token, int image_index,
5724 gfc_descriptor_t *src, caf_reference_t *refs,
5725 int dst_kind, int src_kind, bool may_require_tmp,
5726 bool dst_reallocatable, int *stat
5727 #ifdef GCC_GE_8
5728 , int dst_type
5729 #endif
5730 )
5731 {
5732 const char vecrefunknownkind[] =
5733 "libcaf_mpi::caf_send_by_ref(): unknown kind in vector-ref.\n";
5734 const char unknownreftype[] =
5735 "libcaf_mpi::caf_send_by_ref(): unknown reference type.\n";
5736 const char unknownarrreftype[] =
5737 "libcaf_mpi::caf_send_by_ref(): unknown array reference type.\n";
5738 const char rankoutofrange[] =
5739 "libcaf_mpi::caf_send_by_ref(): rank out of range.\n";
5740 const char extentoutofrange[] =
5741 "libcaf_mpi::caf_send_by_ref(): extent out of range.\n";
5742 const char cannotallocdst[] =
5743 "libcaf_mpi::caf_send_by_ref(): can not allocate %d bytes of memory.\n";
5744 const char unabletoallocdst[] =
5745 "libcaf_mpi::caf_send_by_ref(): "
5746 "unable to allocate memory on remote image.\n";
5747 const char nonallocextentmismatch[] =
5748 "libcaf_mpi::caf_send_by_ref(): "
5749 "extent of non-allocatable arrays mismatch (%lu != %lu).\n";
5750
5751 size_t size, i, ref_rank = 0, src_index, dst_size;
5752 int dst_rank = -1, src_cur_dim = 0, ierr;
5753 mpi_caf_token_t *mpi_token = (mpi_caf_token_t *) token;
5754 void *remote_memptr = mpi_token->memptr, *remote_base_memptr = NULL;
5755 gfc_max_dim_descriptor_t dst_desc, temp_src;
5756 gfc_descriptor_t *dst = (gfc_descriptor_t *)&dst_desc;
5757 caf_reference_t *riter = refs;
5758 long delta;
5759 ptrdiff_t data_offset = 0, desc_offset = 0;
5760 /* Reallocation of data on remote is needed (e.g., array to small). This is
5761 * used for error tracking only. It is not (yet) possible to allocate memory
5762 * on the remote image. */
5763 bool realloc_dst = false, extent_mismatch = false;
5764 /* Set when the first non-scalar array reference is encountered. */
5765 bool in_array_ref = false;
5766 /* Set when remote data is to be accessed through the
5767 * global dynamic window. */
5768 bool access_data_through_global_win = false;
5769 /* Set when the remote descriptor is to accessed through the global window. */
5770 bool access_desc_through_global_win = false;
5771 bool free_temp_src = false;
5772 caf_array_ref_t array_ref;
5773
5774 if (stat)
5775 *stat = 0;
5776
5777 MPI_Group current_team_group, win_group;
5778 int global_dynamic_win_rank, memptr_win_rank;
5779 ierr = MPI_Comm_group(CAF_COMM_WORLD, ¤t_team_group); chk_err(ierr);
5780 ierr = MPI_Win_get_group(global_dynamic_win, &win_group); chk_err(ierr);
5781 ierr = MPI_Group_translate_ranks(current_team_group, 1,
5782 (int[]){image_index - 1}, win_group,
5783 &global_dynamic_win_rank); chk_err(ierr);
5784 ierr = MPI_Group_free(&win_group); chk_err(ierr);
5785 ierr = MPI_Win_get_group(mpi_token->memptr_win, &win_group); chk_err(ierr);
5786 ierr = MPI_Group_translate_ranks(current_team_group, 1,
5787 (int[]){image_index - 1}, win_group,
5788 &memptr_win_rank); chk_err(ierr);
5789 ierr = MPI_Group_free(¤t_team_group); chk_err(ierr);
5790 ierr = MPI_Group_free(&win_group); chk_err(ierr);
5791
5792 check_image_health(global_dynamic_win_rank, stat);
5793
5794 #ifdef GCC_GE_8
5795 dprint("Entering send_by_ref(may_require_tmp = %d, dst_type = %d)\n",
5796 may_require_tmp, dst_type);
5797 #else
5798 dprint("Entering send_by_ref(may_require_tmp = %d)\n", may_require_tmp);
5799 #endif
5800
5801 /* Compute the size of the result. In the beginning size just counts the
5802 * number of elements. */
5803 size = 1;
5804 /* Shared lock both windows to prevent bother in the sub-routines. */
5805 CAF_Win_lock(MPI_LOCK_SHARED, global_dynamic_win_rank, global_dynamic_win);
5806 CAF_Win_lock(MPI_LOCK_SHARED, memptr_win_rank, mpi_token->memptr_win);
5807 while (riter)
5808 {
5809 dprint("remote_image = %d, offset = %zd, remote_mem = %p\n",
5810 global_dynamic_win_rank, data_offset, remote_memptr);
5811 switch (riter->type)
5812 {
5813 case CAF_REF_COMPONENT:
5814 if (riter->u.c.caf_token_offset > 0)
5815 {
5816 if (access_data_through_global_win)
5817 {
5818 data_offset += riter->u.c.offset;
5819 remote_base_memptr = remote_memptr;
5820 ierr = MPI_Get(&remote_memptr, stdptr_size, MPI_BYTE, global_dynamic_win_rank,
5821 MPI_Aint_add((MPI_Aint)remote_memptr, data_offset),
5822 stdptr_size, MPI_BYTE, global_dynamic_win);
5823 chk_err(ierr);
5824 /* On the second indirection access also the remote descriptor
5825 * using the global window. */
5826 access_desc_through_global_win = true;
5827 }
5828 else
5829 {
5830 data_offset += riter->u.c.offset;
5831 ierr = MPI_Get(&remote_memptr, stdptr_size, MPI_BYTE, memptr_win_rank,
5832 data_offset, stdptr_size, MPI_BYTE,
5833 mpi_token->memptr_win); chk_err(ierr);
5834 /* All future access is through the global dynamic window. */
5835 access_data_through_global_win = true;
5836 }
5837 desc_offset = data_offset;
5838 data_offset = 0;
5839 }
5840 else
5841 {
5842 data_offset += riter->u.c.offset;
5843 desc_offset += riter->u.c.offset;
5844 }
5845 break;
5846 case CAF_REF_ARRAY:
5847 /* When there has been no CAF_REF_COMP before hand, then the descriptor
5848 * is stored in the token and the extends are the same on all images,
5849 * which is taken care of in the else part. */
5850 if (access_data_through_global_win)
5851 {
5852 for (ref_rank = 0;
5853 riter->u.a.mode[ref_rank] != CAF_ARR_REF_NONE; ++ref_rank) ;
5854 /* Get the remote descriptor and use the stack to store it
5855 * Note, dst may be pointing to mpi_token->desc therefore it
5856 * needs to be reset here. */
5857 dst = (gfc_descriptor_t *)&dst_desc;
5858 if (access_desc_through_global_win)
5859 {
5860 dprint("remote desc fetch from %p, offset = %zd\n",
5861 remote_base_memptr, desc_offset);
5862 ierr = MPI_Get(dst, sizeof_desc_for_rank(ref_rank), MPI_BYTE,
5863 global_dynamic_win_rank,
5864 MPI_Aint_add(
5865 (MPI_Aint)remote_base_memptr, desc_offset),
5866 sizeof_desc_for_rank(ref_rank), MPI_BYTE,
5867 global_dynamic_win); chk_err(ierr);
5868 }
5869 else
5870 {
5871 dprint("remote desc fetch from win %d, offset = %zd\n",
5872 mpi_token->memptr_win, desc_offset);
5873 ierr = MPI_Get(dst, sizeof_desc_for_rank(ref_rank), MPI_BYTE,
5874 memptr_win_rank, desc_offset,
5875 sizeof_desc_for_rank(ref_rank), MPI_BYTE,
5876 mpi_token->memptr_win); chk_err(ierr);
5877 access_desc_through_global_win = true;
5878 }
5879 }
5880 else
5881 dst = mpi_token->desc;
5882 #ifdef EXTRA_DEBUG_OUTPUT
5883 dprint("remote desc rank: %zd (ref_rank: %zd)\n",
5884 GFC_DESCRIPTOR_RANK(dst), ref_rank);
5885 for (i = 0; i < GFC_DESCRIPTOR_RANK(dst); ++i)
5886 {
5887 dprint("remote desc dim[%zd] = (lb=%zd, ub=%zd, stride=%zd)\n",
5888 i, dst->dim[i].lower_bound, dst->dim[i]._ubound,
5889 dst->dim[i]._stride);
5890 }
5891 #endif
5892 for (i = 0; riter->u.a.mode[i] != CAF_ARR_REF_NONE; ++i)
5893 {
5894 array_ref = riter->u.a.mode[i];
5895 switch (array_ref)
5896 {
5897 case CAF_ARR_REF_VECTOR:
5898 delta = riter->u.a.dim[i].v.nvec;
5899 #define KINDCASE(kind, type) \
5900 case kind: \
5901 remote_memptr += (((ptrdiff_t) \
5902 ((type *)riter->u.a.dim[i].v.vector)[0]) - src->dim[i].lower_bound) \
5903 * src->dim[i]._stride * riter->item_size; \
5904 break
5905 switch (riter->u.a.dim[i].v.kind)
5906 {
5907 KINDCASE(1, int8_t);
5908 KINDCASE(2, int16_t);
5909 KINDCASE(4, int32_t);
5910 KINDCASE(8, int64_t);
5911 #ifdef HAVE_GFC_INTEGER_16
5912 KINDCASE(16, __int128);
5913 #endif
5914 default:
5915 caf_runtime_error(vecrefunknownkind, stat, NULL, 0);
5916 return;
5917 }
5918 #undef KINDCASE
5919 break;
5920 case CAF_ARR_REF_FULL:
5921 COMPUTE_NUM_ITEMS(delta,
5922 riter->u.a.dim[i].s.stride,
5923 dst->dim[i].lower_bound,
5924 dst->dim[i]._ubound);
5925 /* The memptr stays unchanged when ref'ing the first element in
5926 * a dimension. */
5927 break;
5928 case CAF_ARR_REF_RANGE:
5929 COMPUTE_NUM_ITEMS(delta,
5930 riter->u.a.dim[i].s.stride,
5931 riter->u.a.dim[i].s.start,
5932 riter->u.a.dim[i].s.end);
5933 remote_memptr +=
5934 (riter->u.a.dim[i].s.start - dst->dim[i].lower_bound)
5935 * dst->dim[i]._stride * riter->item_size;
5936 break;
5937 case CAF_ARR_REF_SINGLE:
5938 delta = 1;
5939 remote_memptr +=
5940 (riter->u.a.dim[i].s.start - dst->dim[i].lower_bound)
5941 * dst->dim[i]._stride * riter->item_size;
5942 break;
5943 case CAF_ARR_REF_OPEN_END:
5944 COMPUTE_NUM_ITEMS(delta,
5945 riter->u.a.dim[i].s.stride,
5946 riter->u.a.dim[i].s.start,
5947 dst->dim[i]._ubound);
5948 remote_memptr +=
5949 (riter->u.a.dim[i].s.start - dst->dim[i].lower_bound)
5950 * dst->dim[i]._stride * riter->item_size;
5951 break;
5952 case CAF_ARR_REF_OPEN_START:
5953 COMPUTE_NUM_ITEMS(delta,
5954 riter->u.a.dim[i].s.stride,
5955 dst->dim[i].lower_bound,
5956 riter->u.a.dim[i].s.end);
5957 /* The memptr stays unchanged when ref'ing the first element in
5958 * a dimension. */
5959 break;
5960 default:
5961 caf_runtime_error(unknownarrreftype, stat, NULL, 0);
5962 return;
5963 } // switch
5964 dprint("i = %zd, array_ref = %s, delta = %ld\n",
5965 i, caf_array_ref_str[array_ref], delta);
5966 if (delta <= 0)
5967 return;
5968 if (dst != NULL)
5969 dst_rank = GFC_DESCRIPTOR_RANK(dst);
5970 /* Check the various properties of the destination array.
5971 * Is an array expected and present? */
5972 if (delta > 1 && dst_rank == 0)
5973 {
5974 /* No, an array is required, but not provided. */
5975 caf_runtime_error(extentoutofrange, stat, NULL, 0);
5976 return;
5977 }
5978 /* When dst is an array. */
5979 if (dst_rank > 0)
5980 {
5981 /* Check that src_cur_dim is valid for dst. Can be superceeded
5982 * only by scalar data. */
5983 if (src_cur_dim >= dst_rank && delta != 1)
5984 {
5985 caf_runtime_error(rankoutofrange, stat, NULL, 0);
5986 return;
5987 }
5988 /* Do further checks, when the source is not scalar. */
5989 else if (delta != 1)
5990 {
5991 in_array_ref = true;
5992 /* When the realloc is required, then no extent may have
5993 * been set. */
5994 extent_mismatch = GFC_DESCRIPTOR_EXTENT(dst, src_cur_dim) < delta;
5995 /* When it already known, that a realloc is needed or
5996 * the extent does not match the needed one. */
5997 if (realloc_dst || extent_mismatch)
5998 {
5999 /* Check whether dst is reallocatable. */
6000 if (unlikely(!dst_reallocatable))
6001 {
6002 caf_runtime_error(nonallocextentmismatch, stat,
6003 NULL, 0, delta,
6004 GFC_DESCRIPTOR_EXTENT(dst, src_cur_dim));
6005 return;
6006 }
6007 /* Only report an error, when the extent needs to be
6008 * modified, which is not allowed. */
6009 else if (!dst_reallocatable && extent_mismatch)
6010 {
6011 caf_runtime_error(extentoutofrange, stat, NULL, 0);
6012 return;
6013 }
6014 dprint("extent(dst, %d): %zd != delta: %ld.\n", src_cur_dim,
6015 GFC_DESCRIPTOR_EXTENT(dst, src_cur_dim), delta);
6016 realloc_dst = true;
6017 }
6018 }
6019
6020 if (src_cur_dim < GFC_DESCRIPTOR_RANK(src))
6021 ++src_cur_dim;
6022 }
6023 size *= (ptrdiff_t)delta;
6024 }
6025 in_array_ref = false;
6026 break;
6027 case CAF_REF_STATIC_ARRAY:
6028 for (i = 0; riter->u.a.mode[i] != CAF_ARR_REF_NONE; ++i)
6029 {
6030 array_ref = riter->u.a.mode[i];
6031 switch (array_ref)
6032 {
6033 case CAF_ARR_REF_VECTOR:
6034 delta = riter->u.a.dim[i].v.nvec;
6035 #define KINDCASE(kind, type) \
6036 case kind: \
6037 remote_memptr += \
6038 ((type *)riter->u.a.dim[i].v.vector)[0] * riter->item_size; \
6039 break
6040
6041 switch (riter->u.a.dim[i].v.kind)
6042 {
6043 KINDCASE(1, int8_t);
6044 KINDCASE(2, int16_t);
6045 KINDCASE(4, int32_t);
6046 KINDCASE(8, int64_t);
6047 #ifdef HAVE_GFC_INTEGER_16
6048 KINDCASE(16, __int128);
6049 #endif
6050 default:
6051 caf_runtime_error(vecrefunknownkind, stat, NULL, 0);
6052 return;
6053 }
6054 #undef KINDCASE
6055 break;
6056 case CAF_ARR_REF_FULL:
6057 delta =
6058 riter->u.a.dim[i].s.end / riter->u.a.dim[i].s.stride + 1;
6059 /* The memptr stays unchanged when ref'ing the first element
6060 * in a dimension. */
6061 break;
6062 case CAF_ARR_REF_RANGE:
6063 COMPUTE_NUM_ITEMS(delta,
6064 riter->u.a.dim[i].s.stride,
6065 riter->u.a.dim[i].s.start,
6066 riter->u.a.dim[i].s.end);
6067 remote_memptr += riter->u.a.dim[i].s.start
6068 * riter->u.a.dim[i].s.stride * riter->item_size;
6069 break;
6070 case CAF_ARR_REF_SINGLE:
6071 delta = 1;
6072 remote_memptr += riter->u.a.dim[i].s.start
6073 * riter->u.a.dim[i].s.stride
6074 * riter->item_size;
6075 break;
6076 case CAF_ARR_REF_OPEN_END:
6077 /* This and OPEN_START are mapped to a RANGE and therefore
6078 * can not occur here. */
6079 case CAF_ARR_REF_OPEN_START:
6080 default:
6081 caf_runtime_error(unknownarrreftype, stat, NULL, 0);
6082 return;
6083 } // switch
6084 dprint("i = %zd, array_ref = %s, delta = %ld\n",
6085 i, caf_array_ref_str[array_ref], delta);
6086 if (delta <= 0)
6087 return;
6088 if (dst != NULL)
6089 dst_rank = GFC_DESCRIPTOR_RANK(dst);
6090 /* Check the various properties of the destination array.
6091 * Is an array expected and present? */
6092 if (delta > 1 && dst_rank == 0)
6093 {
6094 /* No, an array is required, but not provided. */
6095 caf_runtime_error(extentoutofrange, stat, NULL, 0);
6096 return;
6097 }
6098 /* When dst is an array. */
6099 if (dst_rank > 0)
6100 {
6101 /* Check that src_cur_dim is valid for dst. Can be superceeded
6102 * only by scalar data. */
6103 if (src_cur_dim >= dst_rank && delta != 1)
6104 {
6105 caf_runtime_error(rankoutofrange, stat, NULL, 0);
6106 return;
6107 }
6108 /* Do further checks, when the source is not scalar. */
6109 else if (delta != 1)
6110 {
6111 in_array_ref = true;
6112 /* When the realloc is required, then no extent may have
6113 * been set. */
6114 extent_mismatch = GFC_DESCRIPTOR_EXTENT(dst, src_cur_dim) < delta;
6115 /* When it is already known, that a realloc is needed or
6116 * the extent does not match the needed one. */
6117 if (realloc_dst || extent_mismatch)
6118 {
6119 caf_runtime_error(unabletoallocdst, stat);
6120 return;
6121 }
6122 }
6123 if (src_cur_dim < GFC_DESCRIPTOR_RANK(src))
6124 ++src_cur_dim;
6125 }
6126 size *= (ptrdiff_t)delta;
6127 }
6128 in_array_ref = false;
6129 break;
6130 default:
6131 caf_runtime_error(unknownreftype, stat, NULL, 0);
6132 return;
6133 }
6134 dst_size = riter->item_size;
6135 riter = riter->next;
6136 }
6137 if (size == 0 || dst_size == 0)
6138 return;
6139 /* Postcondition:
6140 * - size contains the number of elements to store in the destination array,
6141 * - dst_size gives the size in bytes of each item in the destination array.
6142 */
6143
6144 if (realloc_dst)
6145 {
6146 caf_runtime_error(unabletoallocdst, stat);
6147 return;
6148 }
6149
6150 /* Reset the token. */
6151 mpi_token = (mpi_caf_token_t *) token;
6152 remote_memptr = mpi_token->memptr;
6153 src_index = 0;
6154 #ifdef EXTRA_DEBUG_OUTPUT
6155 dprint("src_rank: %zd\n", GFC_DESCRIPTOR_RANK(src));
6156 for (i = 0; i < GFC_DESCRIPTOR_RANK(src); ++i)
6157 {
6158 dprint("src_dim[%zd] = (%zd, %zd)\n",
6159 i, src->dim[i].lower_bound, src->dim[i]._ubound);
6160 }
6161 #endif
6162 /* When accessing myself and may_require_tmp is set, then copy the source
6163 * array. */
6164 if (caf_this_image == image_index && may_require_tmp)
6165 {
6166 dprint("preparing temporary source.\n");
6167 memcpy(&temp_src, src, sizeof_desc_for_rank(GFC_DESCRIPTOR_RANK(src)));
6168 size_t cap = 0;
6169 for (int r = 0; r < GFC_DESCRIPTOR_RANK(src); ++r)
6170 {
6171 cap += GFC_DESCRIPTOR_EXTENT(src, r);
6172 }
6173
6174 cap *= GFC_DESCRIPTOR_SIZE(src);
6175 temp_src.base.base_addr = alloca(cap);
6176 if ((free_temp_src = (temp_src.base.base_addr == NULL)))
6177 {
6178 temp_src.base.base_addr = malloc(cap);
6179 if (temp_src.base.base_addr == NULL)
6180 {
6181 caf_runtime_error(cannotallocdst, stat, NULL, cap);
6182 return;
6183 }
6184 }
6185 memcpy(temp_src.base.base_addr, src->base_addr, cap);
6186 src = (gfc_descriptor_t *)&temp_src;
6187 }
6188
6189 i = 0;
6190 dprint("calling send_for_ref. num elems: size = %zd, elem size in bytes: "
6191 "dst_size = %zd\n", size, dst_size);
6192 send_for_ref(refs, &i, src_index, mpi_token, mpi_token->desc, src,
6193 remote_memptr, src->base_addr, 0, 0, dst_kind, src_kind, 0, 0,
6194 1, stat, global_dynamic_win_rank, memptr_win_rank,
6195 false, false
6196 #ifdef GCC_GE_8
6197 , dst_type
6198 #endif
6199 );
6200 if (free_temp_src)
6201 {
6202 free(temp_src.base.base_addr);
6203 }
6204 CAF_Win_unlock(global_dynamic_win_rank, global_dynamic_win);
6205 CAF_Win_unlock(memptr_win_rank, mpi_token->memptr_win);
6206 }
6207
6208
6209 void
6210 PREFIX(sendget_by_ref) (caf_token_t dst_token, int dst_image_index,
6211 caf_reference_t *dst_refs, caf_token_t src_token,
6212 int src_image_index, caf_reference_t *src_refs,
6213 int dst_kind, int src_kind,
6214 bool may_require_tmp, int *dst_stat, int *src_stat
6215 #ifdef GCC_GE_8
6216 , int dst_type, int src_type
6217 #endif
6218 )
6219 {
6220 const char vecrefunknownkind[] =
6221 "libcaf_mpi::caf_sendget_by_ref(): unknown kind in vector-ref.\n";
6222 const char unknownreftype[] =
6223 "libcaf_mpi::caf_sendget_by_ref(): unknown reference type.\n";
6224 const char unknownarrreftype[] =
6225 "libcaf_mpi::caf_sendget_by_ref(): unknown array reference type.\n";
6226 const char cannotallocdst[] =
6227 "libcaf_mpi::caf_sendget_by_ref(): can not allocate %d bytes of memory.\n";
6228 size_t size, i, ref_rank, dst_index, src_index = 0, src_size;
6229 int dst_rank, ierr;
6230 mpi_caf_token_t
6231 *src_mpi_token = (mpi_caf_token_t *) src_token,
6232 *dst_mpi_token = (mpi_caf_token_t *) dst_token;
6233 void *remote_memptr = src_mpi_token->memptr, *remote_base_memptr = NULL;
6234 gfc_max_dim_descriptor_t src_desc;
6235 gfc_max_dim_descriptor_t temp_src_desc;
6236 gfc_descriptor_t *src = (gfc_descriptor_t *)&src_desc;
6237 caf_reference_t *riter = src_refs;
6238 long delta;
6239 ptrdiff_t data_offset = 0, desc_offset = 0;
6240 MPI_Group current_team_group, win_group;
6241 int global_dst_rank, global_src_rank, memptr_dst_rank, memptr_src_rank;
6242 /* Set when the first non-scalar array reference is encountered. */
6243 bool in_array_ref = false;
6244 bool array_extent_fixed = false;
6245 /* Set when remote data is to be accessed through the
6246 * global dynamic window. */
6247 bool access_data_through_global_win = false;
6248 /* Set when the remote descriptor is to accessed through the global window. */
6249 bool access_desc_through_global_win = false;
6250 caf_array_ref_t array_ref;
6251 #ifndef GCC_GE_8
6252 int dst_type = -1, src_type = -1;
6253 #endif
6254
6255 if (src_stat)
6256 *src_stat = 0;
6257
6258 ierr = MPI_Comm_group(CAF_COMM_WORLD, ¤t_team_group); chk_err(ierr);
6259
6260 ierr = MPI_Win_get_group(global_dynamic_win, &win_group); chk_err(ierr);
6261 ierr = MPI_Group_translate_ranks(current_team_group, 1,
6262 (int[]){src_image_index - 1}, win_group,
6263 &global_src_rank); chk_err(ierr);
6264 ierr = MPI_Group_translate_ranks(current_team_group, 1,
6265 (int[]){dst_image_index - 1}, win_group,
6266 &global_dst_rank); chk_err(ierr);
6267 ierr = MPI_Group_free(&win_group); chk_err(ierr);
6268
6269 ierr = MPI_Win_get_group(src_mpi_token->memptr_win, &win_group); chk_err(ierr);
6270 ierr = MPI_Group_translate_ranks(current_team_group, 1,
6271 (int[]){src_image_index - 1}, win_group,
6272 &memptr_src_rank); chk_err(ierr);
6273 ierr = MPI_Group_free(&win_group); chk_err(ierr);
6274 ierr = MPI_Win_get_group(dst_mpi_token->memptr_win, &win_group); chk_err(ierr);
6275 ierr = MPI_Group_translate_ranks(current_team_group, 1,
6276 (int[]){dst_image_index - 1}, win_group,
6277 &memptr_dst_rank); chk_err(ierr);
6278 ierr = MPI_Group_free(&win_group); chk_err(ierr);
6279 ierr = MPI_Group_free(¤t_team_group); chk_err(ierr);
6280
6281 check_image_health(global_src_rank, src_stat);
6282
6283 dprint("Entering get_by_ref(may_require_tmp = %d, dst_type = %d(%d), "
6284 "src_type = %d(%d)).\n",
6285 may_require_tmp, dst_type, dst_kind, src_type, src_kind);
6286
6287 /* Compute the size of the result. In the beginning size just counts the
6288 * number of elements. */
6289 size = 1;
6290 /* Shared lock both windows to prevent bother in the sub-routines. */
6291 CAF_Win_lock(MPI_LOCK_SHARED, global_src_rank, global_dynamic_win);
6292 CAF_Win_lock(MPI_LOCK_SHARED, memptr_src_rank, src_mpi_token->memptr_win);
6293 while (riter)
6294 {
6295 dprint("offset = %zd, remote_mem = %p\n", data_offset, remote_memptr);
6296 switch (riter->type)
6297 {
6298 case CAF_REF_COMPONENT:
6299 if (riter->u.c.caf_token_offset > 0)
6300 {
6301 if (access_data_through_global_win)
6302 {
6303 data_offset += riter->u.c.offset;
6304 remote_base_memptr = remote_memptr;
6305 ierr = MPI_Get(&remote_memptr, stdptr_size, MPI_BYTE,
6306 global_src_rank,
6307 MPI_Aint_add((MPI_Aint)remote_memptr, data_offset),
6308 stdptr_size, MPI_BYTE, global_dynamic_win);
6309 chk_err(ierr);
6310 /* On the second indirection access also the remote descriptor
6311 * using the global window. */
6312 access_desc_through_global_win = true;
6313 }
6314 else
6315 {
6316 data_offset += riter->u.c.offset;
6317 ierr = MPI_Get(&remote_memptr, stdptr_size, MPI_BYTE,
6318 memptr_src_rank, data_offset, stdptr_size, MPI_BYTE,
6319 src_mpi_token->memptr_win); chk_err(ierr);
6320 /* All future access is through the global dynamic window. */
6321 access_data_through_global_win = true;
6322 }
6323 desc_offset = data_offset;
6324 data_offset = 0;
6325 }
6326 else
6327 {
6328 data_offset += riter->u.c.offset;
6329 desc_offset += riter->u.c.offset;
6330 }
6331 break;
6332 case CAF_REF_ARRAY:
6333 /* When there has been no CAF_REF_COMP before hand, then the
6334 * descriptor is stored in the token and the extends are the same on all
6335 * images, which is taken care of in the else part. */
6336 if (access_data_through_global_win)
6337 {
6338 for (ref_rank = 0; riter->u.a.mode[ref_rank] != CAF_ARR_REF_NONE;
6339 ++ref_rank) ;
6340 /* Get the remote descriptor and use the stack to store it. Note,
6341 * src may be pointing to mpi_token->desc therefore it needs to be
6342 * reset here. */
6343 src = (gfc_descriptor_t *)&src_desc;
6344 if (access_desc_through_global_win)
6345 {
6346 dprint("remote desc fetch from %p, offset = %zd\n",
6347 remote_base_memptr, desc_offset);
6348 ierr = MPI_Get(src, sizeof_desc_for_rank(ref_rank), MPI_BYTE,
6349 global_src_rank,
6350 MPI_Aint_add(
6351 (MPI_Aint)remote_base_memptr, desc_offset),
6352 sizeof_desc_for_rank(ref_rank), MPI_BYTE,
6353 global_dynamic_win); chk_err(ierr);
6354 }
6355 else
6356 {
6357 dprint("remote desc fetch from win %d, offset = %zd\n",
6358 src_mpi_token->memptr_win, desc_offset);
6359 ierr = MPI_Get(src, sizeof_desc_for_rank(ref_rank), MPI_BYTE,
6360 memptr_src_rank, desc_offset,
6361 sizeof_desc_for_rank(ref_rank),
6362 MPI_BYTE, src_mpi_token->memptr_win); chk_err(ierr);
6363 access_desc_through_global_win = true;
6364 }
6365 }
6366 else
6367 {
6368 src = src_mpi_token->desc;
6369 }
6370 #ifdef EXTRA_DEBUG_OUTPUT
6371 dprint("remote desc rank: %zd (ref_rank: %zd)\n",
6372 GFC_DESCRIPTOR_RANK(src), ref_rank);
6373 for (i = 0; i < GFC_DESCRIPTOR_RANK(src); ++i)
6374 {
6375 dprint("remote desc dim[%zd] = (lb=%zd, ub=%zd, stride=%zd)\n",
6376 i, src->dim[i].lower_bound, src->dim[i]._ubound,
6377 src->dim[i]._stride);
6378 }
6379 #endif
6380 for (i = 0; riter->u.a.mode[i] != CAF_ARR_REF_NONE; ++i)
6381 {
6382 array_ref = riter->u.a.mode[i];
6383 switch (array_ref)
6384 {
6385 case CAF_ARR_REF_VECTOR:
6386 delta = riter->u.a.dim[i].v.nvec;
6387 #define KINDCASE(kind, type) \
6388 case kind: \
6389 remote_memptr += (((ptrdiff_t) \
6390 ((type *)riter->u.a.dim[i].v.vector)[0]) - src->dim[i].lower_bound) \
6391 * src->dim[i]._stride * riter->item_size; \
6392 break
6393 switch (riter->u.a.dim[i].v.kind)
6394 {
6395 KINDCASE(1, int8_t);
6396 KINDCASE(2, int16_t);
6397 KINDCASE(4, int32_t);
6398 KINDCASE(8, int64_t);
6399 #ifdef HAVE_GFC_INTEGER_16
6400 KINDCASE(16, __int128);
6401 #endif
6402 default:
6403 caf_runtime_error(vecrefunknownkind, src_stat, NULL, 0);
6404 return;
6405 }
6406 #undef KINDCASE
6407 break;
6408 case CAF_ARR_REF_FULL:
6409 COMPUTE_NUM_ITEMS(delta,
6410 riter->u.a.dim[i].s.stride,
6411 src->dim[i].lower_bound,
6412 src->dim[i]._ubound);
6413 /* The memptr stays unchanged when ref'ing the first element
6414 * in a dimension. */
6415 break;
6416 case CAF_ARR_REF_RANGE:
6417 COMPUTE_NUM_ITEMS(delta,
6418 riter->u.a.dim[i].s.stride,
6419 riter->u.a.dim[i].s.start,
6420 riter->u.a.dim[i].s.end);
6421 remote_memptr +=
6422 (riter->u.a.dim[i].s.start - src->dim[i].lower_bound)
6423 * src->dim[i]._stride * riter->item_size;
6424 break;
6425 case CAF_ARR_REF_SINGLE:
6426 delta = 1;
6427 remote_memptr +=
6428 (riter->u.a.dim[i].s.start - src->dim[i].lower_bound)
6429 * src->dim[i]._stride * riter->item_size;
6430 break;
6431 case CAF_ARR_REF_OPEN_END:
6432 COMPUTE_NUM_ITEMS(delta,
6433 riter->u.a.dim[i].s.stride,
6434 riter->u.a.dim[i].s.start,
6435 src->dim[i]._ubound);
6436 remote_memptr +=
6437 (riter->u.a.dim[i].s.start - src->dim[i].lower_bound)
6438 * src->dim[i]._stride * riter->item_size;
6439 break;
6440 case CAF_ARR_REF_OPEN_START:
6441 COMPUTE_NUM_ITEMS(delta,
6442 riter->u.a.dim[i].s.stride,
6443 src->dim[i].lower_bound,
6444 riter->u.a.dim[i].s.end);
6445 /* The memptr stays unchanged when ref'ing the first element
6446 * in a dimension. */
6447 break;
6448 default:
6449 caf_runtime_error(unknownarrreftype, src_stat, NULL, 0);
6450 return;
6451 } // switch
6452 dprint("i = %zd, array_ref = %s, delta = %ld\n",
6453 i, caf_array_ref_str[array_ref], delta);
6454 if (delta <= 0)
6455 return;
6456 size *= (ptrdiff_t)delta;
6457 }
6458 if (in_array_ref)
6459 {
6460 array_extent_fixed = true;
6461 in_array_ref = false;
6462 }
6463 break;
6464 case CAF_REF_STATIC_ARRAY:
6465 for (i = 0; riter->u.a.mode[i] != CAF_ARR_REF_NONE; ++i)
6466 {
6467 array_ref = riter->u.a.mode[i];
6468 switch (array_ref)
6469 {
6470 case CAF_ARR_REF_VECTOR:
6471 delta = riter->u.a.dim[i].v.nvec;
6472 #define KINDCASE(kind, type) \
6473 case kind: \
6474 remote_memptr += \
6475 ((type *)riter->u.a.dim[i].v.vector)[0] * riter->item_size; \
6476 break
6477 switch (riter->u.a.dim[i].v.kind)
6478 {
6479 KINDCASE(1, int8_t);
6480 KINDCASE(2, int16_t);
6481 KINDCASE(4, int32_t);
6482 KINDCASE(8, int64_t);
6483 #ifdef HAVE_GFC_INTEGER_16
6484 KINDCASE(16, __int128);
6485 #endif
6486 default:
6487 caf_runtime_error(vecrefunknownkind, src_stat, NULL, 0);
6488 return;
6489 }
6490 #undef KINDCASE
6491 break;
6492 case CAF_ARR_REF_FULL:
6493 delta = riter->u.a.dim[i].s.end / riter->u.a.dim[i].s.stride + 1;
6494 /* The memptr stays unchanged when ref'ing the first element
6495 * in a dimension. */
6496 break;
6497 case CAF_ARR_REF_RANGE:
6498 COMPUTE_NUM_ITEMS(delta,
6499 riter->u.a.dim[i].s.stride,
6500 riter->u.a.dim[i].s.start,
6501 riter->u.a.dim[i].s.end);
6502 remote_memptr += riter->u.a.dim[i].s.start
6503 * riter->u.a.dim[i].s.stride
6504 * riter->item_size;
6505 break;
6506 case CAF_ARR_REF_SINGLE:
6507 delta = 1;
6508 remote_memptr += riter->u.a.dim[i].s.start
6509 * riter->u.a.dim[i].s.stride
6510 * riter->item_size;
6511 break;
6512 case CAF_ARR_REF_OPEN_END:
6513 /* This and OPEN_START are mapped to a RANGE and therefore
6514 * can not occur here. */
6515 case CAF_ARR_REF_OPEN_START:
6516 default:
6517 caf_runtime_error(unknownarrreftype, src_stat, NULL, 0);
6518 return;
6519 } // switch
6520 dprint("i = %zd, array_ref = %s, delta = %ld\n",
6521 i, caf_array_ref_str[array_ref], delta);
6522 if (delta <= 0)
6523 return;
6524 size *= (ptrdiff_t)delta;
6525 }
6526 if (in_array_ref)
6527 {
6528 array_extent_fixed = true;
6529 in_array_ref = false;
6530 }
6531 break;
6532 default:
6533 caf_runtime_error(unknownreftype, src_stat, NULL, 0);
6534 return;
6535 } // switch
6536 src_size = riter->item_size;
6537 riter = riter->next;
6538 }
6539 if (size == 0 || src_size == 0)
6540 return;
6541 /* Postcondition:
6542 * - size contains the number of elements to store in the destination array,
6543 * - src_size gives the size in bytes of each item in the destination array.
6544 */
6545
6546 dst_rank = (size > 1) ? 1 : 0;
6547 memset(&temp_src_desc, 0, sizeof(gfc_dim1_descriptor_t));
6548 #ifdef GCC_GE_8
6549 temp_src_desc.base.dtype.elem_len = (dst_type != BT_COMPLEX) ?
6550 dst_kind : (2 * dst_kind);
6551 temp_src_desc.base.dtype.rank = 1;
6552 temp_src_desc.base.dtype.type = dst_type;
6553 #else // GCC_GE_7
6554 temp_src_desc.base.dtype = GFC_DTYPE_INTEGER_4 | 1;
6555 #endif
6556 temp_src_desc.base.offset = 0;
6557 temp_src_desc.dim[0]._ubound = size - 1;
6558 temp_src_desc.dim[0]._stride = 1;
6559
6560 temp_src_desc.base.base_addr =
6561 malloc(size * GFC_DESCRIPTOR_SIZE((gfc_descriptor_t *)&temp_src_desc));
6562 if (unlikely(temp_src_desc.base.base_addr == NULL))
6563 {
6564 caf_runtime_error(
6565 cannotallocdst, src_stat,
6566 size * GFC_DESCRIPTOR_SIZE((gfc_descriptor_t *)&temp_src_desc));
6567 return;
6568 }
6569
6570 #ifndef GCC_GE_8
6571 static bool warning_given = false;
6572 if (!warning_given)
6573 {
6574 fprintf(stderr,
6575 "lib_caf_mpi::sendget_by_ref(): Warning !! sendget_by_ref() is "
6576 "mostly unfunctional due to a design error. Split up your "
6577 "statement with coarray refs on both sides of the assignment "
6578 "when the datatype transfered is non 4-byte-integer compatible, "
6579 "or use gcc >= 8.\n");
6580 warning_given = true;
6581 }
6582 #endif
6583 /* Reset the token. */
6584 src_mpi_token = (mpi_caf_token_t *) src_token;
6585 remote_memptr = src_mpi_token->memptr;
6586 dst_index = 0;
6587 #ifdef EXTRA_DEBUG_OUTPUT
6588 dprint("dst_rank: %d\n", dst_rank);
6589 for (i = 0; i < dst_rank; ++i)
6590 {
6591 dprint("temp_src_dim[%zd] = (%zd, %zd)\n",
6592 i, temp_src_desc.dim[i].lower_bound, temp_src_desc.dim[i]._ubound);
6593 }
6594 #endif
6595 i = 0;
6596 dprint("calling get_for_ref.\n");
6597 get_for_ref(src_refs, &i, dst_index, src_mpi_token,
6598 (gfc_descriptor_t *)&temp_src_desc, src_mpi_token->desc,
6599 temp_src_desc.base.base_addr, remote_memptr, 0, 0, dst_kind,
6600 src_kind, 0, 0, 1, src_stat, global_src_rank, memptr_src_rank,
6601 false, false
6602 #ifdef GCC_GE_8
6603 , src_type
6604 #endif
6605 );
6606 CAF_Win_unlock(global_src_rank, global_dynamic_win);
6607 CAF_Win_unlock(memptr_src_rank, src_mpi_token->memptr_win);
6608 dprint("calling send_for_ref. num elems: size = %zd, elem size in bytes: "
6609 "src_size = %zd\n", size, src_size);
6610 i = 0;
6611
6612 CAF_Win_lock(MPI_LOCK_EXCLUSIVE, global_dst_rank, global_dynamic_win);
6613 CAF_Win_lock(MPI_LOCK_EXCLUSIVE, memptr_dst_rank, dst_mpi_token->memptr_win);
6614 send_for_ref(dst_refs, &i, src_index, dst_mpi_token, dst_mpi_token->desc,
6615 (gfc_descriptor_t *)&temp_src_desc, dst_mpi_token->memptr,
6616 temp_src_desc.base.base_addr, 0, 0, dst_kind, src_kind, 0, 0,
6617 1, dst_stat, global_dst_rank, memptr_dst_rank, false, false
6618 #ifdef GCC_GE_8
6619 , dst_type
6620 #endif
6621 );
6622 CAF_Win_unlock(global_dst_rank, global_dynamic_win);
6623 CAF_Win_unlock(memptr_dst_rank, src_mpi_token->memptr_win);
6624 }
6625
6626 int
6627 PREFIX(is_present) (caf_token_t token, int image_index, caf_reference_t *refs)
6628 {
6629 const char unsupportedRefType[] =
6630 "Unsupported ref-type in caf_is_present().";
6631 const char unexpectedEndOfRefs[] =
6632 "Unexpected end of references in caf_is_present.";
6633 const char remotesInnerRefNA[] =
6634 "Memory referenced on the remote image is not allocated.";
6635 const int ptr_size = sizeof(void *);
6636 const int remote_image = image_index - 1;
6637 mpi_caf_token_t *mpi_token = (mpi_caf_token_t *)token;
6638 ptrdiff_t local_offset = 0;
6639 void *remote_memptr = NULL, *remote_base_memptr = NULL;
6640 bool carryOn = true, firstDesc = true;
6641 caf_reference_t *riter = refs, *prev;
6642 size_t i, ref_rank;
6643 int ierr;
6644 gfc_max_dim_descriptor_t src_desc;
6645 caf_array_ref_t array_ref;
6646
6647 while (carryOn && riter)
6648 {
6649 switch (riter->type)
6650 {
6651 case CAF_REF_COMPONENT:
6652 if (riter->u.c.caf_token_offset)
6653 {
6654 CAF_Win_lock(MPI_LOCK_SHARED, remote_image, mpi_token->memptr_win);
6655 ierr = MPI_Get(&remote_memptr, ptr_size, MPI_BYTE, remote_image,
6656 local_offset + riter->u.c.offset, ptr_size,
6657 MPI_BYTE, mpi_token->memptr_win); chk_err(ierr);
6658 CAF_Win_unlock(remote_image, mpi_token->memptr_win);
6659 dprint("Got first remote address %p from offset %zd\n",
6660 remote_memptr, local_offset);
6661 local_offset = 0;
6662 carryOn = false;
6663 }
6664 else
6665 local_offset += riter->u.c.offset;
6666 break;
6667 case CAF_REF_ARRAY:
6668 {
6669 const gfc_descriptor_t *src =
6670 (gfc_descriptor_t *)(mpi_token->memptr + local_offset);
6671 for (i = 0; riter->u.a.mode[i] != CAF_ARR_REF_NONE; ++i)
6672 {
6673 array_ref = riter->u.a.mode[i];
6674 dprint("i = %zd, array_ref = %s\n",
6675 i, caf_array_ref_str[array_ref]);
6676 switch (array_ref)
6677 {
6678 case CAF_ARR_REF_FULL:
6679 /* The local_offset stays unchanged when ref'ing the first
6680 * element in a dimension. */
6681 break;
6682 case CAF_ARR_REF_SINGLE:
6683 local_offset +=
6684 (riter->u.a.dim[i].s.start - src->dim[i].lower_bound)
6685 * src->dim[i]._stride * riter->item_size;
6686 break;
6687 case CAF_ARR_REF_VECTOR:
6688 case CAF_ARR_REF_RANGE:
6689 case CAF_ARR_REF_OPEN_END:
6690 case CAF_ARR_REF_OPEN_START:
6691 /* Intentionally fall through, because these are not
6692 * suported here. */
6693 default:
6694 caf_runtime_error(unsupportedRefType);
6695 return false;
6696 }
6697 }
6698 }
6699 break;
6700 case CAF_REF_STATIC_ARRAY:
6701 for (i = 0; riter->u.a.mode[i] != CAF_ARR_REF_NONE; ++i)
6702 {
6703 array_ref = riter->u.a.mode[i];
6704 dprint("i = %zd, array_ref = %s\n", i, caf_array_ref_str[array_ref]);
6705 switch (array_ref)
6706 {
6707 case CAF_ARR_REF_FULL:
6708 /* The local_offset stays unchanged when ref'ing the first
6709 * element in a dimension. */
6710 break;
6711 case CAF_ARR_REF_SINGLE:
6712 local_offset += riter->u.a.dim[i].s.start
6713 * riter->u.a.dim[i].s.stride * riter->item_size;
6714 break;
6715 case CAF_ARR_REF_VECTOR:
6716 case CAF_ARR_REF_RANGE:
6717 case CAF_ARR_REF_OPEN_END:
6718 case CAF_ARR_REF_OPEN_START:
6719 default:
6720 caf_runtime_error(unsupportedRefType);
6721 return false;
6722 }
6723 }
6724 break;
6725 default:
6726 caf_runtime_error(unsupportedRefType);
6727 return false;
6728 } // switch
6729 prev = riter;
6730 riter = riter->next;
6731 }
6732
6733 if (carryOn)
6734 {
6735 // This can only happen, when riter == NULL.
6736 caf_runtime_error(unexpectedEndOfRefs);
6737 }
6738
6739 CAF_Win_lock(MPI_LOCK_SHARED, remote_image, global_dynamic_win);
6740 if (remote_memptr != NULL)
6741 remote_base_memptr = remote_memptr + local_offset;
6742
6743 dprint("Remote desc address is %p from remote memptr %p and offset %zd\n",
6744 remote_base_memptr, remote_memptr, local_offset);
6745
6746 while (riter)
6747 {
6748 switch (riter->type)
6749 {
6750 case CAF_REF_COMPONENT:
6751 /* After reffing the first allocatable/pointer component, descriptors
6752 * need to be picked up from the global_win. */
6753 firstDesc = firstDesc && riter->u.c.caf_token_offset == 0;
6754 local_offset += riter->u.c.offset;
6755 remote_base_memptr = remote_memptr + local_offset;
6756 ierr = MPI_Get(&remote_memptr, ptr_size, MPI_BYTE, remote_image,
6757 (MPI_Aint)remote_base_memptr, ptr_size,
6758 MPI_BYTE, global_dynamic_win); chk_err(ierr);
6759 dprint("Got remote address %p from offset %zd nd base memptr %p\n",
6760 remote_memptr, local_offset, remote_base_memptr);
6761 local_offset = 0;
6762 break;
6763 case CAF_REF_ARRAY:
6764 if (remote_base_memptr == NULL)
6765 {
6766 /* Refing an unallocated array ends in a full_ref. Check that this
6767 * is true. Error when not full-refing. */
6768 for (i = 0; riter->u.a.mode[i] != CAF_ARR_REF_NONE; ++i)
6769 {
6770 if (riter->u.a.mode[i] != CAF_ARR_REF_FULL)
6771 break;
6772 }
6773 if (riter->u.a.mode[i] != CAF_ARR_REF_NONE)
6774 caf_runtime_error(remotesInnerRefNA);
6775 break;
6776 }
6777 if (firstDesc)
6778 {
6779 /* The first descriptor is accessible by the mpi_token->memptr_win.
6780 * Count the dims to fetch. */
6781 for (ref_rank = 0; riter->u.a.mode[ref_rank] != CAF_ARR_REF_NONE;
6782 ++ref_rank)
6783 ;
6784 dprint("Getting remote descriptor of rank %zd from win: %d, "
6785 "sizeof() %zd\n", ref_rank, mpi_token->memptr_win,
6786 sizeof_desc_for_rank(ref_rank));
6787 ierr = MPI_Get(&src_desc, sizeof_desc_for_rank(ref_rank),
6788 MPI_BYTE, remote_image, local_offset,
6789 sizeof_desc_for_rank(ref_rank),
6790 MPI_BYTE, mpi_token->memptr_win); chk_err(ierr);
6791 firstDesc = false;
6792 }
6793 else
6794 {
6795 /* All inner descriptors go by the dynamic window.
6796 * Count the dims to fetch. */
6797 for (ref_rank = 0; riter->u.a.mode[ref_rank] != CAF_ARR_REF_NONE;
6798 ++ref_rank)
6799 ;
6800 dprint("Getting remote descriptor of rank %zd from: %p, "
6801 "sizeof() %zd\n", ref_rank, remote_base_memptr,
6802 sizeof_desc_for_rank(ref_rank));
6803 ierr = MPI_Get(&src_desc, sizeof_desc_for_rank(ref_rank), MPI_BYTE,
6804 remote_image, (MPI_Aint)remote_base_memptr,
6805 sizeof_desc_for_rank(ref_rank), MPI_BYTE,
6806 global_dynamic_win); chk_err(ierr);
6807 }
6808 #ifdef EXTRA_DEBUG_OUTPUT
6809 {
6810 gfc_descriptor_t * src = (gfc_descriptor_t *)(&src_desc);
6811 dprint("remote desc rank: %zd (ref_rank: %zd)\n",
6812 GFC_DESCRIPTOR_RANK(src), ref_rank);
6813 for (i = 0; i < GFC_DESCRIPTOR_RANK(src); ++i)
6814 {
6815 dprint("remote desc dim[%zd] = (lb=%zd, ub=%zd, stride=%zd)\n",
6816 i, src_desc.dim[i].lower_bound, src_desc.dim[i]._ubound,
6817 src_desc.dim[i]._stride);
6818 }
6819 }
6820 #endif
6821
6822 for (i = 0; riter->u.a.mode[i] != CAF_ARR_REF_NONE; ++i)
6823 {
6824 array_ref = riter->u.a.mode[i];
6825 dprint("i = %zd, array_ref = %s\n", i, caf_array_ref_str[array_ref]);
6826 switch (array_ref)
6827 {
6828 case CAF_ARR_REF_FULL:
6829 /* The local_offset stays unchanged when ref'ing the first
6830 * element in a dimension. */
6831 break;
6832 case CAF_ARR_REF_SINGLE:
6833 local_offset +=
6834 (riter->u.a.dim[i].s.start - src_desc.dim[i].lower_bound)
6835 * src_desc.dim[i]._stride
6836 * riter->item_size;
6837 break;
6838 case CAF_ARR_REF_VECTOR:
6839 case CAF_ARR_REF_RANGE:
6840 case CAF_ARR_REF_OPEN_END:
6841 case CAF_ARR_REF_OPEN_START:
6842 /* Intentionally fall through, because these are not suported
6843 * here. */
6844 default:
6845 caf_runtime_error(unsupportedRefType);
6846 CAF_Win_unlock(remote_image, global_dynamic_win);
6847 return false;
6848 }
6849 }
6850 break;
6851 case CAF_REF_STATIC_ARRAY:
6852 for (i = 0; riter->u.a.mode[i] != CAF_ARR_REF_NONE; ++i)
6853 {
6854 array_ref = riter->u.a.mode[i];
6855 dprint("i = %zd, array_ref = %s\n", i, caf_array_ref_str[array_ref]);
6856 switch (array_ref)
6857 {
6858 case CAF_ARR_REF_FULL:
6859 /* The memptr stays unchanged when ref'ing the first element
6860 * in a dimension. */
6861 break;
6862 case CAF_ARR_REF_SINGLE:
6863 local_offset += riter->u.a.dim[i].s.start
6864 * riter->u.a.dim[i].s.stride * riter->item_size;
6865 break;
6866 case CAF_ARR_REF_VECTOR:
6867 case CAF_ARR_REF_RANGE:
6868 case CAF_ARR_REF_OPEN_END:
6869 case CAF_ARR_REF_OPEN_START:
6870 default:
6871 caf_runtime_error(unsupportedRefType);
6872 CAF_Win_unlock(remote_image, global_dynamic_win);
6873 return false;
6874 }
6875 }
6876 break;
6877 default:
6878 caf_runtime_error(unsupportedRefType);
6879 CAF_Win_unlock(remote_image, global_dynamic_win);
6880 return false;
6881 } // switch
6882 riter = riter->next;
6883 }
6884 CAF_Win_unlock(remote_image, global_dynamic_win);
6885
6886 dprint("Got remote_memptr: %p\n", remote_memptr);
6887 return remote_memptr != NULL;
6888 }
6889 #endif // GCC_GE_7
6890
6891
6892 /* SYNC IMAGES. Note: SYNC IMAGES(*) is passed as count == -1 while
6893 * SYNC IMAGES([]) has count == 0. Note further that SYNC IMAGES(*)
6894 * is not semantically equivalent to SYNC ALL. */
6895
/* Public entry point for SYNC IMAGES.  Thin forwarder to
 * sync_images_internal() with internal == false, i.e. a failure with no
 * stat argument raises a runtime error (see the error handling at the end
 * of sync_images_internal). */
void
PREFIX(sync_images) (int count, int images[], int *stat, char *errmsg,
                     charlen_t errmsg_len)
{
  sync_images_internal(count, images, stat, errmsg, errmsg_len, false);
}
6902
/* Worker for SYNC IMAGES.  count == -1 means SYNC IMAGES(*); count == 0 is
 * SYNC IMAGES([]) and a no-op.  Synchronization is done point-to-point: an
 * int is sent to every image in the set and one is received from each of
 * them; receiving STAT_STOPPED_IMAGE aborts the wait.  'internal' suppresses
 * the terminating runtime error when no stat variable was given. */
static void
sync_images_internal(int count, int images[], int *stat, char *errmsg,
                     size_t errmsg_len, bool internal)
{
  int ierr = 0, i = 0, j = 0, int_zero = 0, done_count = 0, flag;
  MPI_Status s;

#ifdef WITH_FAILED_IMAGES
  no_stopped_images_check_in_errhandler = true;
#endif
  dprint("Entering\n");
  /* Empty set, or syncing only with ourselves: nothing to do. */
  if (count == 0 || (count == 1 && images[0] == caf_this_image))
    {
      if (stat)
        *stat = 0;
#ifdef WITH_FAILED_IMAGES
      no_stopped_images_check_in_errhandler = false;
#endif
      dprint("Leaving early.\n");
      return;
    }

  /* halt execution if sync images contains duplicate image numbers */
  for (i = 0; i < count; ++i)
    {
      for (j = 0; j < i; ++j)
        {
          if (images[i] == images[j])
            {
              ierr = STAT_DUP_SYNC_IMAGES;
              if (stat)
                *stat = ierr;
              goto sync_images_err_chk;
            }
        }
    }

#ifdef GFC_CAF_CHECK
  /* Optional bounds check of the image indices (debug builds). */
  for (i = 0; i < count; ++i)
    {
      if (images[i] < 1 || images[i] > caf_num_images)
        {
          fprintf(stderr, "COARRAY ERROR: Invalid image index %d to SYNC IMAGES",
                  images[i]);
          terminate_internal(1, 1);
        }
    }
#endif

  if (unlikely(caf_is_finalized))
    {
      ierr = STAT_STOPPED_IMAGE;
    }
  else
    {
      /* SYNC IMAGES(*): sync with every other image. */
      if (count == -1)
        {
          count = caf_num_images - 1;
          images = images_full;
        }

#if defined(NONBLOCKING_PUT) && !defined(CAF_MPI_LOCK_UNLOCK)
      explicit_flush();
#endif

#ifdef WITH_FAILED_IMAGES
      /* Provoke detecting process fails. */
      ierr = MPI_Test(&alive_request, &flag, MPI_STATUS_IGNORE); chk_err(ierr);
#endif
      /* A rather simple way to synchronice:
       * - expect all images to sync with receiving an int,
       * - on the other side, send all processes to sync with an int,
       * - when the int received is STAT_STOPPED_IMAGE the return immediately,
       *   else wait until all images in the current set of images have send
       *   some data, i.e., synced.
       *
       * This approach as best as possible implements the syncing of different
       * sets of images and figuring that an image has stopped. MPI does not
       * provide any direct means of syncing non-coherent sets of images.
       * The groups/communicators of MPI always need to be consistent, i.e.,
       * have the same members on all images participating. This is
       * contradictiory to the sync images statement, where syncing, e.g., in a
       * ring pattern is possible.
       *
       * This implementation guarantees, that as long as no image is stopped
       * an image only is allowed to continue, when all its images to sync to
       * also have reached a sync images statement. This implementation makes
       * no assumption when the image continues or in which order synced
       * images continue. */
      for (i = 0; i < count; ++i)
        {
          /* Need to have the request handlers contigously in the handlers
           * array or waitany below will trip about the handler as illegal. */
          ierr = MPI_Irecv(&arrived[images[i] - 1], 1, MPI_INT, images[i] - 1,
                           MPI_TAG_CAF_SYNC_IMAGES, CAF_COMM_WORLD,
                           &sync_handles[i]); chk_err(ierr);
        }
      /* Tell every image in the set that we reached the sync. */
      for (i = 0; i < count; ++i)
        {
          ierr = MPI_Send(&int_zero, 1, MPI_INT, images[i] - 1,
                          MPI_TAG_CAF_SYNC_IMAGES, CAF_COMM_WORLD); chk_err(ierr);
        }
      /* Wait until all images in the set have checked in (or one of them
       * reports having stopped / failed). */
      done_count = 0;
      while (done_count < count)
        {
          ierr = MPI_Waitany(count, sync_handles, &i, &s);
          if (ierr == MPI_SUCCESS && i != MPI_UNDEFINED)
            {
              ++done_count;
              if (ierr == MPI_SUCCESS && arrived[s.MPI_SOURCE] == STAT_STOPPED_IMAGE)
                {
                  /* Possible future extension: Abort pending receives. At the
                   * moment the receives are discarded by the program
                   * termination. For the tested mpi-implementation this is ok. */
                  ierr = STAT_STOPPED_IMAGE;
                  break;
                }
            }
          else if (ierr != MPI_SUCCESS)
#ifdef WITH_FAILED_IMAGES
            {
              int err;
              MPI_Error_class(ierr, &err);
              if (err == MPIX_ERR_PROC_FAILED)
                {
                  dprint("Image failed, provoking error handling.\n");
                  ierr = STAT_FAILED_IMAGE;
                  /* Provoke detecting process fails. */
                  MPI_Test(&alive_request, &flag, MPI_STATUS_IGNORE);
                }
              break;
            }
#else
            break;
#endif // WITH_FAILED_IMAGES
        }
    }

sync_images_err_chk:
#ifdef WITH_FAILED_IMAGES
  no_stopped_images_check_in_errhandler = false;
#endif
  dprint("Leaving\n");
  if (stat)
    *stat = ierr;
#ifdef WITH_FAILED_IMAGES
  else if (ierr == STAT_FAILED_IMAGE)
    terminate_internal(ierr, 0);
#endif

  /* Report failure: fill errmsg (blank-padded, Fortran style) when given,
   * otherwise raise a runtime error for a non-internal call without stat. */
  if (ierr != 0 && ierr != STAT_FAILED_IMAGE)
    {
      char msg[80];
      strcpy(msg, "SYNC IMAGES failed");
      if (caf_is_finalized)
        strcat(msg, " - there are stopped images");

      if (errmsg_len > 0)
        {
          size_t len = (strlen(msg) > errmsg_len) ? errmsg_len : strlen (msg);
          memcpy(errmsg, msg, len);
          if (errmsg_len > len)
            memset(&errmsg[len], ' ', errmsg_len-len);
        }
      else if (!internal && stat == NULL)
        caf_runtime_error(msg);
    }
}
7071
7072
/* Generate a static elementwise reduction with the MPI_User_function
 * signature (for MPI_Op_create).  `operator` is a statement combining
 * invec[i] into inoutvec[i].  Note: MPI passes the element count by
 * pointer, hence the *len dereference in the loop bound.  The trailing
 * MPI_Datatype parameter is unused because the element type is fixed at
 * macro-expansion time; it must not be named `datatype`, or the macro
 * argument substitution would mangle it (e.g. for `long double`). */
#define GEN_REDUCTION(name, datatype, operator) \
static void \
name(datatype *invec, datatype *inoutvec, int *len, \
     MPI_Datatype *mpi_dt __attribute__((unused))) \
{ \
  for (int i = 0; i < *len; ++i) \
    { \
      operator; \
    } \
}
7083
/* Build the name of the user-supplied CO_REDUCE callback variant:
 * <TYPE>_by_reference takes pointer arguments, <TYPE>_by_value takes the
 * operands by value.  The actual function pointers are file-level globals
 * set in PREFIX(co_reduce) before the MPI operation runs. */
#define REFERENCE_FUNC(TYPE) TYPE ## _by_reference
#define VALUE_FUNC(TYPE) TYPE ## _by_value

/* Generate a pair of MPI_User_function adapters for element type `dt`:
 * one forwarding to the by-reference callback, one to the by-value
 * callback.  The result of the callback overwrites inoutvec[i].
 * Note: arithmetic on void* is a GNU C extension (treated as char*),
 * which this file relies on throughout. */
#define GEN_COREDUCE(name, dt) \
static void \
name##_by_reference_adapter(void *invec, void *inoutvec, \
                            int *len, MPI_Datatype *datatype) \
{ \
  for (int i = 0; i < *len; ++i) \
    { \
      *((dt*)inoutvec) = \
        (dt)(REFERENCE_FUNC(dt)((dt *)invec, (dt *)inoutvec)); \
      invec += sizeof(dt); \
      inoutvec += sizeof(dt); \
    } \
} \
static void \
name##_by_value_adapter(void *invec, void *inoutvec, \
                        int *len, MPI_Datatype *datatype) \
{ \
  for (int i = 0; i < *len; ++i) \
    { \
      *((dt*)inoutvec) = \
        (dt)(VALUE_FUNC(dt)(*(dt *)invec, *(dt *)inoutvec)); \
      invec += sizeof(dt); \
      inoutvec += sizeof(dt); \
    } \
}
7112
/* Instantiate by-value and by-reference CO_REDUCE adapters for every
 * numeric kind the transport supports. */
GEN_COREDUCE(redux_int8, int8_t)
GEN_COREDUCE(redux_int16, int16_t)
GEN_COREDUCE(redux_int32, int32_t)
GEN_COREDUCE(redux_int64, int64_t)
GEN_COREDUCE(redux_real32, float)
GEN_COREDUCE(redux_real64, double)
7119
7120 static void
7121 redux_char_by_reference_adapter(void *invec, void *inoutvec, int *len,
7122 MPI_Datatype *datatype)
7123 {
7124 MPI_Aint lb, string_len;
7125 MPI_Type_get_extent(*datatype, &lb, &string_len);
7126 for (int i = 0; i < *len; i++)
7127 {
7128 /* The length of the result is fixed, i.e., no deferred string length is
7129 * allowed there. */
7130 REFERENCE_FUNC(char)(
7131 (char *)inoutvec, string_len, (char *)invec,
7132 (char *)inoutvec, string_len, string_len
7133 );
7134 invec += sizeof(char) * string_len;
7135 inoutvec += sizeof(char) * string_len;
7136 }
7137 }
7138
/* Software fallback reductions for 1-byte integers when the MPI library
 * does not supply MPI_INTEGER1. */
#ifndef MPI_INTEGER1
GEN_REDUCTION(do_sum_int1, int8_t, inoutvec[i] += invec[i])
GEN_REDUCTION(do_min_int1, int8_t,
              inoutvec[i] = (invec[i] >= inoutvec[i] ? inoutvec[i] : invec[i]))
GEN_REDUCTION(do_max_int1, int8_t,
              inoutvec[i] = (invec[i] <= inoutvec[i] ? inoutvec[i] : invec[i]))
#endif

/*
#ifndef MPI_INTEGER2
GEN_REDUCTION(do_sum_int1, int16_t, inoutvec[i] += invec[i])
GEN_REDUCTION(do_min_int1, int16_t,
              inoutvec[i] = (invec[i] >= inoutvec[i] ? inoutvec[i] : invec[i]))
GEN_REDUCTION(do_max_int1, int16_t,
              inoutvec[i] = (invec[i] <= inoutvec[i] ? inoutvec[i] : invec[i]))
#endif
*/

/* NOTE(review): the *_int1 names are reused here for 16-byte integers; if
 * this guard and the MPI_INTEGER1 fallback above were ever enabled at the
 * same time the symbols would clash — confirm the configuration matrix. */
#if defined(MPI_INTEGER16) && defined(GFC_INTEGER_16)
GEN_REDUCTION(do_sum_int1, GFC_INTEGER_16, inoutvec[i] += invec[i])
GEN_REDUCTION(do_min_int1, GFC_INTEGER_16,
              inoutvec[i] = (invec[i] >= inoutvec[i] ? inoutvec[i] : invec[i]))
GEN_REDUCTION(do_max_int1, GFC_INTEGER_16,
              inoutvec[i] = (invec[i] <= inoutvec[i] ? inoutvec[i] : invec[i]))
#endif

/* NOTE(review): relational operators (>=, <=) on _Complex operands are not
 * valid ISO C — the complex min/max expansions below look untestable and
 * presumably never enabled; verify before relying on them.  Also note
 * `do_mincomplexl10` (second block) breaks the do_min_complex10 naming
 * pattern — likely a typo, left as-is since callers are outside this view. */
#if defined(GFC_DTYPE_REAL_10) \
    || (!defined(GFC_DTYPE_REAL_10) && defined(GFC_DTYPE_REAL_16))
GEN_REDUCTION(do_sum_real10, long double, inoutvec[i] += invec[i])
GEN_REDUCTION(do_min_real10, long double,
              inoutvec[i] = (invec[i] >= inoutvec[i] ? inoutvec[i] : invec[i]))
GEN_REDUCTION(do_max_real10, long double,
              inoutvec[i] = (invec[i] <= inoutvec[i] ? inoutvec[i] : invec[i]))
GEN_REDUCTION(do_sum_complex10, _Complex long double, inoutvec[i] += invec[i])
GEN_REDUCTION(do_min_complex10, _Complex long double,
              inoutvec[i] = (invec[i] >= inoutvec[i] ? inoutvec[i] : invec[i]))
GEN_REDUCTION(do_max_complex10, _Complex long double,
              inoutvec[i] = (invec[i] <= inoutvec[i] ? inoutvec[i] : invec[i]))
#endif

#if defined(GFC_DTYPE_REAL_10) && defined(GFC_DTYPE_REAL_16)
GEN_REDUCTION(do_sum_real10, __float128, inoutvec[i] += invec[i])
GEN_REDUCTION(do_min_real10, __float128,
              inoutvec[i] = (invec[i] >= inoutvec[i] ? inoutvec[i] : invec[i]))
GEN_REDUCTION(do_max_real10, __float128,
              inoutvec[i] = (invec[i] <= inoutvec[i] ? inoutvec[i] : invec[i]))
GEN_REDUCTION(do_sum_complex10, _Complex __float128, inoutvec[i] += invec[i])
GEN_REDUCTION(do_mincomplexl10, _Complex __float128,
              inoutvec[i] = (invec[i] >= inoutvec[i] ? inoutvec[i] : invec[i]))
GEN_REDUCTION(do_max_complex10, _Complex __float128,
              inoutvec[i] = (invec[i] <= inoutvec[i] ? inoutvec[i] : invec[i]))
#endif
#undef GEN_REDUCTION
7191 #undef GEN_REDUCTION
7192
7193
/* Map a gfortran array descriptor to a matching MPI datatype.
 * For CHARACTER data a contiguous derived type of char_len characters
 * (descriptor element size when char_len == 0) is created and committed;
 * the caller must free it (the BT_CHARACTER MPI_Type_free calls in
 * internal_co_reduce / co_broadcast do this).  Anything unmatched falls
 * back to MPI_BYTE. */
static MPI_Datatype
get_MPI_datatype(gfc_descriptor_t *desc, int char_len)
{
  int ierr;
  /* FIXME: Better check whether the sizes are okay and supported;
   * MPI3 adds more types, e.g. MPI_INTEGER1. */
  switch (GFC_DTYPE_TYPE_SIZE(desc))
  {
#ifdef MPI_INTEGER1
    case GFC_DTYPE_INTEGER_1:
      return MPI_INTEGER1;
#endif
#ifdef MPI_INTEGER2
    case GFC_DTYPE_INTEGER_2:
      return MPI_INTEGER2;
#endif
    case GFC_DTYPE_INTEGER_4:
#ifdef MPI_INTEGER4
      return MPI_INTEGER4;
#else
      return MPI_INTEGER;
#endif
#ifdef MPI_INTEGER8
    case GFC_DTYPE_INTEGER_8:
      return MPI_INTEGER8;
#endif
#if defined(MPI_INTEGER16) && defined(GFC_DTYPE_INTEGER_16)
    case GFC_DTYPE_INTEGER_16:
      return MPI_INTEGER16;
#endif

    /* Default-kind logicals travel as plain C int. */
    case GFC_DTYPE_LOGICAL_4:
      return MPI_INT;

    case GFC_DTYPE_REAL_4:
#ifdef MPI_REAL4
      return MPI_REAL4;
#else
      return MPI_REAL;
#endif
    case GFC_DTYPE_REAL_8:
#ifdef MPI_REAL8
      return MPI_REAL8;
#else
      return MPI_DOUBLE_PRECISION;
#endif

/* Note that we cannot use REAL_16 as we do not know whether it matches REAL(10)
 * or REAL(16), which have both the same bitsize and only make use of less
 * bits. */
    case GFC_DTYPE_COMPLEX_4:
      return MPI_COMPLEX;
    case GFC_DTYPE_COMPLEX_8:
      return MPI_DOUBLE_COMPLEX;
  }
/* gfortran passes character string arguments with a
 * GFC_DTYPE_TYPE_SIZE == GFC_TYPE_CHARACTER + 64*strlen */
  if ((GFC_DTYPE_TYPE_SIZE(desc) - GFC_DTYPE_CHARACTER) % 64 == 0)
  {
    MPI_Datatype string;

    if (char_len == 0)
      char_len = GFC_DESCRIPTOR_SIZE(desc);
    /* Committed derived type — ownership passes to the caller. */
    ierr = MPI_Type_contiguous(char_len, MPI_CHARACTER, &string); chk_err(ierr);
    ierr = MPI_Type_commit(&string); chk_err(ierr);
    return string;
  }

  return MPI_BYTE;
/* caf_runtime_error("Unsupported data type in collective: %zd\n", */
/*                   GFC_DTYPE_TYPE_SIZE(desc)); */
/* return 0; */
}
7267
7268
/* Common implementation of CO_SUM/CO_MIN/CO_MAX/CO_REDUCE.
 * Reduces SOURCE elementwise across all images with MPI operation OP.
 * result_image == 0 means "all images receive the result" (Allreduce);
 * otherwise only result_image (one-based) receives it.  Contiguous data is
 * reduced in one call; otherwise the strided elements are reduced one by
 * one.  On failure *stat/errmsg are filled, or the runtime aborts when no
 * stat was requested. */
static void
internal_co_reduce(MPI_Op op, gfc_descriptor_t *source, int result_image,
                   int *stat, char *errmsg, int src_len, size_t errmsg_len)
{
  size_t i, size;
  int j, ierr, rank = GFC_DESCRIPTOR_RANK(source);
  ptrdiff_t dimextent;

  MPI_Datatype datatype = get_MPI_datatype(source, src_len);

  /* Total number of elements; zero-extent dimensions clamp to 0. */
  size = 1;
  for (j = 0; j < rank; ++j)
  {
    dimextent = source->dim[j]._ubound - source->dim[j].lower_bound + 1;
    if (dimextent < 0)
      dimextent = 0;
    size *= dimextent;
  }

  if (rank == 0 || PREFIX(is_contiguous) (source))
  {
    if (result_image == 0)
    {
      ierr = MPI_Allreduce(MPI_IN_PLACE, source->base_addr, size, datatype, op,
                           CAF_COMM_WORLD); chk_err(ierr);
    }
    else if (result_image == caf_this_image)
    {
      /* Root of the reduction: reduce in place into our own buffer. */
      ierr = MPI_Reduce(MPI_IN_PLACE, source->base_addr, size, datatype, op,
                        result_image - 1, CAF_COMM_WORLD); chk_err(ierr);
    }
    else
    {
      /* Non-root: only contributes data; recvbuf is ignored. */
      ierr = MPI_Reduce(source->base_addr, NULL, size, datatype, op,
                        result_image - 1, CAF_COMM_WORLD); chk_err(ierr);
    }
    if (ierr)
      goto error;
    goto co_reduce_cleanup;
  }

  /* Non-contiguous array: compute each element's strided offset and reduce
   * the elements individually. */
  for (i = 0; i < size; ++i)
  {
    ptrdiff_t array_offset_sr = 0, tot_ext = 1, extent = 1;
    for (j = 0; j < rank - 1; ++j)
    {
      extent = source->dim[j]._ubound - source->dim[j].lower_bound + 1;
      array_offset_sr += ((i / tot_ext) % extent) * source->dim[j]._stride;
      tot_ext *= extent;
    }
    array_offset_sr += (i / tot_ext) * source->dim[rank - 1]._stride;
    void *sr = (void *)((char *)source->base_addr
                        + array_offset_sr * GFC_DESCRIPTOR_SIZE(source));
    if (result_image == 0)
    {
      ierr = MPI_Allreduce(MPI_IN_PLACE, sr, 1, datatype, op, CAF_COMM_WORLD);
      chk_err(ierr);
    }
    else if (result_image == caf_this_image)
    {
      ierr = MPI_Reduce(MPI_IN_PLACE, sr, 1, datatype, op, result_image - 1,
                        CAF_COMM_WORLD); chk_err(ierr);
    }
    else
    {
      ierr = MPI_Reduce(sr, NULL, 1, datatype, op, result_image - 1,
                        CAF_COMM_WORLD); chk_err(ierr);
    }
    if (ierr)
      goto error;
  }

co_reduce_cleanup:
  /* For CHARACTER data get_MPI_datatype committed a derived type. */
  if (GFC_DESCRIPTOR_TYPE(source) == BT_CHARACTER)
  {
    ierr = MPI_Type_free(&datatype); chk_err(ierr);
  }
  if (stat)
    *stat = 0;
  return;
error:
  /* FIXME: Put this in an extra function and use it elsewhere. */
  if (stat)
  {
    *stat = ierr;
    if (!errmsg)
      return;
  }

  int len = sizeof(err_buffer);
  MPI_Error_string(ierr, err_buffer, &len);
  if (!stat)
  {
    err_buffer[len == sizeof(err_buffer) ? len - 1 : len] = '\0';
    /* NOTE(review): message says CO_SUM even when called for CO_MIN/MAX/
     * REDUCE — acceptable as a generic message, but misleading. */
    caf_runtime_error("CO_SUM failed with %s\n", err_buffer);
  }
  memcpy(errmsg, err_buffer, (errmsg_len > len) ? len : errmsg_len);
  if (errmsg_len > len)
    memset(&errmsg[len], '\0', errmsg_len - len);
}
7369
7370 void
7371 PREFIX(co_broadcast) (gfc_descriptor_t *a, int source_image, int *stat,
7372 char *errmsg, charlen_t errmsg_len)
7373 {
7374 size_t i, size;
7375 int j, ierr, rank = GFC_DESCRIPTOR_RANK(a);
7376 ptrdiff_t dimextent;
7377
7378 MPI_Datatype datatype = get_MPI_datatype(a, 0);
7379
7380 size = 1;
7381 for (j = 0; j < rank; ++j)
7382 {
7383 dimextent = a->dim[j]._ubound - a->dim[j].lower_bound + 1;
7384 if (dimextent < 0)
7385 dimextent = 0;
7386 size *= dimextent;
7387 }
7388
7389 if (rank == 0)
7390 {
7391 if( datatype == MPI_BYTE)
7392 {
7393 ierr = MPI_Bcast(a->base_addr, size*GFC_DESCRIPTOR_SIZE(a),
7394 datatype, source_image - 1,
7395 CAF_COMM_WORLD); chk_err(ierr);
7396 }
7397 else if (datatype != MPI_CHARACTER)
7398 {
7399 ierr = MPI_Bcast(a->base_addr, size, datatype, source_image - 1,
7400 CAF_COMM_WORLD); chk_err(ierr);
7401 }
7402 else
7403 {
7404 int a_length;
7405 if (caf_this_image == source_image)
7406 a_length = strlen(a->base_addr);
7407 /* Broadcast the string lenth */
7408 ierr = MPI_Bcast(&a_length, 1, MPI_INT, source_image - 1, CAF_COMM_WORLD);
7409 chk_err(ierr);
7410 if (ierr)
7411 goto error;
7412 /* Broadcast the string itself */
7413 ierr = MPI_Bcast(a->base_addr, a_length, datatype, source_image - 1,
7414 CAF_COMM_WORLD); chk_err(ierr);
7415 }
7416
7417 if (ierr)
7418 goto error;
7419 goto co_broadcast_exit;
7420 }
7421 else if (datatype == MPI_CHARACTER) /* rank !=0 */
7422 {
7423 caf_runtime_error("Co_broadcast of character arrays are "
7424 "not yet supported\n");
7425 }
7426
7427 for (i = 0; i < size; ++i)
7428 {
7429 ptrdiff_t array_offset_sr = 0, tot_ext = 1, extent = 1;
7430 for (j = 0; j < rank - 1; ++j)
7431 {
7432 extent = a->dim[j]._ubound - a->dim[j].lower_bound + 1;
7433 array_offset_sr += ((i / tot_ext) % extent) * a->dim[j]._stride;
7434 tot_ext *= extent;
7435 }
7436 array_offset_sr += (i / tot_ext) * a->dim[rank - 1]._stride;
7437 void *sr = (void *)(
7438 (char *)a->base_addr + array_offset_sr * GFC_DESCRIPTOR_SIZE(a));
7439
7440 ierr = MPI_Bcast(sr, 1, datatype, source_image - 1, CAF_COMM_WORLD);
7441 chk_err(ierr);
7442
7443 if (ierr)
7444 goto error;
7445 }
7446
7447 co_broadcast_exit:
7448 if (stat)
7449 *stat = 0;
7450 if (GFC_DESCRIPTOR_TYPE(a) == BT_CHARACTER)
7451 {
7452 ierr = MPI_Type_free(&datatype); chk_err(ierr);
7453 }
7454 return;
7455
7456 error:
7457 /* FIXME: Put this in an extra function and use it elsewhere. */
7458 if (stat)
7459 {
7460 *stat = ierr;
7461 if (!errmsg)
7462 return;
7463 }
7464
7465 int len = sizeof(err_buffer);
7466 MPI_Error_string(ierr, err_buffer, &len);
7467 if (!stat)
7468 {
7469 err_buffer[len == sizeof(err_buffer) ? len - 1 : len] = '\0';
7470 caf_runtime_error("CO_SUM failed with %s\n", err_buffer);
7471 }
7472 memcpy(errmsg, err_buffer, (errmsg_len > len) ? len : errmsg_len);
7473 if (errmsg_len > len)
7474 memset(&errmsg[len], '\0', errmsg_len - len);
7475 }
7476
7477 /* The front-end function for co_reduce functionality. It sets up the MPI_Op
7478 * for use in MPI_*Reduce functions. */
7479 void
7480 PREFIX(co_reduce) (gfc_descriptor_t *a, void *(*opr) (void *, void *),
7481 int opr_flags, int result_image, int *stat, char *errmsg,
7482 int a_len, charlen_t errmsg_len)
7483 {
7484 MPI_Op op;
7485 int type_a = GFC_DESCRIPTOR_TYPE(a), ierr;
7486 /* Integers and logicals can be treated the same. */
7487 if (type_a == BT_INTEGER || type_a == BT_LOGICAL)
7488 {
7489 /* When the ARG_VALUE opr_flag is set, then the user-function expects its
7490 * arguments to be passed by value. */
7491 if ((opr_flags & GFC_CAF_ARG_VALUE) > 0)
7492 {
7493 #define ifTypeGen(type) \
7494 if (GFC_DESCRIPTOR_SIZE(a) == sizeof(type ## _t)) \
7495 { \
7496 type ## _t_by_value = (typeof(VALUE_FUNC(type ## _t)))opr; \
7497 int ierr = MPI_Op_create(redux_ ## type ## _by_value_adapter, 1, &op); \
7498 chk_err(ierr); \
7499 }
7500 ifTypeGen(int8)
7501 else ifTypeGen(int16)
7502 else ifTypeGen(int32)
7503 else ifTypeGen(int64)
7504 else
7505 {
7506 caf_runtime_error("CO_REDUCE unsupported integer datatype");
7507 }
7508 #undef ifTypeGen
7509 }
7510 else
7511 {
7512 int32_t_by_reference = (typeof(REFERENCE_FUNC(int32_t)))opr;
7513 ierr = MPI_Op_create(redux_int32_by_reference_adapter, 1, &op);
7514 chk_err(ierr);
7515 }
7516 }
7517 /* Treat reals/doubles. */
7518 else if (type_a == BT_REAL)
7519 {
7520 /* When the ARG_VALUE opr_flag is set, then the user-function expects its
7521 * arguments to be passed by value. */
7522 if (GFC_DESCRIPTOR_SIZE(a) == sizeof(float))
7523 {
7524 if ((opr_flags & GFC_CAF_ARG_VALUE) > 0)
7525 {
7526 float_by_value = (typeof(VALUE_FUNC(float)))opr;
7527 ierr = MPI_Op_create(redux_real32_by_value_adapter, 1, &op);
7528 chk_err(ierr);
7529 }
7530 else
7531 {
7532 float_by_reference = (typeof(REFERENCE_FUNC(float)))opr;
7533 ierr = MPI_Op_create(redux_real32_by_reference_adapter, 1, &op);
7534 chk_err(ierr);
7535 }
7536 }
7537 else
7538 {
7539 /* When the ARG_VALUE opr_flag is set, then the user-function expects
7540 * its arguments to be passed by value. */
7541 if ((opr_flags & GFC_CAF_ARG_VALUE) > 0)
7542 {
7543 double_by_value = (typeof(VALUE_FUNC(double)))opr;
7544 ierr = MPI_Op_create(redux_real64_by_value_adapter, 1, &op);
7545 chk_err(ierr);
7546 }
7547 else
7548 {
7549 double_by_reference = (typeof(REFERENCE_FUNC(double)))opr;
7550 ierr = MPI_Op_create(redux_real64_by_reference_adapter, 1, &op);
7551 chk_err(ierr);
7552 }
7553 }
7554 }
7555 else if (type_a == BT_CHARACTER)
7556 {
7557 /* Char array functions always pass by reference. */
7558 char_by_reference = (typeof(REFERENCE_FUNC(char)))opr;
7559 ierr = MPI_Op_create(redux_char_by_reference_adapter, 1, &op);
7560 chk_err(ierr);
7561 }
7562 else
7563 {
7564 caf_runtime_error("Data type not yet supported for co_reduce\n");
7565 }
7566
7567 internal_co_reduce(op, a, result_image, stat, errmsg, a_len, errmsg_len);
7568 }
7569
/* CO_SUM: reduce A with MPI_SUM across all images (see internal_co_reduce;
 * result_image == 0 delivers the result to every image). */
void
PREFIX(co_sum) (gfc_descriptor_t *a, int result_image, int *stat, char *errmsg,
                charlen_t errmsg_len)
{
  internal_co_reduce(MPI_SUM, a, result_image, stat, errmsg, 0, errmsg_len);
}
7576
7577
/* CO_MIN: reduce A with MPI_MIN; src_len carries the string length for
 * CHARACTER arguments. */
void
PREFIX(co_min) (gfc_descriptor_t *a, int result_image, int *stat, char *errmsg,
                int src_len, charlen_t errmsg_len)
{
  internal_co_reduce(MPI_MIN, a, result_image, stat, errmsg, src_len,
                     errmsg_len);
}
7585
7586
/* CO_MAX: reduce A with MPI_MAX; src_len carries the string length for
 * CHARACTER arguments. */
void
PREFIX(co_max) (gfc_descriptor_t *a, int result_image, int *stat,
                char *errmsg, int src_len, charlen_t errmsg_len)
{
  internal_co_reduce(MPI_MAX, a, result_image, stat, errmsg, src_len,
                     errmsg_len);
}
7594
7595
7596 /* Locking functions */
7597
/* LOCK statement: acquire lock variable INDEX of the lock coarray TOKEN on
 * image IMAGE_INDEX (0 selects this image).  acquired_lock/stat/errmsg are
 * forwarded to mutex_lock. */
void
PREFIX(lock) (caf_token_t token, size_t index, int image_index,
              int *acquired_lock, int *stat, char *errmsg,
              charlen_t errmsg_len)
{
  MPI_Win *p = TOKEN(token);
  mutex_lock(*p, (image_index == 0) ? caf_this_image : image_index,
             index, stat, acquired_lock, errmsg, errmsg_len);
}
7607
7608
/* UNLOCK statement: release lock variable INDEX of the lock coarray TOKEN on
 * image IMAGE_INDEX (0 selects this image). */
void
PREFIX(unlock) (caf_token_t token, size_t index, int image_index,
                int *stat, char *errmsg, charlen_t errmsg_len)
{
  MPI_Win *p = TOKEN(token);
  mutex_unlock(*p, (image_index == 0) ? caf_this_image : image_index,
               index, stat, errmsg, errmsg_len);
}
7617
7618 /* Atomics operations */
7619
7620 void
7621 PREFIX(atomic_define) (caf_token_t token, size_t offset,
7622 int image_index, void *value, int *stat,
7623 int type __attribute__((unused)), int kind)
7624 {
7625 MPI_Win *p = TOKEN(token);
7626 MPI_Datatype dt;
7627 int ierr = 0,
7628 image = (image_index != 0) ? image_index - 1 : caf_this_image - 1;
7629
7630 selectType(kind, &dt);
7631
7632 #if MPI_VERSION >= 3
7633 CAF_Win_lock(MPI_LOCK_EXCLUSIVE, image, *p);
7634 ierr = MPI_Accumulate(value, 1, dt, image, offset, 1, dt, MPI_REPLACE, *p);
7635 chk_err(ierr);
7636 CAF_Win_unlock(image, *p);
7637 #else // MPI_VERSION
7638 CAF_Win_lock(MPI_LOCK_EXCLUSIVE, image, *p);
7639 ierr = MPI_Put(value, 1, dt, image, offset, 1, dt, *p); chk_err(ierr);
7640 CAF_Win_unlock(image, *p);
7641 #endif // MPI_VERSION
7642
7643 if (stat)
7644 *stat = ierr;
7645 else if (ierr != 0)
7646 terminate_internal(ierr, 0);
7647
7648 return;
7649 }
7650
7651 void
7652 PREFIX(atomic_ref) (caf_token_t token, size_t offset,
7653 int image_index,
7654 void *value, int *stat,
7655 int type __attribute__((unused)), int kind)
7656 {
7657 MPI_Win *p = TOKEN(token);
7658 MPI_Datatype dt;
7659 int ierr = 0,
7660 image = (image_index != 0) ? image_index - 1 : caf_this_image - 1;
7661
7662 selectType(kind, &dt);
7663
7664 #if MPI_VERSION >= 3
7665 CAF_Win_lock(MPI_LOCK_EXCLUSIVE, image, *p);
7666 ierr = MPI_Fetch_and_op(NULL, value, dt, image, offset, MPI_NO_OP, *p);
7667 chk_err(ierr);
7668 CAF_Win_unlock(image, *p);
7669 #else // MPI_VERSION
7670 CAF_Win_lock(MPI_LOCK_EXCLUSIVE, image, *p);
7671 ierr = MPI_Get(value, 1, dt, image, offset, 1, dt, *p); chk_err(ierr);
7672 CAF_Win_unlock(image, *p);
7673 #endif // MPI_VERSION
7674
7675 if (stat)
7676 *stat = ierr;
7677 else if (ierr != 0)
7678 terminate_internal(ierr, 0);
7679
7680 return;
7681 }
7682
/* ATOMIC_CAS: atomically compare the datum at OFFSET on image IMAGE_INDEX
 * (0 selects this image) with *COMPARE, replace it with *NEW_VAL on match,
 * and return the previous value in *OLD.  Requires MPI-3's
 * MPI_Compare_and_swap; there is no MPI-2 implementation. */
void
PREFIX(atomic_cas) (caf_token_t token, size_t offset, int image_index,
                    void *old, void *compare, void *new_val, int *stat,
                    int type __attribute__((unused)), int kind)
{
  MPI_Win *p = TOKEN(token);
  MPI_Datatype dt;
  int ierr = 0,
      image = (image_index != 0) ? image_index - 1 : caf_this_image - 1;

  selectType(kind, &dt);

#if MPI_VERSION >= 3
  CAF_Win_lock(MPI_LOCK_EXCLUSIVE, image, *p);
  ierr = MPI_Compare_and_swap(new_val, compare, old, dt, image, offset, *p);
  chk_err(ierr);
  CAF_Win_unlock(image, *p);
#else // MPI_VERSION
#warning atomic_cas for MPI-2 is not yet implemented
  printf("We apologize but atomic_cas for MPI-2 is not yet implemented\n");
  ierr = 1;
#endif // MPI_VERSION

  if (stat)
    *stat = ierr;
  else if (ierr != 0)
    terminate_internal(ierr, 0);

  return;
}
7713
7714 void
7715 PREFIX(atomic_op) (int op, caf_token_t token, size_t offset, int image_index,
7716 void *value, void *old, int *stat,
7717 int type __attribute__((unused)), int kind)
7718 {
7719 int ierr = 0;
7720 MPI_Datatype dt;
7721 MPI_Win *p = TOKEN(token);
7722 int image = (image_index != 0) ? image_index - 1 : caf_this_image - 1;
7723
7724 #if MPI_VERSION >= 3
7725 old = malloc(kind);
7726 selectType(kind, &dt);
7727
7728 CAF_Win_lock(MPI_LOCK_EXCLUSIVE, image, *p);
7729 /* Atomic_add */
7730 switch(op) {
7731 case 1:
7732 ierr = MPI_Fetch_and_op(value, old, dt, image, offset, MPI_SUM, *p);
7733 chk_err(ierr);
7734 break;
7735 case 2:
7736 ierr = MPI_Fetch_and_op(value, old, dt, image, offset, MPI_BAND, *p);
7737 chk_err(ierr);
7738 break;
7739 case 4:
7740 ierr = MPI_Fetch_and_op(value, old, dt, image, offset, MPI_BOR, *p);
7741 chk_err(ierr);
7742 break;
7743 case 5:
7744 ierr = MPI_Fetch_and_op(value, old, dt, image, offset, MPI_BXOR, *p);
7745 chk_err(ierr);
7746 break;
7747 default:
7748 printf("We apologize but the atomic operation requested for MPI < 3 "
7749 "is not yet implemented\n");
7750 break;
7751 }
7752 CAF_Win_unlock(image, *p);
7753
7754 free(old);
7755 #else // MPI_VERSION
7756 #warning atomic_op for MPI is not yet implemented
7757 printf("We apologize but atomic_op for MPI < 3 is not yet implemented\n");
7758 #endif // MPI_VERSION
7759 if (stat)
7760 *stat = ierr;
7761 else if (ierr != 0)
7762 terminate_internal(ierr, 0);
7763
7764 return;
7765 }
7766
7767 /* Events */
7768
/* EVENT POST: atomically increment the event counter INDEX in the event
 * window TOKEN on image IMAGE_INDEX (0 selects this image).  Requires
 * MPI-3's MPI_Accumulate/MPI_SUM; MPI-2 only prints a diagnostic. */
void
PREFIX(event_post) (caf_token_t token, size_t index, int image_index,
                    int *stat, char *errmsg, charlen_t errmsg_len)
{
  int value = 1, ierr = 0, flag;
  MPI_Win *p = TOKEN(token);
  const char msg[] = "Error on event post";
  int image = (image_index == 0) ? caf_this_image - 1 : image_index - 1;

  if (stat != NULL)
    *stat = 0;

#if MPI_VERSION >= 3
  CAF_Win_lock(MPI_LOCK_EXCLUSIVE, image, *p);
  /* Counters are stored as an int array in the window; add 1 atomically. */
  ierr = MPI_Accumulate(&value, 1, MPI_INT, image, index * sizeof(int), 1,
                        MPI_INT, MPI_SUM, *p); chk_err(ierr);
  CAF_Win_unlock(image, *p);
#else // MPI_VERSION
#warning Events for MPI-2 are not implemented
  printf("Events for MPI-2 are not supported, "
         "please update your MPI implementation\n");
#endif // MPI_VERSION

  check_image_health(image_index, stat);

  if (!stat && ierr == STAT_FAILED_IMAGE)
    terminate_internal(ierr, 0);

  if (ierr != MPI_SUCCESS)
  {
    if (stat != NULL)
      *stat = ierr;
    if (errmsg != NULL)
    {
      /* Blank-pad errmsg Fortran-style, then copy the message text. */
      memset(errmsg,' ',errmsg_len);
      memcpy(errmsg, msg, MIN(errmsg_len,strlen(msg)));
    }
  }
}
7808
/* EVENT WAIT: block until this image's local event counter INDEX reaches
 * UNTIL_COUNT, then atomically subtract UNTIL_COUNT from it.  Spins on the
 * local window memory first, then backs off with usleep while forcing MPI
 * progress. */
void
PREFIX(event_wait) (caf_token_t token, size_t index, int until_count,
                    int *stat, char *errmsg, charlen_t errmsg_len)
{
  int ierr = 0, count = 0, i, image = caf_this_image - 1;
  int *var = NULL, flag, old = 0, newval = 0;
  const int spin_loop_max = 20000;
  MPI_Win *p = TOKEN(token);
  const char msg[] = "Error on event wait";

  if (stat != NULL)
    *stat = 0;

  /* Get the local base address of the event window to poll it directly. */
  ierr = MPI_Win_get_attr(*p, MPI_WIN_BASE, &var, &flag); chk_err(ierr);

  /* Fast path: busy-poll the counter a bounded number of times. */
  for (i = 0; i < spin_loop_max; ++i)
  {
    ierr = MPI_Win_sync(*p); chk_err(ierr);
    count = var[index];
    if (count >= until_count)
      break;
  }

  /* Slow path: sleep with linearly growing backoff until satisfied. */
  i = 1;
  while (count < until_count)
  {
    ierr = MPI_Win_sync(*p); chk_err(ierr);
    count = var[index];
    usleep(10 * i);
    ++i;
    /* Needed to enforce MPI progress */
    ierr = MPI_Win_flush(image, *p); chk_err(ierr);
  }

  /* Consume the events: atomically subtract until_count from the counter. */
  newval = -until_count;

  CAF_Win_lock(MPI_LOCK_EXCLUSIVE, image, *p);
  ierr = MPI_Fetch_and_op(&newval, &old, MPI_INT, image, index * sizeof(int),
                          MPI_SUM, *p); chk_err(ierr);
  CAF_Win_unlock(image, *p);
  check_image_health(image, stat);

  if (!stat && ierr == STAT_FAILED_IMAGE)
    terminate_internal(ierr, 0);

  if (ierr != MPI_SUCCESS)
  {
    if (stat != NULL)
      *stat = ierr;
    if (errmsg != NULL)
    {
      /* Blank-pad errmsg Fortran-style, then copy the message text. */
      memset(errmsg,' ',errmsg_len);
      memcpy(errmsg, msg, MIN(errmsg_len,strlen(msg)));
    }
  }
}
7865
7866 void
7867 PREFIX(event_query) (caf_token_t token, size_t index,
7868 int image_index, int *count, int *stat)
7869 {
7870 MPI_Win *p = TOKEN(token);
7871 int ierr = 0,
7872 image = (image_index == 0) ? caf_this_image - 1 : image_index - 1;
7873
7874 if (stat != NULL)
7875 *stat = 0;
7876
7877 #if MPI_VERSION >= 3
7878 CAF_Win_lock(MPI_LOCK_EXCLUSIVE, image, *p);
7879 ierr = MPI_Fetch_and_op(NULL, count, MPI_INT, image, index * sizeof(int),
7880 MPI_NO_OP, *p); chk_err(ierr);
7881 CAF_Win_unlock(image, *p);
7882 #else // MPI_VERSION
7883 #warning Events for MPI-2 are not implemented
7884 printf("Events for MPI-2 are not supported, "
7885 "please update your MPI implementation\n");
7886 #endif // MPI_VERSION
7887 if (ierr != MPI_SUCCESS && stat != NULL)
7888 *stat = ierr;
7889 }
7890
7891
/* Internal function to execute the part that is common to all (error) stop
 * functions. */

/* Tear down the runtime (finalize_internal) and terminate the process with
 * EXIT_CODE.  Without failed-images support the whole job is aborted via
 * MPI_Abort; with it, only this process exits so survivors can continue. */
static void
terminate_internal(int stat_code, int exit_code)
{
  dprint("terminate_internal (stat_code = %d, exit_code = %d).\n",
         stat_code, exit_code);
  finalize_internal(stat_code);

#ifndef WITH_FAILED_IMAGES
  MPI_Abort(MPI_COMM_WORLD, exit_code);
#endif
  exit(exit_code);
}
7907
7908
/* From GCC 8 on, the STOP / ERROR STOP entry points receive an extra
 * `quiet` flag from the compiler; older front ends call them without it. */
#ifdef GCC_GE_8
#undef QUIETARG
#define QUIETARG , bool quiet
#endif
7913
/* STOP function for integer arguments. */

/* Print "STOP <code>" (unless quiet) and shut the runtime down regularly,
 * returning stop_code as the process exit status. */
void
PREFIX(stop_numeric) (int stop_code QUIETARG)
{
#ifndef GCC_GE_8
  bool quiet = false;
#endif
  if (!quiet)
    fprintf(stderr, "STOP %d\n", stop_code);

  /* Stopping includes taking down the runtime regularly and returning the
   * stop_code. */
  terminate_internal(STAT_STOPPED_IMAGE, stop_code);
}
7929
7930
7931 /* STOP function for string arguments. */
7932
7933 void
7934 PREFIX(stop_str) (const char *string, charlen_t len QUIETARG)
7935 {
7936 #ifndef GCC_GE_8
7937 bool quiet = false;
7938 #endif
7939 if (!quiet)
7940 {
7941 fputs("STOP ", stderr);
7942 while (len--)
7943 fputc(*(string++), stderr);
7944 fputs("\n", stderr);
7945 }
7946 /* Stopping includes taking down the runtime regularly. */
7947 terminate_internal(STAT_STOPPED_IMAGE, 0);
7948 }
7949
7950
7951 /* ERROR STOP function for string arguments. */
7952
7953 static void
7954 error_stop_str(const char *string, size_t len, bool quiet)
7955 {
7956 if (!quiet)
7957 {
7958 fputs("ERROR STOP ", stderr);
7959 while (len--)
7960 fputc(*(string++), stderr);
7961 fputs("\n", stderr);
7962 }
7963 terminate_internal(STAT_STOPPED_IMAGE, 1);
7964 }
7965
7966
/* Public ERROR STOP entry point for string arguments; forwards to the
 * static error_stop_str with the (possibly synthesized) quiet flag. */
void
PREFIX(error_stop_str) (const char *string, charlen_t len QUIETARG)
{
#ifndef GCC_GE_8
  bool quiet = false;
#endif
  error_stop_str(string, len, quiet);
}
7975
7976
/* ERROR STOP function for numerical arguments. */

/* Print "ERROR STOP <code>" unless quiet and take the runtime down,
 * returning the error code as the process exit status. */
void
PREFIX(error_stop) (int error QUIETARG)
{
#ifndef GCC_GE_8
  bool quiet = false;
#endif
  if (!quiet)
    fprintf(stderr, "ERROR STOP %d\n", error);

  terminate_internal(STAT_STOPPED_IMAGE, error);
}
7990
7991
/* FAIL IMAGE statement. */

/* Simulate an image failure by killing this process outright (SIGKILL);
 * the terminate_internal call below is only reached if the signal could
 * not be delivered. */
void
PREFIX(fail_image) (void)
{
  fputs("IMAGE FAILED!\n", stderr);

  raise(SIGKILL);
  /* A failing image is expected to take down the runtime regularly. */
  terminate_internal(STAT_FAILED_IMAGE, 0);
}
8003
/* IMAGE_STATUS(image): return 0 for a healthy image, STAT_FAILED_IMAGE or
 * STAT_STOPPED_IMAGE otherwise.  The result is cached in image_stati[];
 * only a cached 0 triggers a fresh remote query.  Requires failed-images
 * support; without it, prints a diagnostic and returns 0. */
int
PREFIX(image_status) (int image)
{
#ifdef GFC_CAF_CHECK
  if (image < 1 || image > caf_num_images)
  {
    char errmsg[60];
    sprintf(errmsg, "Image #%d out of bounds of images 1..%d.",
            image, caf_num_images);
    caf_runtime_error(errmsg);
  }
#endif
#ifdef WITH_FAILED_IMAGES
  if (image_stati[image - 1] == 0)
  {
    /* NOTE: `status` is reused for three purposes below: the MPI_Test flag,
     * the MPI error class, and the remotely fetched image status. */
    int status, ierr;
    /* Check that we are fine before doing anything.
     *
     * Do an MPI-operation to learn about failed/stopped images, that have
     * not been detected yet. */
    ierr = MPI_Test(&alive_request, &status, MPI_STATUSES_IGNORE);
    chk_err(ierr);
    MPI_Error_class(ierr, &status);
    if (ierr == MPI_SUCCESS)
    {
      /* Fetch the image's advertised status from the status window. */
      CAF_Win_lock(MPI_LOCK_SHARED, image - 1, *stat_tok);
      ierr = MPI_Get(&status, 1, MPI_INT, image - 1, 0, 1, MPI_INT, *stat_tok);
      chk_err(ierr);
      dprint("Image status of image #%d is: %d\n", image, status);
      CAF_Win_unlock(image - 1, *stat_tok);
      image_stati[image - 1] = status;
    }
    else if (status == MPIX_ERR_PROC_FAILED)
    {
      image_stati[image - 1] = STAT_FAILED_IMAGE;
    }
    else
    {
      const int strcap = 200;
      char errmsg[strcap];
      int slen, supplied_len;
      sprintf(errmsg, "Image status for image #%d returned mpi error: ",
              image);
      slen = strlen(errmsg);
      supplied_len = strcap - slen;
      MPI_Error_string(status, &errmsg[slen], &supplied_len);
      caf_runtime_error(errmsg);
    }
  }
  return image_stati[image - 1];
#else
  unsupported_fail_images_message("IMAGE_STATUS()");
#endif // WITH_FAILED_IMAGES

  return 0;
}
8060
8061 void
8062 PREFIX(failed_images) (gfc_descriptor_t *array,
8063 int team __attribute__((unused)), int * kind)
8064 {
8065 int local_kind = kind ? *kind : 4; /* GFC_DEFAULT_INTEGER_KIND = 4*/
8066
8067 #ifdef WITH_FAILED_IMAGES
8068 void *mem = calloc(num_images_failed, local_kind);
8069 array->base_addr = mem;
8070 for (int i = 0; i < caf_num_images; ++i)
8071 {
8072 if (image_stati[i] == STAT_FAILED_IMAGE)
8073 {
8074 switch (local_kind)
8075 {
8076 case 1:
8077 *(int8_t *)mem = i + 1;
8078 break;
8079 case 2:
8080 *(int16_t *)mem = i + 1;
8081 break;
8082 case 4:
8083 *(int32_t *)mem = i + 1;
8084 break;
8085 case 8:
8086 *(int64_t *)mem = i + 1;
8087 break;
8088 #ifdef HAVE_GFC_INTEGER_16
8089 case 16:
8090 *(int128t *)mem = i + 1;
8091 break;
8092 #endif
8093 default:
8094 caf_runtime_error("Unsupported integer kind %1 "
8095 "in caf_failed_images.", local_kind);
8096 }
8097 mem += local_kind;
8098 }
8099 }
8100 array->dim[0]._ubound = num_images_failed - 1;
8101 #else
8102 unsupported_fail_images_message("FAILED_IMAGES()");
8103 array->dim[0]._ubound = -1;
8104 array->base_addr = NULL;
8105 #endif // WITH_FAILED_IMAGES
8106
8107 #ifdef GCC_GE_8
8108 array->dtype.type = BT_INTEGER;
8109 array->dtype.elem_len = local_kind;
8110 #else
8111 array->dtype = ((BT_INTEGER << GFC_DTYPE_TYPE_SHIFT)
8112 | (local_kind << GFC_DTYPE_SIZE_SHIFT));
8113 #endif
8114 array->dim[0].lower_bound = 0;
8115 array->dim[0]._stride = 1;
8116 array->offset = 0;
8117 }
8118
8119 void
8120 PREFIX(stopped_images) (gfc_descriptor_t *array,
8121 int team __attribute__((unused)), int * kind)
8122 {
8123 int local_kind = kind ? *kind : 4; /* GFC_DEFAULT_INTEGER_KIND = 4*/
8124
8125 #ifdef WITH_FAILED_IMAGES
8126 void *mem = calloc(num_images_stopped, local_kind);
8127 array->base_addr = mem;
8128 for (int i = 0; i < caf_num_images; ++i)
8129 {
8130 if (image_stati[i])
8131 {
8132 switch (local_kind)
8133 {
8134 case 1:
8135 *(int8_t *)mem = i + 1;
8136 break;
8137 case 2:
8138 *(int16_t *)mem = i + 1;
8139 break;
8140 case 4:
8141 *(int32_t *)mem = i + 1;
8142 break;
8143 case 8:
8144 *(int64_t *)mem = i + 1;
8145 break;
8146 #ifdef HAVE_GFC_INTEGER_16
8147 case 16:
8148 *(int128t *)mem = i + 1;
8149 break;
8150 #endif
8151 default:
8152 caf_runtime_error("Unsupported integer kind %1 "
8153 "in caf_stopped_images.", local_kind);
8154 }
8155 mem += local_kind;
8156 }
8157 }
8158 array->dim[0]._ubound = num_images_stopped - 1;
8159 #else
8160 unsupported_fail_images_message("STOPPED_IMAGES()");
8161 array->dim[0]._ubound = -1;
8162 array->base_addr = NULL;
8163 #endif // WITH_FAILED_IMAGES
8164
8165 #ifdef GCC_GE_8
8166 array->dtype.type = BT_INTEGER;
8167 array->dtype.elem_len = local_kind;
8168 #else
8169 array->dtype = ((BT_INTEGER << GFC_DTYPE_TYPE_SHIFT)
8170 | (local_kind << GFC_DTYPE_SIZE_SHIFT));
8171 #endif
8172 array->dim[0].lower_bound = 0;
8173 array->dim[0]._stride = 1;
8174 array->offset = 0;
8175 }
8176
8177 /* Give a descriptive message when failed images support is not available. */
8178 void
8179 unsupported_fail_images_message(const char * functionname)
8180 {
8181 fprintf(stderr,
8182 "*** caf_mpi-lib runtime message on image %d:\n"
8183 "*** The failed images feature '%s' "
8184 "*** of Fortran 2015 standard is not available in this build."
8185 "*** You need a compiler with failed images support activated and "
8186 "*** compile OpenCoarrays with failed images support.\n",
8187 caf_this_image, functionname);
8188 #ifdef STOP_ON_UNSUPPORTED
8189 exit(EXIT_FAILURE);
8190 #endif
8191 }
8192
8193 /* Give a descriptive message when support for an allocatable components
8194 * feature is not available. */
8195 void
8196 unimplemented_alloc_comps_message(const char * functionname)
8197 {
8198 fprintf(stderr,
8199 "*** Message from libcaf_mpi runtime function '%s' on image %d:\n"
8200 "*** Assigning to an allocatable coarray component of a derived type"
8201 "is not yet supported with GCC 7.\n"
8202 "*** Either revert to GCC 6 or convert all "
8203 "puts (type(foo)::x; x%%y[recipient] = z) to "
8204 "gets (z = x%%y[provider]).\n",
8205 functionname, caf_this_image );
8206 #ifdef STOP_ON_UNSUPPORTED
8207 exit(EXIT_FAILURE);
8208 #endif
8209 }
8210
8211 void PREFIX(form_team) (int team_id, caf_team_t *team,
8212 int index __attribute__((unused)))
8213 {
8214 struct caf_teams_list *tmp;
8215 void * tmp_team;
8216 MPI_Comm *newcomm;
8217 MPI_Comm *current_comm = &CAF_COMM_WORLD;
8218 int ierr;
8219
8220 newcomm = (MPI_Comm *)calloc(1,sizeof(MPI_Comm));
8221 ierr = MPI_Comm_split(*current_comm, team_id, caf_this_image, newcomm);
8222 chk_err(ierr);
8223
8224 tmp = calloc(1,sizeof(struct caf_teams_list));
8225 tmp->prev = teams_list;
8226 teams_list = tmp;
8227 teams_list->team_id = team_id;
8228 teams_list->team = newcomm;
8229 *team = tmp;
8230 }
8231
8232 void PREFIX(change_team) (caf_team_t *team,
8233 int coselector __attribute__((unused)))
8234 {
8235 caf_used_teams_list *tmp_used = NULL;
8236 caf_teams_list * tmp_list = NULL;
8237 void *tmp_team;
8238 MPI_Comm *tmp_comm;
8239
8240 tmp_list = (struct caf_teams_list *)*team;
8241 tmp_team = (void *)tmp_list->team;
8242 tmp_comm = (MPI_Comm *)tmp_team;
8243
8244 tmp_used = (caf_used_teams_list *)calloc(1,sizeof(caf_used_teams_list));
8245 tmp_used->prev = used_teams;
8246
8247 /* We need to look in the teams_list and find the appropriate element.
8248 * This is not efficient but can be easily fixed in the future.
8249 * Instead of keeping track of the communicator in the compiler
8250 * we should keep track of the caf_teams_list element associated with it. */
8251
8252 /*
8253 tmp_list = teams_list;
8254
8255 while (tmp_list)
8256 {
8257 if (tmp_list->team == tmp_team)
8258 break;
8259 tmp_list = tmp_list->prev;
8260 }
8261 */
8262
8263 if (tmp_list == NULL)
8264 caf_runtime_error("CHANGE TEAM called on a non-existing team");
8265
8266 tmp_used->team_list_elem = tmp_list;
8267 used_teams = tmp_used;
8268 tmp_team = tmp_used->team_list_elem->team;
8269 tmp_comm = (MPI_Comm *)tmp_team;
8270 CAF_COMM_WORLD = *tmp_comm;
8271 int ierr = MPI_Comm_rank(*tmp_comm,&caf_this_image); chk_err(ierr);
8272 caf_this_image++;
8273 ierr = MPI_Comm_size(*tmp_comm,&caf_num_images); chk_err(ierr);
8274 ierr = MPI_Barrier(*tmp_comm); chk_err(ierr);
8275 }
8276
8277 MPI_Fint
8278 PREFIX(get_communicator) (caf_team_t *team)
8279 {
8280 if (team != NULL) caf_runtime_error("get_communicator does not yet support "
8281 "the optional team argument");
8282
8283 MPI_Comm* comm_ptr = teams_list->team;
8284 MPI_Fint ret = MPI_Comm_c2f(*comm_ptr);
8285
8286 return ret;
8287 // return *(int*)comm_ptr;
8288 }
8289
/* Implement TEAM_NUMBER: return the team id of TEAM, or of the currently
 * active team when TEAM is absent (NULL). */
int
PREFIX(team_number) (caf_team_t *team)
{
  if (team != NULL)
    /* NOTE(review): sibling routines (change_team, sync_team) cast the
     * dereferenced handle, (struct caf_teams_list *)*team, whereas this
     * casts the pointer `team` itself -- confirm which level of
     * indirection the compiler actually passes here. */
    return ((caf_teams_list *)team)->team_id;
  else
    return used_teams->team_list_elem->team_id; /* current team */
}
8298
8299 void PREFIX(end_team) (caf_team_t *team __attribute__((unused)))
8300 {
8301 caf_used_teams_list *tmp_used = NULL;
8302 void *tmp_team;
8303 MPI_Comm *tmp_comm;
8304 int ierr;
8305
8306 ierr = MPI_Barrier(CAF_COMM_WORLD); chk_err(ierr);
8307 if (used_teams->prev == NULL)
8308 caf_runtime_error("END TEAM called on initial team");
8309
8310 tmp_used = used_teams;
8311 used_teams = used_teams->prev;
8312 free(tmp_used);
8313 tmp_used = used_teams;
8314 tmp_team = tmp_used->team_list_elem->team;
8315 tmp_comm = (MPI_Comm *)tmp_team;
8316 CAF_COMM_WORLD = *tmp_comm;
8317 /* CAF_COMM_WORLD = (MPI_Comm)*tmp_used->team_list_elem->team; */
8318 ierr = MPI_Comm_rank(CAF_COMM_WORLD,&caf_this_image); chk_err(ierr);
8319 caf_this_image++;
8320 ierr = MPI_Comm_size(CAF_COMM_WORLD,&caf_num_images); chk_err(ierr);
8321 }
8322
/* Implement SYNC TEAM: barrier over the communicator of *TEAM.  The team
 * must be the current team, one of its ancestors, or a direct child of
 * the current team; otherwise a runtime error is raised. */
void PREFIX(sync_team) (caf_team_t *team , int unused __attribute__((unused)))
{
  caf_teams_list *tmp_list = NULL;
  caf_used_teams_list *tmp_used = NULL;
  void *tmp_team;
  MPI_Comm *tmp_comm;

  tmp_used = used_teams;
  tmp_list = (struct caf_teams_list *)*team;
  tmp_team = (void *)tmp_list->team;
  tmp_comm = (MPI_Comm *)tmp_team;

  /* if the team is not a child */
  /* (the unbraced if below governs the whole while loop: when the team IS
   * a direct child of the current team, the search is skipped entirely and
   * tmp_used stays non-NULL, so the error check passes) */
  if (tmp_used->team_list_elem != tmp_list->prev)
    /* then search backwards through the team list, first checking if it's the
     * current team, then if it is an ancestor team */
    while (tmp_used)
    {
      if (tmp_used->team_list_elem == tmp_list)
        break;
      tmp_used = tmp_used->prev;
    }

  /* tmp_used == NULL means the search above exhausted the stack without a
   * match: the requested team is unrelated to the current one. */
  if (tmp_used == NULL)
    caf_runtime_error("SYNC TEAM called on team different from current, "
                      "or ancestor, or child");

  int ierr = MPI_Barrier(*tmp_comm); chk_err(ierr);
}
8352