/* OpenACC Runtime initialization routines

   Copyright (C) 2013-2021 Free Software Foundation, Inc.

   Contributed by Mentor Embedded.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#include "openacc.h"
#include "libgomp.h"
#include "gomp-constants.h"
#include "oacc-int.h"
#include <string.h>
#include <assert.h>

/* Return block containing [H,+S), or NULL if not contained.  The device lock
   for DEV must be locked on entry, and remains locked on exit.  */

static splay_tree_key
lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
{
  struct splay_tree_key_s node;
  splay_tree_key key;

  node.host_start = (uintptr_t) h;
  node.host_end = (uintptr_t) h + s;

  key = splay_tree_lookup (&dev->mem_map, &node);

  return key;
}

/* Helper for lookup_dev.  Iterate over splay tree.  */

static splay_tree_key
lookup_dev_1 (splay_tree_node node, uintptr_t d, size_t s)
{
  splay_tree_key key = &node->key;
  if (d >= key->tgt->tgt_start && d + s <= key->tgt->tgt_end)
    return key;

  key = NULL;
  if (node->left)
    key = lookup_dev_1 (node->left, d, s);
  if (!key && node->right)
    key = lookup_dev_1 (node->right, d, s);

  return key;
}

/* Return block containing [D,+S), or NULL if not contained.

   This iterates over the splay tree.  This is not expected to be a common
   operation.

   The device lock associated with MEM_MAP must be locked on entry, and
   remains locked on exit.  */

static splay_tree_key
lookup_dev (splay_tree mem_map, void *d, size_t s)
{
  if (!mem_map || !mem_map->root)
    return NULL;

  return lookup_dev_1 (mem_map->root, (uintptr_t) d, s);
}


/* OpenACC is silent on how memory exhaustion is indicated.  We return
   NULL.  */

void *
acc_malloc (size_t s)
{
  if (!s)
    return NULL;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();

  assert (thr->dev);

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return malloc (s);

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);

  void *res = thr->dev->alloc_func (thr->dev->target_id, s);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }

  return res;
}
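
/* Usage sketch (illustrative only, not part of libgomp): raw device memory
   from acc_malloc is typically paired with host memory via acc_map_data, or
   used with the acc_memcpy_* routines below.  Memory exhaustion is reported
   by a NULL return, so a caller might write:

     size_t n = 1024 * sizeof (float);
     void *d = acc_malloc (n);
     if (!d)
       abort ();
     ...
     acc_free (d);

   On shared-memory devices this degenerates to plain malloc/free, as
   above.  */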

void
acc_free (void *d)
{
  splay_tree_key k;

  if (!d)
    return;

  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return free (d);

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);

  gomp_mutex_lock (&acc_dev->lock);

  /* We don't have to call lazy open here, as the ptr value must have
     been returned by acc_malloc.  It's not permitted to pass NULL in
     (unless you got that null from acc_malloc).  */
  if ((k = lookup_dev (&acc_dev->mem_map, d, 1)))
    {
      void *offset = d - k->tgt->tgt_start + k->tgt_offset;
      void *h = k->host_start + offset;
      size_t h_size = k->host_end - k->host_start;
      gomp_mutex_unlock (&acc_dev->lock);
      /* PR92503 "[OpenACC] Behavior of 'acc_free' if the memory space is
	 still used in a mapping".  */
      gomp_fatal ("refusing to free device memory space at %p that is still"
		  " mapped at [%p,+%d]",
		  d, h, (int) h_size);
    }
  else
    gomp_mutex_unlock (&acc_dev->lock);

  if (!acc_dev->free_func (acc_dev->target_id, d))
    gomp_fatal ("error in freeing device memory in %s", __FUNCTION__);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}

static void
memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async,
		      const char *libfnname)
{
  /* No need to call lazy open here, as the device pointer must have
     been obtained from a routine that did that.  */
  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (from)
	memmove (h, d, s);
      else
	memmove (d, h, s);
      return;
    }

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  goacc_aq aq = get_goacc_asyncqueue (async);
  if (from)
    gomp_copy_dev2host (thr->dev, aq, h, d, s);
  else
    gomp_copy_host2dev (thr->dev, aq, d, h, s, false, /* TODO: cbuf? */ NULL);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}

void
acc_memcpy_to_device (void *d, void *h, size_t s)
{
  memcpy_tofrom_device (false, d, h, s, acc_async_sync, __FUNCTION__);
}

void
acc_memcpy_to_device_async (void *d, void *h, size_t s, int async)
{
  memcpy_tofrom_device (false, d, h, s, async, __FUNCTION__);
}

void
acc_memcpy_from_device (void *h, void *d, size_t s)
{
  memcpy_tofrom_device (true, d, h, s, acc_async_sync, __FUNCTION__);
}

void
acc_memcpy_from_device_async (void *h, void *d, size_t s, int async)
{
  memcpy_tofrom_device (true, d, h, s, async, __FUNCTION__);
}
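
/* Usage sketch (illustrative only): copying a host buffer through raw device
   memory and back, using the synchronous variants:

     float in[256], out[256];
     void *d = acc_malloc (sizeof in);
     acc_memcpy_to_device (d, in, sizeof in);
     acc_memcpy_from_device (out, d, sizeof in);
     acc_free (d);

   The *_async variants take an additional async-queue argument (e.g.
   acc_async_noval); the transfer must then be synchronized, for example
   with acc_wait_all (), before the data is used.  */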

/* Return the device pointer that corresponds to host data H, or NULL
   if there is no mapping.  */

void *
acc_deviceptr (void *h)
{
  splay_tree_key n;
  void *d;
  void *offset;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *dev = thr->dev;

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return h;

  /* In the following, no OpenACC Profiling Interface events can possibly be
     generated.  */

  gomp_mutex_lock (&dev->lock);

  n = lookup_host (dev, h, 1);

  if (!n)
    {
      gomp_mutex_unlock (&dev->lock);
      return NULL;
    }

  offset = h - n->host_start;

  d = n->tgt->tgt_start + n->tgt_offset + offset;

  gomp_mutex_unlock (&dev->lock);

  return d;
}
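
/* Example (illustrative only): after a dynamic mapping exists, acc_deviceptr
   recovers the device address for any byte inside it, preserving offsets:

     float a[100];
     acc_copyin (a, sizeof a);
     void *d_a0 = acc_deviceptr (a);
     void *d_a5 = acc_deviceptr (&a[5]);

   Here d_a5 == (char *) d_a0 + 5 * sizeof (float); an unmapped host address
   yields NULL rather than an error.  */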

/* Return the host pointer that corresponds to device data D, or NULL
   if there is no mapping.  */

void *
acc_hostptr (void *d)
{
  splay_tree_key n;
  void *h;
  void *offset;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return d;

  /* In the following, no OpenACC Profiling Interface events can possibly be
     generated.  */

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_dev (&acc_dev->mem_map, d, 1);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      return NULL;
    }

  offset = d - n->tgt->tgt_start + n->tgt_offset;

  h = n->host_start + offset;

  gomp_mutex_unlock (&acc_dev->lock);

  return h;
}

/* Return 1 if host data [H,+S] is present on the device.  */

int
acc_is_present (void *h, size_t s)
{
  splay_tree_key n;

  if (!s || !h)
    return 0;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return h != NULL;

  /* In the following, no OpenACC Profiling Interface events can possibly be
     generated.  */

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  if (n && ((uintptr_t) h < n->host_start
	    || (uintptr_t) h + s > n->host_end
	    || s > n->host_end - n->host_start))
    n = NULL;

  gomp_mutex_unlock (&acc_dev->lock);

  return n != NULL;
}
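
/* Example (illustrative only): acc_is_present answers containment queries
   against existing mappings, so a subrange of a mapped block is present,
   but a range extending past it is not:

     float a[100];
     acc_copyin (a, sizeof a);
     acc_is_present (a, sizeof a);
     acc_is_present (&a[10], 10 * sizeof (float));
     acc_is_present (a, 2 * sizeof a);

   The first two calls return 1; the last returns 0 because the queried
   range extends past the mapped block.  */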

/* Create a mapping for host [H,+S] -> device [D,+S].  */

void
acc_map_data (void *h, void *d, size_t s)
{
  size_t mapnum = 1;
  void *hostaddrs = h;
  void *devaddrs = d;
  size_t sizes = s;
  unsigned short kinds = GOMP_MAP_ALLOC;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (d != h)
	gomp_fatal ("cannot map data on shared-memory system");
    }
  else
    {
      if (!d || !h || !s)
	gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
		    (void *) h, (int) s, (void *) d, (int) s);

      acc_prof_info prof_info;
      acc_api_info api_info;
      bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);

      gomp_mutex_lock (&acc_dev->lock);

      if (lookup_host (acc_dev, h, s))
	{
	  gomp_mutex_unlock (&acc_dev->lock);
	  gomp_fatal ("host address [%p, +%d] is already mapped", (void *) h,
		      (int) s);
	}

      if (lookup_dev (&thr->dev->mem_map, d, s))
	{
	  gomp_mutex_unlock (&acc_dev->lock);
	  gomp_fatal ("device address [%p, +%d] is already mapped", (void *) d,
		      (int) s);
	}

      gomp_mutex_unlock (&acc_dev->lock);

      struct target_mem_desc *tgt
	= goacc_map_vars (acc_dev, NULL, mapnum, &hostaddrs, &devaddrs, &sizes,
			  &kinds, true, GOMP_MAP_VARS_ENTER_DATA);
      assert (tgt);
      assert (tgt->list_count == 1);
      splay_tree_key n = tgt->list[0].key;
      assert (n);
      assert (n->refcount == 1);
      assert (n->dynamic_refcount == 0);
      /* Special reference counting behavior.  */
      n->refcount = REFCOUNT_INFINITY;

      if (profiling_p)
	{
	  thr->prof_info = NULL;
	  thr->api_info = NULL;
	}
    }
}
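
/* Usage sketch (illustrative only): acc_map_data pairs separately allocated
   device memory with host memory.  Because the refcount is forced to
   REFCOUNT_INFINITY above, the mapping behaves as permanently present:

     float a[100];
     void *d = acc_malloc (sizeof a);
     acc_map_data (a, d, sizeof a);
     assert (acc_is_present (a, sizeof a));
     acc_unmap_data (a);
     acc_free (d);

   acc_unmap_data removes the mapping but does not free the device memory,
   so acc_free remains the caller's responsibility.  */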

void
acc_unmap_data (void *h)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* No need to call lazy open, as the address must have been mapped.  */

  /* This is a no-op on shared-memory targets.  */
  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);

  gomp_mutex_lock (&acc_dev->lock);

  splay_tree_key n = lookup_host (acc_dev, h, 1);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *) h);
    }

  size_t host_size = n->host_end - n->host_start;

  if (n->host_start != (uintptr_t) h)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds %p",
		  (void *) n->host_start, (int) host_size, (void *) h);
    }
  /* TODO This currently doesn't catch 'REFCOUNT_INFINITY' usage different from
     'acc_map_data'.  Maybe 'dynamic_refcount' can be used for disambiguating
     the different 'REFCOUNT_INFINITY' cases, or simply separate
     'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA'
     etc.)?  */
  else if (n->refcount != REFCOUNT_INFINITY)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("refusing to unmap block [%p,+%d] that has not been mapped"
		  " by 'acc_map_data'",
		  (void *) h, (int) host_size);
    }

  struct target_mem_desc *tgt = n->tgt;

  if (tgt->refcount == REFCOUNT_INFINITY)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("cannot unmap target block");
    }

  /* Above, we've verified that the mapping must have been set up by
     'acc_map_data'.  */
  assert (tgt->refcount == 1);

  /* Nullifying these fields prevents 'gomp_unmap_tgt' via 'gomp_remove_var'
     from freeing the target memory.  */
  tgt->tgt_end = 0;
  tgt->to_free = NULL;

  bool is_tgt_unmapped = gomp_remove_var (acc_dev, n);
  assert (is_tgt_unmapped);

  gomp_mutex_unlock (&acc_dev->lock);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}


/* Helper function to map a single dynamic data item, represented by a single
   mapping.  The acc_dev->lock should be held on entry, and remains locked on
   exit.  */

static void *
goacc_map_var_existing (struct gomp_device_descr *acc_dev, void *hostaddr,
			size_t size, splay_tree_key n)
{
  assert (n);

  /* Present.  */
  void *d = (void *) (n->tgt->tgt_start + n->tgt_offset + hostaddr
		      - n->host_start);

  if (hostaddr + size > (void *) n->host_end)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,+%d] not mapped", hostaddr, (int) size);
    }

  assert (n->refcount != REFCOUNT_LINK);
  if (n->refcount != REFCOUNT_INFINITY)
    n->refcount++;
  n->dynamic_refcount++;

  return d;
}

/* Enter dynamic mapping for a single datum.  Return the device pointer.  */

static void *
goacc_enter_datum (void **hostaddrs, size_t *sizes, void *kinds, int async)
{
  void *d;
  splay_tree_key n;

  if (!hostaddrs[0] || !sizes[0])
    gomp_fatal ("[%p,+%d] is a bad range", hostaddrs[0], (int) sizes[0]);

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return hostaddrs[0];

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, hostaddrs[0], sizes[0]);
  if (n)
    {
      d = goacc_map_var_existing (acc_dev, hostaddrs[0], sizes[0], n);
      gomp_mutex_unlock (&acc_dev->lock);
    }
  else
    {
      const size_t mapnum = 1;

      gomp_mutex_unlock (&acc_dev->lock);

      goacc_aq aq = get_goacc_asyncqueue (async);

      struct target_mem_desc *tgt
	= goacc_map_vars (acc_dev, aq, mapnum, hostaddrs, NULL, sizes,
			  kinds, true, GOMP_MAP_VARS_ENTER_DATA);
      assert (tgt);
      assert (tgt->list_count == 1);
      n = tgt->list[0].key;
      assert (n);
      assert (n->refcount == 1);
      assert (n->dynamic_refcount == 0);
      n->dynamic_refcount++;

      d = (void *) tgt->tgt_start;
    }

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }

  return d;
}

void *
acc_create (void *h, size_t s)
{
  unsigned short kinds[1] = { GOMP_MAP_ALLOC };
  return goacc_enter_datum (&h, &s, &kinds, acc_async_sync);
}

void
acc_create_async (void *h, size_t s, int async)
{
  unsigned short kinds[1] = { GOMP_MAP_ALLOC };
  goacc_enter_datum (&h, &s, &kinds, async);
}

/* acc_present_or_create used to be what acc_create is now.  */
/* acc_pcreate is acc_present_or_create by a different name.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_create, acc_present_or_create)
strong_alias (acc_create, acc_pcreate)
#else
void *
acc_present_or_create (void *h, size_t s)
{
  return acc_create (h, s);
}

void *
acc_pcreate (void *h, size_t s)
{
  return acc_create (h, s);
}
#endif

void *
acc_copyin (void *h, size_t s)
{
  unsigned short kinds[1] = { GOMP_MAP_TO };
  return goacc_enter_datum (&h, &s, &kinds, acc_async_sync);
}

void
acc_copyin_async (void *h, size_t s, int async)
{
  unsigned short kinds[1] = { GOMP_MAP_TO };
  goacc_enter_datum (&h, &s, &kinds, async);
}

/* acc_present_or_copyin used to be what acc_copyin is now.  */
/* acc_pcopyin is acc_present_or_copyin by a different name.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_copyin, acc_present_or_copyin)
strong_alias (acc_copyin, acc_pcopyin)
#else
void *
acc_present_or_copyin (void *h, size_t s)
{
  return acc_copyin (h, s);
}

void *
acc_pcopyin (void *h, size_t s)
{
  return acc_copyin (h, s);
}
#endif
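
/* Example (illustrative only): entering already-present data increments the
   dynamic reference count instead of copying again, so deletions must
   balance the enters:

     float a[100];
     acc_copyin (a, sizeof a);
     acc_copyin (a, sizeof a);
     acc_delete (a, sizeof a);
     acc_delete (a, sizeof a);

   The second acc_copyin performs no transfer; the block is unmapped only by
   the second acc_delete, when both reference counts reach zero.  */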


/* Helper function to unmap a single data item.  Device lock should be held on
   entry, and remains locked on exit.  */

static void
goacc_exit_datum_1 (struct gomp_device_descr *acc_dev, void *h, size_t s,
		    unsigned short kind, splay_tree_key n, goacc_aq aq)
{
  assert (kind != GOMP_MAP_DETACH
	  && kind != GOMP_MAP_FORCE_DETACH);

  if ((uintptr_t) h < n->host_start || (uintptr_t) h + s > n->host_end)
    {
      size_t host_size = n->host_end - n->host_start;
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,+%d] outside mapped block [%p,+%d]",
		  (void *) h, (int) s, (void *) n->host_start, (int) host_size);
    }

  bool finalize = (kind == GOMP_MAP_FORCE_FROM
		   || kind == GOMP_MAP_DELETE);

  assert (n->refcount != REFCOUNT_LINK);
  if (n->refcount != REFCOUNT_INFINITY
      && n->refcount < n->dynamic_refcount)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("Dynamic reference counting assert fail\n");
    }

  if (finalize)
    {
      if (n->refcount != REFCOUNT_INFINITY)
	n->refcount -= n->dynamic_refcount;
      n->dynamic_refcount = 0;
    }
  else if (n->dynamic_refcount)
    {
      if (n->refcount != REFCOUNT_INFINITY)
	n->refcount--;
      n->dynamic_refcount--;
    }

  if (n->refcount == 0)
    {
      bool copyout = (kind == GOMP_MAP_FROM
		      || kind == GOMP_MAP_FORCE_FROM);
      if (copyout)
	{
	  void *d = (void *) (n->tgt->tgt_start + n->tgt_offset
			      + (uintptr_t) h - n->host_start);
	  gomp_copy_dev2host (acc_dev, aq, h, d, s);
	}

      if (aq)
	/* TODO We can't do the 'is_tgt_unmapped' checking -- see the
	   'gomp_unref_tgt' comment in
	   <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>;
	   PR92881.  */
	gomp_remove_var_async (acc_dev, n, aq);
      else
	{
	  size_t num_mappings = 0;
	  /* If the target_mem_desc represents a single data mapping, we can
	     check that it is freed when this splay tree key's refcount reaches
	     zero.  Otherwise (e.g. for a 'GOMP_MAP_STRUCT' mapping with
	     multiple members), fall back to skipping the test.  */
	  for (size_t l_i = 0; l_i < n->tgt->list_count; ++l_i)
	    if (n->tgt->list[l_i].key
		&& !n->tgt->list[l_i].is_attach)
	      ++num_mappings;
	  bool is_tgt_unmapped = gomp_remove_var (acc_dev, n);
	  assert (is_tgt_unmapped || num_mappings > 1);
	}
    }
}
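
/* Worked example for the helper above (illustrative only): with a refcount
   of 3 of which 2 are dynamic, a non-finalizing exit ('acc_delete',
   GOMP_MAP_RELEASE) drops both counts by one, leaving refcount 2 and
   dynamic refcount 1; a finalizing exit ('acc_delete_finalize',
   GOMP_MAP_DELETE) subtracts the whole dynamic count at once, leaving
   refcount 1 and dynamic refcount 0.  Either way, copyback and unmapping
   happen only once the refcount reaches zero.  */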


/* Exit a dynamic mapping for a single variable.  */

static void
goacc_exit_datum (void *h, size_t s, unsigned short kind, int async)
{
  /* No need to call lazy open, as the data must already have been
     mapped.  */

  kind &= 0xff;

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  gomp_mutex_lock (&acc_dev->lock);

  splay_tree_key n = lookup_host (acc_dev, h, s);
  /* Non-present data is a no-op: PR92726, PR92970, PR92984.  */
  if (n)
    {
      goacc_aq aq = get_goacc_asyncqueue (async);
      goacc_exit_datum_1 (acc_dev, h, s, kind, n, aq);
    }

  gomp_mutex_unlock (&acc_dev->lock);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}

void
acc_delete (void *h, size_t s)
{
  goacc_exit_datum (h, s, GOMP_MAP_RELEASE, acc_async_sync);
}

void
acc_delete_async (void *h, size_t s, int async)
{
  goacc_exit_datum (h, s, GOMP_MAP_RELEASE, async);
}

void
acc_delete_finalize (void *h, size_t s)
{
  goacc_exit_datum (h, s, GOMP_MAP_DELETE, acc_async_sync);
}

void
acc_delete_finalize_async (void *h, size_t s, int async)
{
  goacc_exit_datum (h, s, GOMP_MAP_DELETE, async);
}

void
acc_copyout (void *h, size_t s)
{
  goacc_exit_datum (h, s, GOMP_MAP_FROM, acc_async_sync);
}

void
acc_copyout_async (void *h, size_t s, int async)
{
  goacc_exit_datum (h, s, GOMP_MAP_FROM, async);
}

void
acc_copyout_finalize (void *h, size_t s)
{
  goacc_exit_datum (h, s, GOMP_MAP_FORCE_FROM, acc_async_sync);
}

void
acc_copyout_finalize_async (void *h, size_t s, int async)
{
  goacc_exit_datum (h, s, GOMP_MAP_FORCE_FROM, async);
}
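
/* Example (illustrative only): copyback happens only when the last
   reference is released; the finalize variant forces that immediately:

     float a[100];
     acc_copyin (a, sizeof a);
     acc_copyin (a, sizeof a);
     acc_copyout (a, sizeof a);
     acc_copyout_finalize (a, sizeof a);

   The first acc_copyout merely drops the reference counts to 1 without
   copying; acc_copyout_finalize zeroes the dynamic count, copies the data
   back, and unmaps the block.  */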

static void
update_dev_host (int is_dev, void *h, size_t s, int async)
{
  splay_tree_key n;
  void *d;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  /* Fortran optional arguments that are non-present result in a
     NULL host address here.  This can safely be ignored as it is
     not possible to 'update' a non-present optional argument.  */
  if (h == NULL)
    return;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", h, (int) s);
    }

  d = (void *) (n->tgt->tgt_start + n->tgt_offset
		+ (uintptr_t) h - n->host_start);

  goacc_aq aq = get_goacc_asyncqueue (async);

  if (is_dev)
    gomp_copy_host2dev (acc_dev, aq, d, h, s, false, /* TODO: cbuf? */ NULL);
  else
    gomp_copy_dev2host (acc_dev, aq, h, d, s);

  gomp_mutex_unlock (&acc_dev->lock);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}

void
acc_update_device (void *h, size_t s)
{
  update_dev_host (1, h, s, acc_async_sync);
}

void
acc_update_device_async (void *h, size_t s, int async)
{
  update_dev_host (1, h, s, async);
}

void
acc_update_self (void *h, size_t s)
{
  update_dev_host (0, h, s, acc_async_sync);
}

void
acc_update_self_async (void *h, size_t s, int async)
{
  update_dev_host (0, h, s, async);
}
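
/* Example (illustrative only): acc_update_* refreshes one side of an
   existing mapping without touching reference counts:

     float a[100];
     acc_copyin (a, sizeof a);
     a[0] = 42.0f;
     acc_update_device (a, sizeof a);
     ...
     acc_update_self (a, sizeof a);
     acc_delete (a, sizeof a);

   acc_update_device pushes the new host values; acc_update_self pulls the
   device values back after device-side computation.  */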

void
acc_attach_async (void **hostaddr, int async)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;
  goacc_aq aq = get_goacc_asyncqueue (async);

  struct splay_tree_key_s cur_node;
  splay_tree_key n;

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_mutex_lock (&acc_dev->lock);

  cur_node.host_start = (uintptr_t) hostaddr;
  cur_node.host_end = cur_node.host_start + sizeof (void *);
  n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);

  if (n == NULL)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("struct not mapped for acc_attach");
    }

  gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n, (uintptr_t) hostaddr,
		       0, NULL);

  gomp_mutex_unlock (&acc_dev->lock);
}

void
acc_attach (void **hostaddr)
{
  acc_attach_async (hostaddr, acc_async_sync);
}
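
/* Example (illustrative only): acc_attach rewrites a device-resident
   pointer so it points at the device copy of its target.  Both the
   pointer's enclosing block and the pointee must already be mapped:

     struct vec { float *data; size_t len; } v;
     v.len = 100;
     v.data = malloc (v.len * sizeof (float));
     acc_copyin (&v, sizeof v);
     acc_copyin (v.data, v.len * sizeof (float));
     acc_attach ((void **) &v.data);
     ...
     acc_detach ((void **) &v.data);

   As noted elsewhere in this file, attach/detach do not affect the
   structured or dynamic reference counts.  */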

static void
goacc_detach_internal (void **hostaddr, int async, bool finalize)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;
  struct splay_tree_key_s cur_node;
  splay_tree_key n;
  struct goacc_asyncqueue *aq = get_goacc_asyncqueue (async);

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_mutex_lock (&acc_dev->lock);

  cur_node.host_start = (uintptr_t) hostaddr;
  cur_node.host_end = cur_node.host_start + sizeof (void *);
  n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);

  if (n == NULL)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("struct not mapped for acc_detach");
    }

  gomp_detach_pointer (acc_dev, aq, n, (uintptr_t) hostaddr, finalize, NULL);

  gomp_mutex_unlock (&acc_dev->lock);
}

void
acc_detach (void **hostaddr)
{
  goacc_detach_internal (hostaddr, acc_async_sync, false);
}

void
acc_detach_async (void **hostaddr, int async)
{
  goacc_detach_internal (hostaddr, async, false);
}

void
acc_detach_finalize (void **hostaddr)
{
  goacc_detach_internal (hostaddr, acc_async_sync, true);
}

void
acc_detach_finalize_async (void **hostaddr, int async)
{
  goacc_detach_internal (hostaddr, async, true);
}

/* Some types of (pointer) variables use several consecutive mappings, which
   must be treated as a group for enter/exit data directives.  This function
   returns the last mapping in such a group (inclusive), or POS for singleton
   mappings.  */

static int
find_group_last (int pos, size_t mapnum, size_t *sizes, unsigned short *kinds)
{
  unsigned char kind0 = kinds[pos] & 0xff;
  int first_pos = pos;

  switch (kind0)
    {
    case GOMP_MAP_TO_PSET:
      if (pos + 1 < mapnum
	  && (kinds[pos + 1] & 0xff) == GOMP_MAP_ATTACH)
	return pos + 1;

      while (pos + 1 < mapnum
	     && (kinds[pos + 1] & 0xff) == GOMP_MAP_POINTER)
	pos++;
      /* We expect at least one GOMP_MAP_POINTER (if not a single
	 GOMP_MAP_ATTACH) after a GOMP_MAP_TO_PSET.  */
      assert (pos > first_pos);
      break;

    case GOMP_MAP_STRUCT:
      pos += sizes[pos];
      break;

    case GOMP_MAP_POINTER:
    case GOMP_MAP_ALWAYS_POINTER:
      /* These mappings are only expected after some other mapping.  If we
	 see one by itself, something has gone wrong.  */
      gomp_fatal ("unexpected mapping");
      break;

    case GOMP_MAP_ATTACH:
      break;

    default:
      /* GOMP_MAP_ALWAYS_POINTER can only appear directly after some other
	 mapping.  */
      if (pos + 1 < mapnum)
	{
	  unsigned char kind1 = kinds[pos + 1] & 0xff;
	  if (kind1 == GOMP_MAP_ALWAYS_POINTER)
	    return pos + 1;
	}

      /* We can have a single GOMP_MAP_ATTACH mapping after a to/from
	 mapping.  */
      if (pos + 1 < mapnum
	  && (kinds[pos + 1] & 0xff) == GOMP_MAP_ATTACH)
	return pos + 1;

      /* We can have zero or more GOMP_MAP_POINTER mappings after a to/from
	 (etc.) mapping.  */
      while (pos + 1 < mapnum
	     && (kinds[pos + 1] & 0xff) == GOMP_MAP_POINTER)
	pos++;
    }

  return pos;
}
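
/* Example (illustrative only) of the grouping above: given the sequence

     kinds[] = { GOMP_MAP_TO_PSET, GOMP_MAP_POINTER };

   find_group_last (0, 2, sizes, kinds) returns 1, so the pointer-set
   descriptor and its trailing pointer are processed as one group.  For
   GOMP_MAP_STRUCT, sizes[pos] holds the member count, so the group extends
   to position POS + sizes[POS].  */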

/* Map variables for OpenACC "enter data".  We can't just call
   goacc_map_vars once, because individual mapped variables might have
   "exit data" called for them at different times.  */

static void
goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
			   void **hostaddrs, size_t *sizes,
			   unsigned short *kinds, goacc_aq aq)
{
  gomp_mutex_lock (&acc_dev->lock);

  for (size_t i = 0; i < mapnum; i++)
    {
      splay_tree_key n;
      size_t group_last = find_group_last (i, mapnum, sizes, kinds);
      bool struct_p = false;
      size_t size, groupnum = (group_last - i) + 1;

      switch (kinds[i] & 0xff)
	{
	case GOMP_MAP_STRUCT:
	  {
	    size = (uintptr_t) hostaddrs[group_last] + sizes[group_last]
		   - (uintptr_t) hostaddrs[i];
	    struct_p = true;
	  }
	  break;

	case GOMP_MAP_ATTACH:
	  size = sizeof (void *);
	  break;

	default:
	  size = sizes[i];
	}

      n = lookup_host (acc_dev, hostaddrs[i], size);

      if (n && struct_p)
	{
	  for (size_t j = i + 1; j <= group_last; j++)
	    {
	      struct splay_tree_key_s cur_node;
	      cur_node.host_start = (uintptr_t) hostaddrs[j];
	      cur_node.host_end = cur_node.host_start + sizes[j];
	      splay_tree_key n2
		= splay_tree_lookup (&acc_dev->mem_map, &cur_node);
	      if (!n2
		  || n2->tgt != n->tgt
		  || n2->host_start - n->host_start
		     != n2->tgt_offset - n->tgt_offset)
		{
		  gomp_mutex_unlock (&acc_dev->lock);
		  gomp_fatal ("Trying to map into device [%p..%p) structure "
			      "element when other mapped elements from the "
			      "same structure weren't mapped together with "
			      "it", (void *) cur_node.host_start,
			      (void *) cur_node.host_end);
		}
	    }
	  /* This is a special case because we must increment the refcount by
	     the number of mapped struct elements, rather than by one.  */
	  if (n->refcount != REFCOUNT_INFINITY)
	    n->refcount += groupnum - 1;
	  n->dynamic_refcount += groupnum - 1;
	}
      else if (n && groupnum == 1)
	{
	  void *h = hostaddrs[i];
	  size_t s = sizes[i];

	  if ((kinds[i] & 0xff) == GOMP_MAP_ATTACH)
	    {
	      gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n,
				   (uintptr_t) h, s, NULL);
	      /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic
		 reference counts ('n->refcount', 'n->dynamic_refcount').  */
	    }
	  else
	    goacc_map_var_existing (acc_dev, h, s, n);
	}
      else if (n && groupnum > 1)
	{
	  assert (n->refcount != REFCOUNT_INFINITY
		  && n->refcount != REFCOUNT_LINK);

	  for (size_t j = i + 1; j <= group_last; j++)
	    if ((kinds[j] & 0xff) == GOMP_MAP_ATTACH)
	      {
		splay_tree_key m
		  = lookup_host (acc_dev, hostaddrs[j], sizeof (void *));
		gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, m,
				     (uintptr_t) hostaddrs[j], sizes[j], NULL);
	      }

	  bool processed = false;

	  struct target_mem_desc *tgt = n->tgt;
	  for (size_t j = 0; j < tgt->list_count; j++)
	    if (tgt->list[j].key == n)
	      {
		/* We are processing a group of mappings (e.g.
		   [GOMP_MAP_TO, GOMP_MAP_TO_PSET, GOMP_MAP_POINTER]).
		   Find the right group in the target_mem_desc's variable
		   list, and increment the refcounts for each item in that
		   group.  */
		for (size_t k = 0; k < groupnum; k++)
		  if (j + k < tgt->list_count
		      && tgt->list[j + k].key
		      && !tgt->list[j + k].is_attach)
		    {
		      tgt->list[j + k].key->refcount++;
		      tgt->list[j + k].key->dynamic_refcount++;
		    }
		processed = true;
		break;
	      }

	  if (!processed)
	    {
	      gomp_mutex_unlock (&acc_dev->lock);
	      gomp_fatal ("dynamic refcount incrementing failed for "
			  "pointer/pset");
	    }
	}
      else if (hostaddrs[i])
	{
	  /* The data is not mapped already.  Map it now, unless the first
	     member in the group has a NULL pointer (e.g. a non-present
	     optional parameter).  */
	  gomp_mutex_unlock (&acc_dev->lock);

	  struct target_mem_desc *tgt
	    = goacc_map_vars (acc_dev, aq, groupnum, &hostaddrs[i], NULL,
			      &sizes[i], &kinds[i], true,
			      GOMP_MAP_VARS_ENTER_DATA);
	  assert (tgt);

	  gomp_mutex_lock (&acc_dev->lock);

	  for (size_t j = 0; j < tgt->list_count; j++)
	    {
	      n = tgt->list[j].key;
	      if (n && !tgt->list[j].is_attach)
		n->dynamic_refcount++;
	    }
	}

      i = group_last;
    }

  gomp_mutex_unlock (&acc_dev->lock);
}

/* Unmap variables for OpenACC "exit data".  */

static void
goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
			  void **hostaddrs, size_t *sizes,
			  unsigned short *kinds, goacc_aq aq)
{
  gomp_mutex_lock (&acc_dev->lock);

  /* Handle "detach" before copyback/deletion of mapped data.  */
  for (size_t i = 0; i < mapnum; ++i)
    {
      unsigned char kind = kinds[i] & 0xff;
      bool finalize = false;
      switch (kind)
	{
	case GOMP_MAP_FORCE_DETACH:
	  finalize = true;
	  /* Fallthrough.  */

	case GOMP_MAP_DETACH:
	  {
	    struct splay_tree_key_s cur_node;
	    uintptr_t hostaddr = (uintptr_t) hostaddrs[i];
	    cur_node.host_start = hostaddr;
	    cur_node.host_end = cur_node.host_start + sizeof (void *);
	    splay_tree_key n
	      = splay_tree_lookup (&acc_dev->mem_map, &cur_node);

	    if (n == NULL)
	      {
		gomp_mutex_unlock (&acc_dev->lock);
		gomp_fatal ("struct not mapped for detach operation");
	      }

	    gomp_detach_pointer (acc_dev, aq, n, hostaddr, finalize, NULL);
	  }
	  break;
	default:
	  ;
	}
    }

  for (size_t i = 0; i < mapnum; ++i)
    {
      unsigned char kind = kinds[i] & 0xff;

      switch (kind)
	{
	case GOMP_MAP_FROM:
	case GOMP_MAP_FORCE_FROM:
	case GOMP_MAP_TO_PSET:
	case GOMP_MAP_POINTER:
	case GOMP_MAP_DELETE:
	case GOMP_MAP_RELEASE:
	  {
	    struct splay_tree_key_s cur_node;
	    size_t size;
	    if (kind == GOMP_MAP_POINTER)
	      size = sizeof (void *);
	    else
	      size = sizes[i];
	    cur_node.host_start = (uintptr_t) hostaddrs[i];
	    cur_node.host_end = cur_node.host_start + size;
	    splay_tree_key n
	      = splay_tree_lookup (&acc_dev->mem_map, &cur_node);

	    if (n == NULL)
	      continue;

	    goacc_exit_datum_1 (acc_dev, hostaddrs[i], size, kind, n, aq);
	  }
	  break;

	case GOMP_MAP_STRUCT:
	  /* Skip the 'GOMP_MAP_STRUCT' itself, and use the regular processing
	     for all its entries.  This special handling exists for GCC 10.1
	     compatibility; afterwards, we're not generating these no-op
	     'GOMP_MAP_STRUCT's anymore.  */
	  break;

	case GOMP_MAP_DETACH:
	case GOMP_MAP_FORCE_DETACH:
	  /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic
	     reference counts ('n->refcount', 'n->dynamic_refcount').  */
	  break;

	default:
	  gomp_fatal (">>>> goacc_exit_data_internal UNHANDLED kind 0x%.2x",
		      kind);
	}
    }

  gomp_mutex_unlock (&acc_dev->lock);
}

static void
goacc_enter_exit_data_internal (int flags_m, size_t mapnum, void **hostaddrs,
				size_t *sizes, unsigned short *kinds,
				bool data_enter, int async, int num_waits,
				va_list *ap)
{
  int flags = GOACC_FLAGS_UNMARSHAL (flags_m);

  struct goacc_thread *thr;
  struct gomp_device_descr *acc_dev;

  goacc_lazy_initialize ();

  thr = goacc_thread ();
  acc_dev = thr->dev;

  bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);

  acc_prof_info prof_info;
  if (profiling_p)
    {
      thr->prof_info = &prof_info;

      prof_info.event_type
	= data_enter ? acc_ev_enter_data_start : acc_ev_exit_data_start;
      prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
      prof_info.version = _ACC_PROF_INFO_VERSION;
      prof_info.device_type = acc_device_type (acc_dev->type);
      prof_info.device_number = acc_dev->target_id;
      prof_info.thread_id = -1;
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
      prof_info.src_file = NULL;
      prof_info.func_name = NULL;
      prof_info.line_no = -1;
      prof_info.end_line_no = -1;
      prof_info.func_line_no = -1;
      prof_info.func_end_line_no = -1;
    }
  acc_event_info enter_exit_data_event_info;
  if (profiling_p)
    {
      enter_exit_data_event_info.other_event.event_type
	= prof_info.event_type;
      enter_exit_data_event_info.other_event.valid_bytes
	= _ACC_OTHER_EVENT_INFO_VALID_BYTES;
      enter_exit_data_event_info.other_event.parent_construct
	= data_enter ? acc_construct_enter_data : acc_construct_exit_data;
      enter_exit_data_event_info.other_event.implicit = 0;
      enter_exit_data_event_info.other_event.tool_info = NULL;
    }
  acc_api_info api_info;
  if (profiling_p)
    {
      thr->api_info = &api_info;

      api_info.device_api = acc_device_api_none;
      api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
      api_info.device_type = prof_info.device_type;
      api_info.vendor = -1;
      api_info.device_handle = NULL;
      api_info.context_handle = NULL;
      api_info.async_handle = NULL;
    }

  if (profiling_p)
    goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
			      &api_info);

  if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
      || (flags & GOACC_FLAG_HOST_FALLBACK))
    {
      prof_info.device_type = acc_device_host;
      api_info.device_type = prof_info.device_type;

      goto out_prof;
    }

  if (num_waits)
    goacc_wait (async, num_waits, ap);

  goacc_aq aq = get_goacc_asyncqueue (async);

  if (data_enter)
    goacc_enter_data_internal (acc_dev, mapnum, hostaddrs, sizes, kinds, aq);
  else
    goacc_exit_data_internal (acc_dev, mapnum, hostaddrs, sizes, kinds, aq);

 out_prof:
  if (profiling_p)
    {
      prof_info.event_type
	= data_enter ? acc_ev_enter_data_end : acc_ev_exit_data_end;
      enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
      goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
				&api_info);

      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}

/* Legacy entry point (GCC 11 and earlier).  */

void
GOACC_enter_exit_data (int flags_m, size_t mapnum, void **hostaddrs,
		       size_t *sizes, unsigned short *kinds, int async,
		       int num_waits, ...)
{
  /* Determine if this is an OpenACC "enter data".  */
  bool data_enter = false;
  for (size_t i = 0; i < mapnum; ++i)
    {
      unsigned char kind = kinds[i] & 0xff;

      if (kind == GOMP_MAP_POINTER
	  || kind == GOMP_MAP_TO_PSET
	  || kind == GOMP_MAP_STRUCT)
	continue;

      if (kind == GOMP_MAP_FORCE_ALLOC
	  || kind == GOMP_MAP_FORCE_PRESENT
	  || kind == GOMP_MAP_ATTACH
	  || kind == GOMP_MAP_FORCE_TO
	  || kind == GOMP_MAP_TO
	  || kind == GOMP_MAP_ALLOC)
	{
	  data_enter = true;
	  break;
	}

      if (kind == GOMP_MAP_RELEASE
	  || kind == GOMP_MAP_DELETE
	  || kind == GOMP_MAP_DETACH
	  || kind == GOMP_MAP_FORCE_DETACH
	  || kind == GOMP_MAP_FROM
	  || kind == GOMP_MAP_FORCE_FROM)
	break;

      gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
		  kind);
    }

  va_list ap;
  va_start (ap, num_waits);
  goacc_enter_exit_data_internal (flags_m, mapnum, hostaddrs, sizes, kinds,
				  data_enter, async, num_waits, &ap);
  va_end (ap);
}

void
GOACC_enter_data (int flags_m, size_t mapnum, void **hostaddrs,
		  size_t *sizes, unsigned short *kinds, int async,
		  int num_waits, ...)
{
  va_list ap;
  va_start (ap, num_waits);
  goacc_enter_exit_data_internal (flags_m, mapnum, hostaddrs, sizes, kinds,
				  true, async, num_waits, &ap);
  va_end (ap);
}

void
GOACC_exit_data (int flags_m, size_t mapnum, void **hostaddrs,
		 size_t *sizes, unsigned short *kinds, int async,
		 int num_waits, ...)
{
  va_list ap;
  va_start (ap, num_waits);
  goacc_enter_exit_data_internal (flags_m, mapnum, hostaddrs, sizes, kinds,
				  false, async, num_waits, &ap);
  va_end (ap);
}

void
GOACC_declare (int flags_m, size_t mapnum,
	       void **hostaddrs, size_t *sizes, unsigned short *kinds)
{
  for (size_t i = 0; i < mapnum; i++)
    {
      unsigned char kind = kinds[i] & 0xff;

      if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
	continue;

      switch (kind)
	{
	case GOMP_MAP_ALLOC:
	  if (acc_is_present (hostaddrs[i], sizes[i]))
	    continue;
	  /* FALLTHRU */
	case GOMP_MAP_FORCE_ALLOC:
	case GOMP_MAP_TO:
	case GOMP_MAP_FORCE_TO:
	  goacc_enter_exit_data_internal (flags_m, 1, &hostaddrs[i], &sizes[i],
					  &kinds[i], true, GOMP_ASYNC_SYNC, 0,
					  NULL);
	  break;

	case GOMP_MAP_FROM:
	case GOMP_MAP_FORCE_FROM:
	case GOMP_MAP_RELEASE:
	case GOMP_MAP_DELETE:
	  goacc_enter_exit_data_internal (flags_m, 1, &hostaddrs[i], &sizes[i],
					  &kinds[i], false, GOMP_ASYNC_SYNC, 0,
					  NULL);
	  break;

	case GOMP_MAP_FORCE_DEVICEPTR:
	  break;

	case GOMP_MAP_FORCE_PRESENT:
	  if (!acc_is_present (hostaddrs[i], sizes[i]))
	    gomp_fatal ("[%p,%ld] is not mapped", hostaddrs[i],
			(unsigned long) sizes[i]);
	  break;

	default:
	  assert (0);
	  break;
	}
    }
}