xref: /netbsd/external/gpl3/gcc/dist/libgomp/oacc-mem.c (revision f0fbc68b)
1 /* OpenACC Runtime initialization routines
2 
3    Copyright (C) 2013-2022 Free Software Foundation, Inc.
4 
5    Contributed by Mentor Embedded.
6 
7    This file is part of the GNU Offloading and Multi Processing Library
8    (libgomp).
9 
10    Libgomp is free software; you can redistribute it and/or modify it
11    under the terms of the GNU General Public License as published by
12    the Free Software Foundation; either version 3, or (at your option)
13    any later version.
14 
15    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
18    more details.
19 
20    Under Section 7 of GPL version 3, you are granted additional
21    permissions described in the GCC Runtime Library Exception, version
22    3.1, as published by the Free Software Foundation.
23 
24    You should have received a copy of the GNU General Public License and
25    a copy of the GCC Runtime Library Exception along with this program;
26    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
27    <http://www.gnu.org/licenses/>.  */
28 
29 #include "openacc.h"
30 #include "libgomp.h"
31 #include "gomp-constants.h"
32 #include "oacc-int.h"
33 #include <string.h>
34 #include <assert.h>
35 
36 /* Return block containing [H->S), or NULL if not contained.  The device lock
37    for DEV must be locked on entry, and remains locked on exit.  */
38 
39 static splay_tree_key
lookup_host(struct gomp_device_descr * dev,void * h,size_t s)40 lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
41 {
42   struct splay_tree_key_s node;
43   splay_tree_key key;
44 
45   node.host_start = (uintptr_t) h;
46   node.host_end = (uintptr_t) h + s;
47 
48   key = splay_tree_lookup (&dev->mem_map, &node);
49 
50   return key;
51 }
52 
53 /* Helper for lookup_dev.  Iterate over splay tree.  */
54 
55 static splay_tree_key
lookup_dev_1(splay_tree_node node,uintptr_t d,size_t s)56 lookup_dev_1 (splay_tree_node node, uintptr_t d, size_t s)
57 {
58   splay_tree_key key = &node->key;
59   if (d >= key->tgt->tgt_start && d + s <= key->tgt->tgt_end)
60     return key;
61 
62   key = NULL;
63   if (node->left)
64     key = lookup_dev_1 (node->left, d, s);
65   if (!key && node->right)
66     key = lookup_dev_1 (node->right, d, s);
67 
68   return key;
69 }
70 
71 /* Return block containing [D->S), or NULL if not contained.
72 
73    This iterates over the splay tree.  This is not expected to be a common
74    operation.
75 
76    The device lock associated with MEM_MAP must be locked on entry, and remains
77    locked on exit.  */
78 
79 static splay_tree_key
lookup_dev(splay_tree mem_map,void * d,size_t s)80 lookup_dev (splay_tree mem_map, void *d, size_t s)
81 {
82   if (!mem_map || !mem_map->root)
83     return NULL;
84 
85   return lookup_dev_1 (mem_map->root, (uintptr_t) d, s);
86 }
87 
88 
89 /* OpenACC is silent on how memory exhaustion is indicated.  We return
90    NULL.  */
91 
92 void *
acc_malloc(size_t s)93 acc_malloc (size_t s)
94 {
95   if (!s)
96     return NULL;
97 
98   goacc_lazy_initialize ();
99 
100   struct goacc_thread *thr = goacc_thread ();
101 
102   assert (thr->dev);
103 
104   if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
105     return malloc (s);
106 
107   acc_prof_info prof_info;
108   acc_api_info api_info;
109   bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
110 
111   void *res = thr->dev->alloc_func (thr->dev->target_id, s);
112 
113   if (profiling_p)
114     {
115       thr->prof_info = NULL;
116       thr->api_info = NULL;
117     }
118 
119   return res;
120 }
121 
122 void
acc_free(void * d)123 acc_free (void *d)
124 {
125   splay_tree_key k;
126 
127   if (!d)
128     return;
129 
130   struct goacc_thread *thr = goacc_thread ();
131 
132   assert (thr && thr->dev);
133 
134   struct gomp_device_descr *acc_dev = thr->dev;
135 
136   if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
137     return free (d);
138 
139   acc_prof_info prof_info;
140   acc_api_info api_info;
141   bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
142 
143   gomp_mutex_lock (&acc_dev->lock);
144 
145   /* We don't have to call lazy open here, as the ptr value must have
146      been returned by acc_malloc.  It's not permitted to pass NULL in
147      (unless you got that null from acc_malloc).  */
148   if ((k = lookup_dev (&acc_dev->mem_map, d, 1)))
149     {
150       void *offset = d - k->tgt->tgt_start + k->tgt_offset;
151       void *h = k->host_start + offset;
152       size_t h_size = k->host_end - k->host_start;
153       gomp_mutex_unlock (&acc_dev->lock);
154       /* PR92503 "[OpenACC] Behavior of 'acc_free' if the memory space is still
155 	 used in a mapping".  */
156       gomp_fatal ("refusing to free device memory space at %p that is still"
157 		  " mapped at [%p,+%d]",
158 		  d, h, (int) h_size);
159     }
160   else
161     gomp_mutex_unlock (&acc_dev->lock);
162 
163   if (!acc_dev->free_func (acc_dev->target_id, d))
164     gomp_fatal ("error in freeing device memory in %s", __FUNCTION__);
165 
166   if (profiling_p)
167     {
168       thr->prof_info = NULL;
169       thr->api_info = NULL;
170     }
171 }
172 
173 static void
memcpy_tofrom_device(bool from,void * d,void * h,size_t s,int async,const char * libfnname)174 memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async,
175 		      const char *libfnname)
176 {
177   /* No need to call lazy open here, as the device pointer must have
178      been obtained from a routine that did that.  */
179   struct goacc_thread *thr = goacc_thread ();
180 
181   assert (thr && thr->dev);
182 
183   if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
184     {
185       if (from)
186 	memmove (h, d, s);
187       else
188 	memmove (d, h, s);
189       return;
190     }
191 
192   acc_prof_info prof_info;
193   acc_api_info api_info;
194   bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
195   if (profiling_p)
196     {
197       prof_info.async = async;
198       prof_info.async_queue = prof_info.async;
199     }
200 
201   goacc_aq aq = get_goacc_asyncqueue (async);
202   if (from)
203     gomp_copy_dev2host (thr->dev, aq, h, d, s);
204   else
205     gomp_copy_host2dev (thr->dev, aq, d, h, s, false, /* TODO: cbuf? */ NULL);
206 
207   if (profiling_p)
208     {
209       thr->prof_info = NULL;
210       thr->api_info = NULL;
211     }
212 }
213 
214 void
acc_memcpy_to_device(void * d,void * h,size_t s)215 acc_memcpy_to_device (void *d, void *h, size_t s)
216 {
217   memcpy_tofrom_device (false, d, h, s, acc_async_sync, __FUNCTION__);
218 }
219 
/* Copy S bytes from host address H to device address D on async queue
   ASYNC.  */

void
acc_memcpy_to_device_async (void *d, void *h, size_t s, int async)
{
  const bool from_device = false;

  memcpy_tofrom_device (from_device, d, h, s, async, __FUNCTION__);
}
225 
226 void
acc_memcpy_from_device(void * h,void * d,size_t s)227 acc_memcpy_from_device (void *h, void *d, size_t s)
228 {
229   memcpy_tofrom_device (true, d, h, s, acc_async_sync, __FUNCTION__);
230 }
231 
/* Copy S bytes from device address D to host address H on async queue
   ASYNC.  */

void
acc_memcpy_from_device_async (void *h, void *d, size_t s, int async)
{
  const bool from_device = true;

  memcpy_tofrom_device (from_device, d, h, s, async, __FUNCTION__);
}
237 
238 /* Return the device pointer that corresponds to host data H.  Or NULL
239    if no mapping.  */
240 
241 void *
acc_deviceptr(void * h)242 acc_deviceptr (void *h)
243 {
244   splay_tree_key n;
245   void *d;
246   void *offset;
247 
248   goacc_lazy_initialize ();
249 
250   struct goacc_thread *thr = goacc_thread ();
251   struct gomp_device_descr *dev = thr->dev;
252 
253   if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
254     return h;
255 
256   /* In the following, no OpenACC Profiling Interface events can possibly be
257      generated.  */
258 
259   gomp_mutex_lock (&dev->lock);
260 
261   n = lookup_host (dev, h, 1);
262 
263   if (!n)
264     {
265       gomp_mutex_unlock (&dev->lock);
266       return NULL;
267     }
268 
269   offset = h - n->host_start;
270 
271   d = n->tgt->tgt_start + n->tgt_offset + offset;
272 
273   gomp_mutex_unlock (&dev->lock);
274 
275   return d;
276 }
277 
278 /* Return the host pointer that corresponds to device data D.  Or NULL
279    if no mapping.  */
280 
281 void *
acc_hostptr(void * d)282 acc_hostptr (void *d)
283 {
284   splay_tree_key n;
285   void *h;
286   void *offset;
287 
288   goacc_lazy_initialize ();
289 
290   struct goacc_thread *thr = goacc_thread ();
291   struct gomp_device_descr *acc_dev = thr->dev;
292 
293   if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
294     return d;
295 
296   /* In the following, no OpenACC Profiling Interface events can possibly be
297      generated.  */
298 
299   gomp_mutex_lock (&acc_dev->lock);
300 
301   n = lookup_dev (&acc_dev->mem_map, d, 1);
302 
303   if (!n)
304     {
305       gomp_mutex_unlock (&acc_dev->lock);
306       return NULL;
307     }
308 
309   offset = d - n->tgt->tgt_start + n->tgt_offset;
310 
311   h = n->host_start + offset;
312 
313   gomp_mutex_unlock (&acc_dev->lock);
314 
315   return h;
316 }
317 
318 /* Return 1 if host data [H,+S] is present on the device.  */
319 
320 int
acc_is_present(void * h,size_t s)321 acc_is_present (void *h, size_t s)
322 {
323   splay_tree_key n;
324 
325   if (!s || !h)
326     return 0;
327 
328   goacc_lazy_initialize ();
329 
330   struct goacc_thread *thr = goacc_thread ();
331   struct gomp_device_descr *acc_dev = thr->dev;
332 
333   if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
334     return h != NULL;
335 
336   /* In the following, no OpenACC Profiling Interface events can possibly be
337      generated.  */
338 
339   gomp_mutex_lock (&acc_dev->lock);
340 
341   n = lookup_host (acc_dev, h, s);
342 
343   if (n && ((uintptr_t)h < n->host_start
344 	    || (uintptr_t)h + s > n->host_end
345 	    || s > n->host_end - n->host_start))
346     n = NULL;
347 
348   gomp_mutex_unlock (&acc_dev->lock);
349 
350   return n != NULL;
351 }
352 
353 /* Create a mapping for host [H,+S] -> device [D,+S] */
354 
355 void
acc_map_data(void * h,void * d,size_t s)356 acc_map_data (void *h, void *d, size_t s)
357 {
358   size_t mapnum = 1;
359   void *hostaddrs = h;
360   void *devaddrs = d;
361   size_t sizes = s;
362   unsigned short kinds = GOMP_MAP_ALLOC;
363 
364   goacc_lazy_initialize ();
365 
366   struct goacc_thread *thr = goacc_thread ();
367   struct gomp_device_descr *acc_dev = thr->dev;
368 
369   if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
370     {
371       if (d != h)
372         gomp_fatal ("cannot map data on shared-memory system");
373     }
374   else
375     {
376       struct goacc_thread *thr = goacc_thread ();
377 
378       if (!d || !h || !s)
379 	gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
380                     (void *)h, (int)s, (void *)d, (int)s);
381 
382       acc_prof_info prof_info;
383       acc_api_info api_info;
384       bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
385 
386       gomp_mutex_lock (&acc_dev->lock);
387 
388       if (lookup_host (acc_dev, h, s))
389         {
390 	  gomp_mutex_unlock (&acc_dev->lock);
391 	  gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
392 		      (int)s);
393 	}
394 
395       if (lookup_dev (&thr->dev->mem_map, d, s))
396         {
397 	  gomp_mutex_unlock (&acc_dev->lock);
398 	  gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
399 		      (int)s);
400 	}
401 
402       gomp_mutex_unlock (&acc_dev->lock);
403 
404       struct target_mem_desc *tgt
405 	= goacc_map_vars (acc_dev, NULL, mapnum, &hostaddrs, &devaddrs, &sizes,
406 			  &kinds, true, GOMP_MAP_VARS_ENTER_DATA);
407       assert (tgt);
408       assert (tgt->list_count == 1);
409       splay_tree_key n = tgt->list[0].key;
410       assert (n);
411       assert (n->refcount == 1);
412       assert (n->dynamic_refcount == 0);
413       /* Special reference counting behavior.  */
414       n->refcount = REFCOUNT_INFINITY;
415 
416       if (profiling_p)
417 	{
418 	  thr->prof_info = NULL;
419 	  thr->api_info = NULL;
420 	}
421     }
422 }
423 
424 void
acc_unmap_data(void * h)425 acc_unmap_data (void *h)
426 {
427   struct goacc_thread *thr = goacc_thread ();
428   struct gomp_device_descr *acc_dev = thr->dev;
429 
430   /* No need to call lazy open, as the address must have been mapped.  */
431 
432   /* This is a no-op on shared-memory targets.  */
433   if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
434     return;
435 
436   acc_prof_info prof_info;
437   acc_api_info api_info;
438   bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
439 
440   gomp_mutex_lock (&acc_dev->lock);
441 
442   splay_tree_key n = lookup_host (acc_dev, h, 1);
443 
444   if (!n)
445     {
446       gomp_mutex_unlock (&acc_dev->lock);
447       gomp_fatal ("%p is not a mapped block", (void *)h);
448     }
449 
450   size_t host_size = n->host_end - n->host_start;
451 
452   if (n->host_start != (uintptr_t) h)
453     {
454       gomp_mutex_unlock (&acc_dev->lock);
455       gomp_fatal ("[%p,%d] surrounds %p",
456 		  (void *) n->host_start, (int) host_size, (void *) h);
457     }
458   /* TODO This currently doesn't catch 'REFCOUNT_INFINITY' usage different from
459      'acc_map_data'.  Maybe 'dynamic_refcount' can be used for disambiguating
460      the different 'REFCOUNT_INFINITY' cases, or simply separate
461      'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA'
462      etc.)?  */
463   else if (n->refcount != REFCOUNT_INFINITY)
464     {
465       gomp_mutex_unlock (&acc_dev->lock);
466       gomp_fatal ("refusing to unmap block [%p,+%d] that has not been mapped"
467 		  " by 'acc_map_data'",
468 		  (void *) h, (int) host_size);
469     }
470 
471   struct target_mem_desc *tgt = n->tgt;
472 
473   if (tgt->refcount == REFCOUNT_INFINITY)
474     {
475       gomp_mutex_unlock (&acc_dev->lock);
476       gomp_fatal ("cannot unmap target block");
477     }
478 
479   /* Above, we've verified that the mapping must have been set up by
480      'acc_map_data'.  */
481   assert (tgt->refcount == 1);
482 
483   /* Nullifying these fields prevents 'gomp_unmap_tgt' via 'gomp_remove_var'
484      from freeing the target memory.  */
485   tgt->tgt_end = 0;
486   tgt->to_free = NULL;
487 
488   bool is_tgt_unmapped = gomp_remove_var (acc_dev, n);
489   assert (is_tgt_unmapped);
490 
491   gomp_mutex_unlock (&acc_dev->lock);
492 
493   if (profiling_p)
494     {
495       thr->prof_info = NULL;
496       thr->api_info = NULL;
497     }
498 }
499 
500 
501 /* Helper function to map a single dynamic data item, represented by a single
502    mapping.  The acc_dev->lock should be held on entry, and remains locked on
503    exit.  */
504 
505 static void *
goacc_map_var_existing(struct gomp_device_descr * acc_dev,void * hostaddr,size_t size,splay_tree_key n)506 goacc_map_var_existing (struct gomp_device_descr *acc_dev, void *hostaddr,
507 			size_t size, splay_tree_key n)
508 {
509   assert (n);
510 
511   /* Present. */
512   void *d = (void *) (n->tgt->tgt_start + n->tgt_offset + hostaddr
513 	    - n->host_start);
514 
515   if (hostaddr + size > (void *) n->host_end)
516     {
517       gomp_mutex_unlock (&acc_dev->lock);
518       gomp_fatal ("[%p,+%d] not mapped", hostaddr, (int) size);
519     }
520 
521   assert (n->refcount != REFCOUNT_LINK);
522   if (n->refcount != REFCOUNT_INFINITY)
523     n->refcount++;
524   n->dynamic_refcount++;
525 
526   return d;
527 }
528 
529 /* Enter dynamic mapping for a single datum.  Return the device pointer.  */
530 
531 static void *
goacc_enter_datum(void ** hostaddrs,size_t * sizes,void * kinds,int async)532 goacc_enter_datum (void **hostaddrs, size_t *sizes, void *kinds, int async)
533 {
534   void *d;
535   splay_tree_key n;
536 
537   if (!hostaddrs[0] || !sizes[0])
538     gomp_fatal ("[%p,+%d] is a bad range", hostaddrs[0], (int) sizes[0]);
539 
540   goacc_lazy_initialize ();
541 
542   struct goacc_thread *thr = goacc_thread ();
543   struct gomp_device_descr *acc_dev = thr->dev;
544 
545   if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
546     return hostaddrs[0];
547 
548   acc_prof_info prof_info;
549   acc_api_info api_info;
550   bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
551   if (profiling_p)
552     {
553       prof_info.async = async;
554       prof_info.async_queue = prof_info.async;
555     }
556 
557   gomp_mutex_lock (&acc_dev->lock);
558 
559   n = lookup_host (acc_dev, hostaddrs[0], sizes[0]);
560   if (n)
561     {
562       d = goacc_map_var_existing (acc_dev, hostaddrs[0], sizes[0], n);
563       gomp_mutex_unlock (&acc_dev->lock);
564     }
565   else
566     {
567       const size_t mapnum = 1;
568 
569       gomp_mutex_unlock (&acc_dev->lock);
570 
571       goacc_aq aq = get_goacc_asyncqueue (async);
572 
573       struct target_mem_desc *tgt
574 	= goacc_map_vars (acc_dev, aq, mapnum, hostaddrs, NULL, sizes,
575 			  kinds, true, GOMP_MAP_VARS_ENTER_DATA);
576       assert (tgt);
577       assert (tgt->list_count == 1);
578       n = tgt->list[0].key;
579       assert (n);
580       assert (n->refcount == 1);
581       assert (n->dynamic_refcount == 0);
582       n->dynamic_refcount++;
583 
584       d = (void *) tgt->tgt_start;
585     }
586 
587   if (profiling_p)
588     {
589       thr->prof_info = NULL;
590       thr->api_info = NULL;
591     }
592 
593   return d;
594 }
595 
596 void *
acc_create(void * h,size_t s)597 acc_create (void *h, size_t s)
598 {
599   unsigned short kinds[1] = { GOMP_MAP_ALLOC };
600   return goacc_enter_datum (&h, &s, &kinds, acc_async_sync);
601 }
602 
603 void
acc_create_async(void * h,size_t s,int async)604 acc_create_async (void *h, size_t s, int async)
605 {
606   unsigned short kinds[1] = { GOMP_MAP_ALLOC };
607   goacc_enter_datum (&h, &s, &kinds, async);
608 }
609 
/* acc_present_or_create used to be what acc_create is now, and acc_pcreate
   is acc_present_or_create by a different name.  Both are provided as
   strong aliases of acc_create where the toolchain supports that, and as
   forwarding functions otherwise.  */
#ifndef HAVE_ATTRIBUTE_ALIAS
void *
acc_present_or_create (void *h, size_t s)
{
  return acc_create (h, s);
}

void *
acc_pcreate (void *h, size_t s)
{
  return acc_create (h, s);
}
#else
strong_alias (acc_create, acc_present_or_create)
strong_alias (acc_create, acc_pcreate)
#endif
628 
629 void *
630 acc_copyin (void *h, size_t s)
631 {
632   unsigned short kinds[1] = { GOMP_MAP_TO };
633   return goacc_enter_datum (&h, &s, &kinds, acc_async_sync);
634 }
635 
636 void
acc_copyin_async(void * h,size_t s,int async)637 acc_copyin_async (void *h, size_t s, int async)
638 {
639   unsigned short kinds[1] = { GOMP_MAP_TO };
640   goacc_enter_datum (&h, &s, &kinds, async);
641 }
642 
/* acc_present_or_copyin used to be what acc_copyin is now, and acc_pcopyin
   is acc_present_or_copyin by a different name.  Both are provided as
   strong aliases of acc_copyin where the toolchain supports that, and as
   forwarding functions otherwise.  */
#ifndef HAVE_ATTRIBUTE_ALIAS
void *
acc_present_or_copyin (void *h, size_t s)
{
  return acc_copyin (h, s);
}

void *
acc_pcopyin (void *h, size_t s)
{
  return acc_copyin (h, s);
}
#else
strong_alias (acc_copyin, acc_present_or_copyin)
strong_alias (acc_copyin, acc_pcopyin)
#endif
661 
662 
663 /* Helper function to unmap a single data item.  Device lock should be held on
664    entry, and remains locked on exit.  */
665 
666 static void
667 goacc_exit_datum_1 (struct gomp_device_descr *acc_dev, void *h, size_t s,
668 		    unsigned short kind, splay_tree_key n, goacc_aq aq)
669 {
670   assert (kind != GOMP_MAP_DETACH
671 	  && kind != GOMP_MAP_FORCE_DETACH);
672 
673   if ((uintptr_t) h < n->host_start || (uintptr_t) h + s > n->host_end)
674     {
675       size_t host_size = n->host_end - n->host_start;
676       gomp_mutex_unlock (&acc_dev->lock);
677       gomp_fatal ("[%p,+%d] outside mapped block [%p,+%d]",
678 		  (void *) h, (int) s, (void *) n->host_start, (int) host_size);
679     }
680 
681   bool finalize = (kind == GOMP_MAP_FORCE_FROM
682 		   || kind == GOMP_MAP_DELETE);
683 
684   assert (n->refcount != REFCOUNT_LINK);
685   if (n->refcount != REFCOUNT_INFINITY
686       && n->refcount < n->dynamic_refcount)
687     {
688       gomp_mutex_unlock (&acc_dev->lock);
689       gomp_fatal ("Dynamic reference counting assert fail\n");
690     }
691 
692   if (finalize)
693     {
694       if (n->refcount != REFCOUNT_INFINITY)
695 	n->refcount -= n->dynamic_refcount;
696       n->dynamic_refcount = 0;
697     }
698   else if (n->dynamic_refcount)
699     {
700       if (n->refcount != REFCOUNT_INFINITY)
701 	n->refcount--;
702       n->dynamic_refcount--;
703     }
704 
705   if (n->refcount == 0)
706     {
707       bool copyout = (kind == GOMP_MAP_FROM
708 		      || kind == GOMP_MAP_FORCE_FROM);
709       if (copyout)
710 	{
711 	  void *d = (void *) (n->tgt->tgt_start + n->tgt_offset
712 			      + (uintptr_t) h - n->host_start);
713 	  gomp_copy_dev2host (acc_dev, aq, h, d, s);
714 	}
715 
716       if (aq)
717 	/* TODO We can't do the 'is_tgt_unmapped' checking -- see the
718 	   'gomp_unref_tgt' comment in
719 	   <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>;
720 	   PR92881.  */
721 	gomp_remove_var_async (acc_dev, n, aq);
722       else
723 	{
724 	  size_t num_mappings = 0;
725 	  /* If the target_mem_desc represents a single data mapping, we can
726 	     check that it is freed when this splay tree key's refcount reaches
727 	     zero.  Otherwise (e.g. for a 'GOMP_MAP_STRUCT' mapping with
728 	     multiple members), fall back to skipping the test.  */
729 	  for (size_t l_i = 0; l_i < n->tgt->list_count; ++l_i)
730 	    if (n->tgt->list[l_i].key
731 		&& !n->tgt->list[l_i].is_attach)
732 	      ++num_mappings;
733 	  bool is_tgt_unmapped = gomp_remove_var (acc_dev, n);
734 	  assert (is_tgt_unmapped || num_mappings > 1);
735 	}
736     }
737 }
738 
739 
740 /* Exit a dynamic mapping for a single variable.  */
741 
742 static void
goacc_exit_datum(void * h,size_t s,unsigned short kind,int async)743 goacc_exit_datum (void *h, size_t s, unsigned short kind, int async)
744 {
745   /* No need to call lazy open, as the data must already have been
746      mapped.  */
747 
748   kind &= 0xff;
749 
750   struct goacc_thread *thr = goacc_thread ();
751   struct gomp_device_descr *acc_dev = thr->dev;
752 
753   if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
754     return;
755 
756   acc_prof_info prof_info;
757   acc_api_info api_info;
758   bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
759   if (profiling_p)
760     {
761       prof_info.async = async;
762       prof_info.async_queue = prof_info.async;
763     }
764 
765   gomp_mutex_lock (&acc_dev->lock);
766 
767   splay_tree_key n = lookup_host (acc_dev, h, s);
768   /* Non-present data is a no-op: PR92726, RP92970, PR92984.  */
769   if (n)
770     {
771       goacc_aq aq = get_goacc_asyncqueue (async);
772       goacc_exit_datum_1 (acc_dev, h, s, kind, n, aq);
773     }
774 
775   gomp_mutex_unlock (&acc_dev->lock);
776 
777   if (profiling_p)
778     {
779       thr->prof_info = NULL;
780       thr->api_info = NULL;
781     }
782 }
783 
784 void
acc_delete(void * h,size_t s)785 acc_delete (void *h , size_t s)
786 {
787   goacc_exit_datum (h, s, GOMP_MAP_RELEASE, acc_async_sync);
788 }
789 
790 void
acc_delete_async(void * h,size_t s,int async)791 acc_delete_async (void *h , size_t s, int async)
792 {
793   goacc_exit_datum (h, s, GOMP_MAP_RELEASE, async);
794 }
795 
796 void
acc_delete_finalize(void * h,size_t s)797 acc_delete_finalize (void *h , size_t s)
798 {
799   goacc_exit_datum (h, s, GOMP_MAP_DELETE, acc_async_sync);
800 }
801 
802 void
acc_delete_finalize_async(void * h,size_t s,int async)803 acc_delete_finalize_async (void *h , size_t s, int async)
804 {
805   goacc_exit_datum (h, s, GOMP_MAP_DELETE, async);
806 }
807 
808 void
acc_copyout(void * h,size_t s)809 acc_copyout (void *h, size_t s)
810 {
811   goacc_exit_datum (h, s, GOMP_MAP_FROM, acc_async_sync);
812 }
813 
814 void
acc_copyout_async(void * h,size_t s,int async)815 acc_copyout_async (void *h, size_t s, int async)
816 {
817   goacc_exit_datum (h, s, GOMP_MAP_FROM, async);
818 }
819 
820 void
acc_copyout_finalize(void * h,size_t s)821 acc_copyout_finalize (void *h, size_t s)
822 {
823   goacc_exit_datum (h, s, GOMP_MAP_FORCE_FROM, acc_async_sync);
824 }
825 
826 void
acc_copyout_finalize_async(void * h,size_t s,int async)827 acc_copyout_finalize_async (void *h, size_t s, int async)
828 {
829   goacc_exit_datum (h, s, GOMP_MAP_FORCE_FROM, async);
830 }
831 
832 static void
update_dev_host(int is_dev,void * h,size_t s,int async)833 update_dev_host (int is_dev, void *h, size_t s, int async)
834 {
835   splay_tree_key n;
836   void *d;
837 
838   goacc_lazy_initialize ();
839 
840   struct goacc_thread *thr = goacc_thread ();
841   struct gomp_device_descr *acc_dev = thr->dev;
842 
843   if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
844     return;
845 
846   /* Fortran optional arguments that are non-present result in a
847      NULL host address here.  This can safely be ignored as it is
848      not possible to 'update' a non-present optional argument.  */
849   if (h == NULL)
850     return;
851 
852   acc_prof_info prof_info;
853   acc_api_info api_info;
854   bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
855   if (profiling_p)
856     {
857       prof_info.async = async;
858       prof_info.async_queue = prof_info.async;
859     }
860 
861   gomp_mutex_lock (&acc_dev->lock);
862 
863   n = lookup_host (acc_dev, h, s);
864 
865   if (!n)
866     {
867       gomp_mutex_unlock (&acc_dev->lock);
868       gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
869     }
870 
871   d = (void *) (n->tgt->tgt_start + n->tgt_offset
872 		+ (uintptr_t) h - n->host_start);
873 
874   goacc_aq aq = get_goacc_asyncqueue (async);
875 
876   if (is_dev)
877     gomp_copy_host2dev (acc_dev, aq, d, h, s, false, /* TODO: cbuf? */ NULL);
878   else
879     gomp_copy_dev2host (acc_dev, aq, h, d, s);
880 
881   gomp_mutex_unlock (&acc_dev->lock);
882 
883   if (profiling_p)
884     {
885       thr->prof_info = NULL;
886       thr->api_info = NULL;
887     }
888 }
889 
890 void
acc_update_device(void * h,size_t s)891 acc_update_device (void *h, size_t s)
892 {
893   update_dev_host (1, h, s, acc_async_sync);
894 }
895 
/* Copy the mapped host range [H,+S) to the device on async queue ASYNC.  */

void
acc_update_device_async (void *h, size_t s, int async)
{
  const int to_device = 1;

  update_dev_host (to_device, h, s, async);
}
901 
902 void
acc_update_self(void * h,size_t s)903 acc_update_self (void *h, size_t s)
904 {
905   update_dev_host (0, h, s, acc_async_sync);
906 }
907 
/* Copy the device data for the mapped host range [H,+S) back to the host
   on async queue ASYNC.  */

void
acc_update_self_async (void *h, size_t s, int async)
{
  const int to_device = 0;

  update_dev_host (to_device, h, s, async);
}
913 
914 void
acc_attach_async(void ** hostaddr,int async)915 acc_attach_async (void **hostaddr, int async)
916 {
917   struct goacc_thread *thr = goacc_thread ();
918   struct gomp_device_descr *acc_dev = thr->dev;
919   goacc_aq aq = get_goacc_asyncqueue (async);
920 
921   struct splay_tree_key_s cur_node;
922   splay_tree_key n;
923 
924   if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
925     return;
926 
927   gomp_mutex_lock (&acc_dev->lock);
928 
929   cur_node.host_start = (uintptr_t) hostaddr;
930   cur_node.host_end = cur_node.host_start + sizeof (void *);
931   n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
932 
933   if (n == NULL)
934     {
935       gomp_mutex_unlock (&acc_dev->lock);
936       gomp_fatal ("struct not mapped for acc_attach");
937     }
938 
939   gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n, (uintptr_t) hostaddr,
940 		       0, NULL, false);
941 
942   gomp_mutex_unlock (&acc_dev->lock);
943 }
944 
945 void
acc_attach(void ** hostaddr)946 acc_attach (void **hostaddr)
947 {
948   acc_attach_async (hostaddr, acc_async_sync);
949 }
950 
951 static void
goacc_detach_internal(void ** hostaddr,int async,bool finalize)952 goacc_detach_internal (void **hostaddr, int async, bool finalize)
953 {
954   struct goacc_thread *thr = goacc_thread ();
955   struct gomp_device_descr *acc_dev = thr->dev;
956   struct splay_tree_key_s cur_node;
957   splay_tree_key n;
958   struct goacc_asyncqueue *aq = get_goacc_asyncqueue (async);
959 
960   if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
961     return;
962 
963   gomp_mutex_lock (&acc_dev->lock);
964 
965   cur_node.host_start = (uintptr_t) hostaddr;
966   cur_node.host_end = cur_node.host_start + sizeof (void *);
967   n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
968 
969   if (n == NULL)
970     {
971       gomp_mutex_unlock (&acc_dev->lock);
972       gomp_fatal ("struct not mapped for acc_detach");
973     }
974 
975   gomp_detach_pointer (acc_dev, aq, n, (uintptr_t) hostaddr, finalize, NULL);
976 
977   gomp_mutex_unlock (&acc_dev->lock);
978 }
979 
980 void
acc_detach(void ** hostaddr)981 acc_detach (void **hostaddr)
982 {
983   goacc_detach_internal (hostaddr, acc_async_sync, false);
984 }
985 
/* Detach the pointer stored at HOSTADDR on async queue ASYNC, without
   finalize.  */

void
acc_detach_async (void **hostaddr, int async)
{
  const bool finalize = false;

  goacc_detach_internal (hostaddr, async, finalize);
}
991 
992 void
acc_detach_finalize(void ** hostaddr)993 acc_detach_finalize (void **hostaddr)
994 {
995   goacc_detach_internal (hostaddr, acc_async_sync, true);
996 }
997 
/* Detach the pointer stored at HOSTADDR on async queue ASYNC, with
   finalize.  */

void
acc_detach_finalize_async (void **hostaddr, int async)
{
  const bool finalize = true;

  goacc_detach_internal (hostaddr, async, finalize);
}
1003 
1004 /* Some types of (pointer) variables use several consecutive mappings, which
1005    must be treated as a group for enter/exit data directives.  This function
1006    returns the last mapping in such a group (inclusive), or POS for singleton
1007    mappings.  */
1008 
1009 static int
find_group_last(int pos,size_t mapnum,size_t * sizes,unsigned short * kinds)1010 find_group_last (int pos, size_t mapnum, size_t *sizes, unsigned short *kinds)
1011 {
1012   unsigned char kind0 = kinds[pos] & 0xff;
1013   int first_pos = pos;
1014 
1015   switch (kind0)
1016     {
1017     case GOMP_MAP_TO_PSET:
1018       if (pos + 1 < mapnum
1019 	  && (kinds[pos + 1] & 0xff) == GOMP_MAP_ATTACH)
1020 	return pos + 1;
1021 
1022       while (pos + 1 < mapnum
1023 	     && (kinds[pos + 1] & 0xff) == GOMP_MAP_POINTER)
1024 	pos++;
1025       /* We expect at least one GOMP_MAP_POINTER (if not a single
1026 	 GOMP_MAP_ATTACH) after a GOMP_MAP_TO_PSET.  */
1027       assert (pos > first_pos);
1028       break;
1029 
1030     case GOMP_MAP_STRUCT:
1031       pos += sizes[pos];
1032       break;
1033 
1034     case GOMP_MAP_POINTER:
1035     case GOMP_MAP_ALWAYS_POINTER:
1036       /* These mappings are only expected after some other mapping.  If we
1037 	 see one by itself, something has gone wrong.  */
1038       gomp_fatal ("unexpected mapping");
1039       break;
1040 
1041     case GOMP_MAP_ATTACH:
1042       break;
1043 
1044     default:
1045       /* GOMP_MAP_ALWAYS_POINTER can only appear directly after some other
1046 	 mapping.  */
1047       if (pos + 1 < mapnum)
1048 	{
1049 	  unsigned char kind1 = kinds[pos + 1] & 0xff;
1050 	  if (kind1 == GOMP_MAP_ALWAYS_POINTER)
1051 	    return pos + 1;
1052 	}
1053 
1054       /* We can have a single GOMP_MAP_ATTACH mapping after a to/from
1055 	 mapping.  */
1056       if (pos + 1 < mapnum
1057 	  && (kinds[pos + 1] & 0xff) == GOMP_MAP_ATTACH)
1058 	return pos + 1;
1059 
1060       /* We can have zero or more GOMP_MAP_POINTER mappings after a to/from
1061 	 (etc.) mapping.  */
1062       while (pos + 1 < mapnum
1063 	     && (kinds[pos + 1] & 0xff) == GOMP_MAP_POINTER)
1064 	pos++;
1065     }
1066 
1067   return pos;
1068 }
1069 
1070 /* Map variables for OpenACC "enter data".  We can't just call
1071    goacc_map_vars once, because individual mapped variables might have
1072    "exit data" called for them at different times.  */
1073 
1074 static void
goacc_enter_data_internal(struct gomp_device_descr * acc_dev,size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned short * kinds,goacc_aq aq)1075 goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
1076 			   void **hostaddrs, size_t *sizes,
1077 			   unsigned short *kinds, goacc_aq aq)
1078 {
1079   gomp_mutex_lock (&acc_dev->lock);
1080 
1081   for (size_t i = 0; i < mapnum; i++)
1082     {
1083       splay_tree_key n;
1084       size_t group_last = find_group_last (i, mapnum, sizes, kinds);
1085       bool struct_p = false;
1086       size_t size, groupnum = (group_last - i) + 1;
1087 
1088       switch (kinds[i] & 0xff)
1089 	{
1090 	case GOMP_MAP_STRUCT:
1091 	  {
1092 	    size = (uintptr_t) hostaddrs[group_last] + sizes[group_last]
1093 		   - (uintptr_t) hostaddrs[i];
1094 	    struct_p = true;
1095 	  }
1096 	  break;
1097 
1098 	case GOMP_MAP_ATTACH:
1099 	  size = sizeof (void *);
1100 	  break;
1101 
1102 	default:
1103 	  size = sizes[i];
1104 	}
1105 
1106       n = lookup_host (acc_dev, hostaddrs[i], size);
1107 
1108       if (n && struct_p)
1109 	{
1110 	  for (size_t j = i + 1; j <= group_last; j++)
1111 	    {
1112 	      struct splay_tree_key_s cur_node;
1113 	      cur_node.host_start = (uintptr_t) hostaddrs[j];
1114 	      cur_node.host_end = cur_node.host_start + sizes[j];
1115 	      splay_tree_key n2
1116 		= splay_tree_lookup (&acc_dev->mem_map, &cur_node);
1117 	      if (!n2
1118 		  || n2->tgt != n->tgt
1119 		  || n2->host_start - n->host_start
1120 		     != n2->tgt_offset - n->tgt_offset)
1121 		{
1122 		  gomp_mutex_unlock (&acc_dev->lock);
1123 		  gomp_fatal ("Trying to map into device [%p..%p) structure "
1124 			      "element when other mapped elements from the "
1125 			      "same structure weren't mapped together with "
1126 			      "it", (void *) cur_node.host_start,
1127 			      (void *) cur_node.host_end);
1128 		}
1129 	    }
1130 	  /* This is a special case because we must increment the refcount by
1131 	     the number of mapped struct elements, rather than by one.  */
1132 	  if (n->refcount != REFCOUNT_INFINITY)
1133 	    n->refcount += groupnum - 1;
1134 	  n->dynamic_refcount += groupnum - 1;
1135 	}
1136       else if (n && groupnum == 1)
1137 	{
1138 	  void *h = hostaddrs[i];
1139 	  size_t s = sizes[i];
1140 
1141 	  if ((kinds[i] & 0xff) == GOMP_MAP_ATTACH)
1142 	    {
1143 	      gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n,
1144 				   (uintptr_t) h, s, NULL, false);
1145 	      /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic
1146 		 reference counts ('n->refcount', 'n->dynamic_refcount').  */
1147 	    }
1148 	  else
1149 	    goacc_map_var_existing (acc_dev, h, s, n);
1150 	}
1151       else if (n && groupnum > 1)
1152 	{
1153 	  assert (n->refcount != REFCOUNT_INFINITY
1154 		  && n->refcount != REFCOUNT_LINK);
1155 
1156 	  for (size_t j = i + 1; j <= group_last; j++)
1157 	    if ((kinds[j] & 0xff) == GOMP_MAP_ATTACH)
1158 	      {
1159 		splay_tree_key m
1160 		  = lookup_host (acc_dev, hostaddrs[j], sizeof (void *));
1161 		gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, m,
1162 				     (uintptr_t) hostaddrs[j], sizes[j], NULL,
1163 				     false);
1164 	      }
1165 
1166 	  bool processed = false;
1167 
1168 	  struct target_mem_desc *tgt = n->tgt;
1169 	  for (size_t j = 0; j < tgt->list_count; j++)
1170 	    if (tgt->list[j].key == n)
1171 	      {
1172 		/* We are processing a group of mappings (e.g.
1173 		   [GOMP_MAP_TO, GOMP_MAP_TO_PSET, GOMP_MAP_POINTER]).
1174 		   Find the right group in the target_mem_desc's variable
1175 		   list, and increment the refcounts for each item in that
1176 		   group.  */
1177 		for (size_t k = 0; k < groupnum; k++)
1178 		  if (j + k < tgt->list_count
1179 		      && tgt->list[j + k].key
1180 		      && !tgt->list[j + k].is_attach)
1181 		    {
1182 		      tgt->list[j + k].key->refcount++;
1183 		      tgt->list[j + k].key->dynamic_refcount++;
1184 		    }
1185 		processed = true;
1186 		break;
1187 	      }
1188 
1189 	  if (!processed)
1190 	    {
1191 	      gomp_mutex_unlock (&acc_dev->lock);
1192 	      gomp_fatal ("dynamic refcount incrementing failed for "
1193 			  "pointer/pset");
1194 	    }
1195 	}
1196       else if (hostaddrs[i])
1197 	{
1198 	  /* The data is not mapped already.  Map it now, unless the first
1199 	     member in the group has a NULL pointer (e.g. a non-present
1200 	     optional parameter).  */
1201 	  gomp_mutex_unlock (&acc_dev->lock);
1202 
1203 	  struct target_mem_desc *tgt
1204 	    = goacc_map_vars (acc_dev, aq, groupnum, &hostaddrs[i], NULL,
1205 			      &sizes[i], &kinds[i], true,
1206 			      GOMP_MAP_VARS_ENTER_DATA);
1207 	  assert (tgt);
1208 
1209 	  gomp_mutex_lock (&acc_dev->lock);
1210 
1211 	  for (size_t j = 0; j < tgt->list_count; j++)
1212 	    {
1213 	      n = tgt->list[j].key;
1214 	      if (n && !tgt->list[j].is_attach)
1215 		n->dynamic_refcount++;
1216 	    }
1217 	}
1218 
1219       i = group_last;
1220     }
1221 
1222   gomp_mutex_unlock (&acc_dev->lock);
1223 }
1224 
1225 /* Unmap variables for OpenACC "exit data".  */
1226 
1227 static void
goacc_exit_data_internal(struct gomp_device_descr * acc_dev,size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned short * kinds,goacc_aq aq)1228 goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
1229 			  void **hostaddrs, size_t *sizes,
1230 			  unsigned short *kinds, goacc_aq aq)
1231 {
1232   gomp_mutex_lock (&acc_dev->lock);
1233 
1234   /* Handle "detach" before copyback/deletion of mapped data.  */
1235   for (size_t i = 0; i < mapnum; ++i)
1236     {
1237       unsigned char kind = kinds[i] & 0xff;
1238       bool finalize = false;
1239       switch (kind)
1240 	{
1241 	case GOMP_MAP_FORCE_DETACH:
1242 	  finalize = true;
1243 	  /* Fallthrough.  */
1244 
1245 	case GOMP_MAP_DETACH:
1246 	  {
1247 	    struct splay_tree_key_s cur_node;
1248 	    uintptr_t hostaddr = (uintptr_t) hostaddrs[i];
1249 	    cur_node.host_start = hostaddr;
1250 	    cur_node.host_end = cur_node.host_start + sizeof (void *);
1251 	    splay_tree_key n
1252 	      = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
1253 
1254 	    if (n == NULL)
1255 	      {
1256 		gomp_mutex_unlock (&acc_dev->lock);
1257 		gomp_fatal ("struct not mapped for detach operation");
1258 	      }
1259 
1260 	    gomp_detach_pointer (acc_dev, aq, n, hostaddr, finalize, NULL);
1261 	  }
1262 	  break;
1263 	default:
1264 	  ;
1265 	}
1266     }
1267 
1268   for (size_t i = 0; i < mapnum; ++i)
1269     {
1270       unsigned char kind = kinds[i] & 0xff;
1271 
1272       switch (kind)
1273 	{
1274 	case GOMP_MAP_FROM:
1275 	case GOMP_MAP_FORCE_FROM:
1276 	case GOMP_MAP_TO_PSET:
1277 	case GOMP_MAP_POINTER:
1278 	case GOMP_MAP_DELETE:
1279 	case GOMP_MAP_RELEASE:
1280 	  {
1281 	    struct splay_tree_key_s cur_node;
1282 	    size_t size;
1283 	    if (kind == GOMP_MAP_POINTER)
1284 	      size = sizeof (void *);
1285 	    else
1286 	      size = sizes[i];
1287 	    cur_node.host_start = (uintptr_t) hostaddrs[i];
1288 	    cur_node.host_end = cur_node.host_start + size;
1289 	    splay_tree_key n
1290 	      = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
1291 
1292 	    if (n == NULL)
1293 	      continue;
1294 
1295 	    goacc_exit_datum_1 (acc_dev, hostaddrs[i], size, kind, n, aq);
1296 	  }
1297 	  break;
1298 
1299 	case GOMP_MAP_STRUCT:
1300 	  /* Skip the 'GOMP_MAP_STRUCT' itself, and use the regular processing
1301 	     for all its entries.  This special handling exists for GCC 10.1
1302 	     compatibility; afterwards, we're not generating these no-op
1303 	     'GOMP_MAP_STRUCT's anymore.  */
1304 	  break;
1305 
1306 	case GOMP_MAP_DETACH:
1307 	case GOMP_MAP_FORCE_DETACH:
1308 	  /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic
1309 	     reference counts ('n->refcount', 'n->dynamic_refcount').  */
1310 	  break;
1311 
1312 	default:
1313 	  gomp_fatal (">>>> goacc_exit_data_internal UNHANDLED kind 0x%.2x",
1314 			  kind);
1315 	}
1316     }
1317 
1318   gomp_mutex_unlock (&acc_dev->lock);
1319 }
1320 
1321 static void
goacc_enter_exit_data_internal(int flags_m,size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned short * kinds,bool data_enter,int async,int num_waits,va_list * ap)1322 goacc_enter_exit_data_internal (int flags_m, size_t mapnum, void **hostaddrs,
1323 				size_t *sizes, unsigned short *kinds,
1324 				bool data_enter, int async, int num_waits,
1325 				va_list *ap)
1326 {
1327   int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
1328 
1329   struct goacc_thread *thr;
1330   struct gomp_device_descr *acc_dev;
1331 
1332   goacc_lazy_initialize ();
1333 
1334   thr = goacc_thread ();
1335   acc_dev = thr->dev;
1336 
1337   bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
1338 
1339   acc_prof_info prof_info;
1340   if (profiling_p)
1341     {
1342       thr->prof_info = &prof_info;
1343 
1344       prof_info.event_type
1345 	= data_enter ? acc_ev_enter_data_start : acc_ev_exit_data_start;
1346       prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
1347       prof_info.version = _ACC_PROF_INFO_VERSION;
1348       prof_info.device_type = acc_device_type (acc_dev->type);
1349       prof_info.device_number = acc_dev->target_id;
1350       prof_info.thread_id = -1;
1351       prof_info.async = async;
1352       prof_info.async_queue = prof_info.async;
1353       prof_info.src_file = NULL;
1354       prof_info.func_name = NULL;
1355       prof_info.line_no = -1;
1356       prof_info.end_line_no = -1;
1357       prof_info.func_line_no = -1;
1358       prof_info.func_end_line_no = -1;
1359     }
1360   acc_event_info enter_exit_data_event_info;
1361   if (profiling_p)
1362     {
1363       enter_exit_data_event_info.other_event.event_type
1364 	= prof_info.event_type;
1365       enter_exit_data_event_info.other_event.valid_bytes
1366 	= _ACC_OTHER_EVENT_INFO_VALID_BYTES;
1367       enter_exit_data_event_info.other_event.parent_construct
1368 	= data_enter ? acc_construct_enter_data : acc_construct_exit_data;
1369       enter_exit_data_event_info.other_event.implicit = 0;
1370       enter_exit_data_event_info.other_event.tool_info = NULL;
1371     }
1372   acc_api_info api_info;
1373   if (profiling_p)
1374     {
1375       thr->api_info = &api_info;
1376 
1377       api_info.device_api = acc_device_api_none;
1378       api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
1379       api_info.device_type = prof_info.device_type;
1380       api_info.vendor = -1;
1381       api_info.device_handle = NULL;
1382       api_info.context_handle = NULL;
1383       api_info.async_handle = NULL;
1384     }
1385 
1386   if (profiling_p)
1387     goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
1388 			      &api_info);
1389 
1390   if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1391       || (flags & GOACC_FLAG_HOST_FALLBACK))
1392     {
1393       prof_info.device_type = acc_device_host;
1394       api_info.device_type = prof_info.device_type;
1395 
1396       goto out_prof;
1397     }
1398 
1399   if (num_waits)
1400     goacc_wait (async, num_waits, ap);
1401 
1402   goacc_aq aq = get_goacc_asyncqueue (async);
1403 
1404   if (data_enter)
1405     goacc_enter_data_internal (acc_dev, mapnum, hostaddrs, sizes, kinds, aq);
1406   else
1407     goacc_exit_data_internal (acc_dev, mapnum, hostaddrs, sizes, kinds, aq);
1408 
1409  out_prof:
1410   if (profiling_p)
1411     {
1412       prof_info.event_type
1413 	= data_enter ? acc_ev_enter_data_end : acc_ev_exit_data_end;
1414       enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
1415       goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
1416 				&api_info);
1417 
1418       thr->prof_info = NULL;
1419       thr->api_info = NULL;
1420     }
1421 }
1422 
1423 /* Legacy entry point (GCC 11 and earlier).  */
1424 
1425 void
GOACC_enter_exit_data(int flags_m,size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned short * kinds,int async,int num_waits,...)1426 GOACC_enter_exit_data (int flags_m, size_t mapnum, void **hostaddrs,
1427 		       size_t *sizes, unsigned short *kinds, int async,
1428 		       int num_waits, ...)
1429 {
1430   /* Determine if this is an OpenACC "enter data".  */
1431   bool data_enter = false;
1432   for (size_t i = 0; i < mapnum; ++i)
1433     {
1434       unsigned char kind = kinds[i] & 0xff;
1435 
1436       if (kind == GOMP_MAP_POINTER
1437 	  || kind == GOMP_MAP_TO_PSET
1438 	  || kind == GOMP_MAP_STRUCT)
1439 	continue;
1440 
1441       if (kind == GOMP_MAP_FORCE_ALLOC
1442 	  || kind == GOMP_MAP_FORCE_PRESENT
1443 	  || kind == GOMP_MAP_ATTACH
1444 	  || kind == GOMP_MAP_FORCE_TO
1445 	  || kind == GOMP_MAP_TO
1446 	  || kind == GOMP_MAP_ALLOC)
1447 	{
1448 	  data_enter = true;
1449 	  break;
1450 	}
1451 
1452       if (kind == GOMP_MAP_RELEASE
1453 	  || kind == GOMP_MAP_DELETE
1454 	  || kind == GOMP_MAP_DETACH
1455 	  || kind == GOMP_MAP_FORCE_DETACH
1456 	  || kind == GOMP_MAP_FROM
1457 	  || kind == GOMP_MAP_FORCE_FROM)
1458 	break;
1459 
1460       gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
1461 		      kind);
1462     }
1463 
1464   va_list ap;
1465   va_start (ap, num_waits);
1466   goacc_enter_exit_data_internal (flags_m, mapnum, hostaddrs, sizes, kinds,
1467 				  data_enter, async, num_waits, &ap);
1468   va_end (ap);
1469 }
1470 
/* Entry point for "enter data": collects the variadic wait arguments and
   calls goacc_enter_exit_data_internal with DATA_ENTER set to true.  */

void
GOACC_enter_data (int flags_m, size_t mapnum, void **hostaddrs,
		  size_t *sizes, unsigned short *kinds, int async,
		  int num_waits, ...)
{
  const bool data_enter = true;
  va_list waits;

  va_start (waits, num_waits);
  goacc_enter_exit_data_internal (flags_m, mapnum, hostaddrs, sizes, kinds,
				  data_enter, async, num_waits, &waits);
  va_end (waits);
}
1482 
/* Entry point for "exit data": collects the variadic wait arguments and
   calls goacc_enter_exit_data_internal with DATA_ENTER set to false.  */

void
GOACC_exit_data (int flags_m, size_t mapnum, void **hostaddrs,
		 size_t *sizes, unsigned short *kinds, int async,
		 int num_waits, ...)
{
  const bool data_enter = false;
  va_list waits;

  va_start (waits, num_waits);
  goacc_enter_exit_data_internal (flags_m, mapnum, hostaddrs, sizes, kinds,
				  data_enter, async, num_waits, &waits);
  va_end (waits);
}
1494 
1495 void
GOACC_declare(int flags_m,size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned short * kinds)1496 GOACC_declare (int flags_m, size_t mapnum,
1497 	       void **hostaddrs, size_t *sizes, unsigned short *kinds)
1498 {
1499   for (size_t i = 0; i < mapnum; i++)
1500     {
1501       unsigned char kind = kinds[i] & 0xff;
1502 
1503       if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
1504 	continue;
1505 
1506       switch (kind)
1507 	{
1508 	case GOMP_MAP_ALLOC:
1509 	  if (acc_is_present (hostaddrs[i], sizes[i]))
1510 	    continue;
1511 	  /* FALLTHRU */
1512 	case GOMP_MAP_FORCE_ALLOC:
1513 	case GOMP_MAP_TO:
1514 	case GOMP_MAP_FORCE_TO:
1515 	  goacc_enter_exit_data_internal (flags_m, 1, &hostaddrs[i], &sizes[i],
1516 					  &kinds[i], true, GOMP_ASYNC_SYNC, 0, NULL);
1517 	  break;
1518 
1519 	case GOMP_MAP_FROM:
1520 	case GOMP_MAP_FORCE_FROM:
1521 	case GOMP_MAP_RELEASE:
1522 	case GOMP_MAP_DELETE:
1523 	  goacc_enter_exit_data_internal (flags_m, 1, &hostaddrs[i], &sizes[i],
1524 					  &kinds[i], false, GOMP_ASYNC_SYNC, 0, NULL);
1525 	  break;
1526 
1527 	case GOMP_MAP_FORCE_DEVICEPTR:
1528 	  break;
1529 
1530 	case GOMP_MAP_FORCE_PRESENT:
1531 	  if (!acc_is_present (hostaddrs[i], sizes[i]))
1532 	    gomp_fatal ("[%p,%ld] is not mapped", hostaddrs[i],
1533 			(unsigned long) sizes[i]);
1534 	  break;
1535 
1536 	default:
1537 	  assert (0);
1538 	  break;
1539 	}
1540     }
1541 }
1542