1 /* OpenACC Runtime initialization routines
2 
3    Copyright (C) 2013-2021 Free Software Foundation, Inc.
4 
5    Contributed by Mentor Embedded.
6 
7    This file is part of the GNU Offloading and Multi Processing Library
8    (libgomp).
9 
10    Libgomp is free software; you can redistribute it and/or modify it
11    under the terms of the GNU General Public License as published by
12    the Free Software Foundation; either version 3, or (at your option)
13    any later version.
14 
15    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
18    more details.
19 
20    Under Section 7 of GPL version 3, you are granted additional
21    permissions described in the GCC Runtime Library Exception, version
22    3.1, as published by the Free Software Foundation.
23 
24    You should have received a copy of the GNU General Public License and
25    a copy of the GCC Runtime Library Exception along with this program;
26    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
27    <http://www.gnu.org/licenses/>.  */
28 
29 #include "openacc.h"
30 #include "libgomp.h"
31 #include "gomp-constants.h"
32 #include "oacc-int.h"
33 #include <string.h>
34 #include <assert.h>
35 
36 /* Return block containing [H->S), or NULL if not contained.  The device lock
37    for DEV must be locked on entry, and remains locked on exit.  */
38 
39 static splay_tree_key
lookup_host(struct gomp_device_descr * dev,void * h,size_t s)40 lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
41 {
42   struct splay_tree_key_s node;
43   splay_tree_key key;
44 
45   node.host_start = (uintptr_t) h;
46   node.host_end = (uintptr_t) h + s;
47 
48   key = splay_tree_lookup (&dev->mem_map, &node);
49 
50   return key;
51 }
52 
53 /* Helper for lookup_dev.  Iterate over splay tree.  */
54 
55 static splay_tree_key
lookup_dev_1(splay_tree_node node,uintptr_t d,size_t s)56 lookup_dev_1 (splay_tree_node node, uintptr_t d, size_t s)
57 {
58   splay_tree_key key = &node->key;
59   if (d >= key->tgt->tgt_start && d + s <= key->tgt->tgt_end)
60     return key;
61 
62   key = NULL;
63   if (node->left)
64     key = lookup_dev_1 (node->left, d, s);
65   if (!key && node->right)
66     key = lookup_dev_1 (node->right, d, s);
67 
68   return key;
69 }
70 
71 /* Return block containing [D->S), or NULL if not contained.
72 
73    This iterates over the splay tree.  This is not expected to be a common
74    operation.
75 
76    The device lock associated with MEM_MAP must be locked on entry, and remains
77    locked on exit.  */
78 
79 static splay_tree_key
lookup_dev(splay_tree mem_map,void * d,size_t s)80 lookup_dev (splay_tree mem_map, void *d, size_t s)
81 {
82   if (!mem_map || !mem_map->root)
83     return NULL;
84 
85   return lookup_dev_1 (mem_map->root, (uintptr_t) d, s);
86 }
87 
88 
/* OpenACC is silent on how memory exhaustion is indicated.  We return
   NULL.  */

void *
acc_malloc (size_t s)
{
  /* A zero-sized request yields NULL, same as the exhaustion convention.  */
  if (!s)
    return NULL;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();

  assert (thr->dev);

  /* On shared-memory devices "device memory" is just host memory.  */
  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return malloc (s);

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);

  /* Delegate the actual allocation to the device plugin.  */
  void *res = thr->dev->alloc_func (thr->dev->target_id, s);

  /* The profiling structures live on this stack frame; unhook them from the
     thread before returning.  */
  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }

  return res;
}
121 
/* Free device memory D previously obtained from acc_malloc.  Refuses (fatal
   error) to free memory that is still part of an active mapping.  */
void
acc_free (void *d)
{
  splay_tree_key k;

  /* free (NULL) semantics: silently accept a null pointer.  */
  if (!d)
    return;

  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  struct gomp_device_descr *acc_dev = thr->dev;

  /* Shared-memory device: the pointer came from plain malloc.  */
  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return free (d);

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);

  gomp_mutex_lock (&acc_dev->lock);

  /* We don't have to call lazy open here, as the ptr value must have
     been returned by acc_malloc.  It's not permitted to pass NULL in
     (unless you got that null from acc_malloc).  */
  if ((k = lookup_dev (&acc_dev->mem_map, d, 1)))
    {
      /* D is still inside a mapped block; recover the corresponding host
	 address for the diagnostic.  (void * arithmetic is a GNU C
	 extension, used throughout libgomp.)  */
      void *offset = d - k->tgt->tgt_start + k->tgt_offset;
      void *h = k->host_start + offset;
      size_t h_size = k->host_end - k->host_start;
      gomp_mutex_unlock (&acc_dev->lock);
      /* PR92503 "[OpenACC] Behavior of 'acc_free' if the memory space is still
	 used in a mapping".  */
      gomp_fatal ("refusing to free device memory space at %p that is still"
		  " mapped at [%p,+%d]",
		  d, h, (int) h_size);
    }
  else
    gomp_mutex_unlock (&acc_dev->lock);

  if (!acc_dev->free_func (acc_dev->target_id, d))
    gomp_fatal ("error in freeing device memory in %s", __FUNCTION__);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}
172 
/* Common implementation for acc_memcpy_{to,from}_device[_async]: copy S
   bytes between host address H and device address D, direction selected by
   FROM (true = device-to-host).  LIBFNNAME is currently unused in this
   body.  */
static void
memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async,
		      const char *libfnname)
{
  /* No need to call lazy open here, as the device pointer must have
     been obtained from a routine that did that.  */
  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  /* Shared memory: a plain (possibly overlapping) memory move suffices.  */
  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (from)
	memmove (h, d, s);
      else
	memmove (d, h, s);
      return;
    }

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  /* Resolve ASYNC to a queue (NULL for synchronous operation).  */
  goacc_aq aq = get_goacc_asyncqueue (async);
  if (from)
    gomp_copy_dev2host (thr->dev, aq, h, d, s);
  else
    gomp_copy_host2dev (thr->dev, aq, d, h, s, false, /* TODO: cbuf? */ NULL);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}
213 
/* Synchronously copy S bytes from host H to device D.  */
void
acc_memcpy_to_device (void *d, void *h, size_t s)
{
  memcpy_tofrom_device (false, d, h, s, acc_async_sync, __FUNCTION__);
}
219 
/* Asynchronously copy S bytes from host H to device D on queue ASYNC.  */
void
acc_memcpy_to_device_async (void *d, void *h, size_t s, int async)
{
  memcpy_tofrom_device (false, d, h, s, async, __FUNCTION__);
}
225 
/* Synchronously copy S bytes from device D to host H.  */
void
acc_memcpy_from_device (void *h, void *d, size_t s)
{
  memcpy_tofrom_device (true, d, h, s, acc_async_sync, __FUNCTION__);
}
231 
/* Asynchronously copy S bytes from device D to host H on queue ASYNC.  */
void
acc_memcpy_from_device_async (void *h, void *d, size_t s, int async)
{
  memcpy_tofrom_device (true, d, h, s, async, __FUNCTION__);
}
237 
/* Return the device pointer that corresponds to host data H.  Or NULL
   if no mapping.  */

void *
acc_deviceptr (void *h)
{
  splay_tree_key n;
  void *d;
  void *offset;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *dev = thr->dev;

  /* Shared memory: host and device addresses coincide.  */
  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return h;

  /* In the following, no OpenACC Profiling Interface events can possibly be
     generated.  */

  gomp_mutex_lock (&dev->lock);

  n = lookup_host (dev, h, 1);

  if (!n)
    {
      gomp_mutex_unlock (&dev->lock);
      return NULL;
    }

  /* Translate H's offset within the mapped block to the device side.
     (void * arithmetic is a GNU C extension.)  */
  offset = h - n->host_start;

  d = n->tgt->tgt_start + n->tgt_offset + offset;

  gomp_mutex_unlock (&dev->lock);

  return d;
}
277 
/* Return the host pointer that corresponds to device data D.  Or NULL
   if no mapping.  */

void *
acc_hostptr (void *d)
{
  splay_tree_key n;
  void *h;
  void *offset;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* Shared memory: host and device addresses coincide.  */
  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return d;

  /* In the following, no OpenACC Profiling Interface events can possibly be
     generated.  */

  gomp_mutex_lock (&acc_dev->lock);

  /* Reverse lookup: find the mapping whose target block contains D.  */
  n = lookup_dev (&acc_dev->mem_map, d, 1);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      return NULL;
    }

  /* Translate D's offset within the target block back to the host side.
     (void * arithmetic is a GNU C extension.)  */
  offset = d - n->tgt->tgt_start + n->tgt_offset;

  h = n->host_start + offset;

  gomp_mutex_unlock (&acc_dev->lock);

  return h;
}
317 
/* Return 1 if host data [H,+S] is present on the device.  */

int
acc_is_present (void *h, size_t s)
{
  splay_tree_key n;

  /* An empty range or null host address is never present.  */
  if (!s || !h)
    return 0;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return h != NULL;

  /* In the following, no OpenACC Profiling Interface events can possibly be
     generated.  */

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  /* The lookup may return a block that merely overlaps [H,H+S); only count
     it as present if the whole queried range is contained in the block.  */
  if (n && ((uintptr_t)h < n->host_start
	    || (uintptr_t)h + s > n->host_end
	    || s > n->host_end - n->host_start))
    n = NULL;

  gomp_mutex_unlock (&acc_dev->lock);

  return n != NULL;
}
352 
353 /* Create a mapping for host [H,+S] -> device [D,+S] */
354 
355 void
acc_map_data(void * h,void * d,size_t s)356 acc_map_data (void *h, void *d, size_t s)
357 {
358   size_t mapnum = 1;
359   void *hostaddrs = h;
360   void *devaddrs = d;
361   size_t sizes = s;
362   unsigned short kinds = GOMP_MAP_ALLOC;
363 
364   goacc_lazy_initialize ();
365 
366   struct goacc_thread *thr = goacc_thread ();
367   struct gomp_device_descr *acc_dev = thr->dev;
368 
369   if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
370     {
371       if (d != h)
372         gomp_fatal ("cannot map data on shared-memory system");
373     }
374   else
375     {
376       struct goacc_thread *thr = goacc_thread ();
377 
378       if (!d || !h || !s)
379 	gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
380                     (void *)h, (int)s, (void *)d, (int)s);
381 
382       acc_prof_info prof_info;
383       acc_api_info api_info;
384       bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
385 
386       gomp_mutex_lock (&acc_dev->lock);
387 
388       if (lookup_host (acc_dev, h, s))
389         {
390 	  gomp_mutex_unlock (&acc_dev->lock);
391 	  gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
392 		      (int)s);
393 	}
394 
395       if (lookup_dev (&thr->dev->mem_map, d, s))
396         {
397 	  gomp_mutex_unlock (&acc_dev->lock);
398 	  gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
399 		      (int)s);
400 	}
401 
402       gomp_mutex_unlock (&acc_dev->lock);
403 
404       struct target_mem_desc *tgt
405 	= goacc_map_vars (acc_dev, NULL, mapnum, &hostaddrs, &devaddrs, &sizes,
406 			  &kinds, true, GOMP_MAP_VARS_ENTER_DATA);
407       assert (tgt);
408       assert (tgt->list_count == 1);
409       splay_tree_key n = tgt->list[0].key;
410       assert (n);
411       assert (n->refcount == 1);
412       assert (n->dynamic_refcount == 0);
413       /* Special reference counting behavior.  */
414       n->refcount = REFCOUNT_INFINITY;
415 
416       if (profiling_p)
417 	{
418 	  thr->prof_info = NULL;
419 	  thr->api_info = NULL;
420 	}
421     }
422 }
423 
/* Undo a mapping established by acc_map_data for host address H.  Fatal
   error if H is not the exact start of a mapping created that way.  The
   device memory itself is NOT freed; the caller still owns it.  */
void
acc_unmap_data (void *h)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* No need to call lazy open, as the address must have been mapped.  */

  /* This is a no-op on shared-memory targets.  */
  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);

  gomp_mutex_lock (&acc_dev->lock);

  splay_tree_key n = lookup_host (acc_dev, h, 1);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  size_t host_size = n->host_end - n->host_start;

  /* H must name the start of the block, not an interior address.  */
  if (n->host_start != (uintptr_t) h)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds %p",
		  (void *) n->host_start, (int) host_size, (void *) h);
    }
  /* TODO This currently doesn't catch 'REFCOUNT_INFINITY' usage different from
     'acc_map_data'.  Maybe 'dynamic_refcount' can be used for disambiguating
     the different 'REFCOUNT_INFINITY' cases, or simply separate
     'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA'
     etc.)?  */
  else if (n->refcount != REFCOUNT_INFINITY)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("refusing to unmap block [%p,+%d] that has not been mapped"
		  " by 'acc_map_data'",
		  (void *) h, (int) host_size);
    }

  struct target_mem_desc *tgt = n->tgt;

  if (tgt->refcount == REFCOUNT_INFINITY)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("cannot unmap target block");
    }

  /* Above, we've verified that the mapping must have been set up by
     'acc_map_data'.  */
  assert (tgt->refcount == 1);

  /* Nullifying these fields prevents 'gomp_unmap_tgt' via 'gomp_remove_var'
     from freeing the target memory.  */
  tgt->tgt_end = 0;
  tgt->to_free = NULL;

  bool is_tgt_unmapped = gomp_remove_var (acc_dev, n);
  assert (is_tgt_unmapped);

  gomp_mutex_unlock (&acc_dev->lock);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}
499 
500 
/* Helper function to map a single dynamic data item, represented by a single
   mapping.  The acc_dev->lock should be held on entry, and remains locked on
   exit.  Returns the device address corresponding to HOSTADDR within the
   already-present mapping N, after bumping its reference counts.  */

static void *
goacc_map_var_existing (struct gomp_device_descr *acc_dev, void *hostaddr,
			size_t size, splay_tree_key n)
{
  assert (n);

  /* Present. */
  void *d = (void *) (n->tgt->tgt_start + n->tgt_offset + hostaddr
	    - n->host_start);

  /* The requested range must fit entirely inside the existing mapping.  */
  if (hostaddr + size > (void *) n->host_end)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,+%d] not mapped", hostaddr, (int) size);
    }

  /* Structural (refcount) and dynamic reference counts both go up; infinite
     refcounts (acc_map_data) are left alone.  */
  assert (n->refcount != REFCOUNT_LINK);
  if (n->refcount != REFCOUNT_INFINITY)
    n->refcount++;
  n->dynamic_refcount++;

  return d;
}
528 
/* Enter dynamic mapping for a single datum.  Return the device pointer.  */

static void *
goacc_enter_datum (void **hostaddrs, size_t *sizes, void *kinds, int async)
{
  void *d;
  splay_tree_key n;

  if (!hostaddrs[0] || !sizes[0])
    gomp_fatal ("[%p,+%d] is a bad range", hostaddrs[0], (int) sizes[0]);

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* Shared memory: no mapping needed, the host address is the answer.  */
  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return hostaddrs[0];

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, hostaddrs[0], sizes[0]);
  if (n)
    {
      /* Already present: just bump reference counts.  */
      d = goacc_map_var_existing (acc_dev, hostaddrs[0], sizes[0], n);
      gomp_mutex_unlock (&acc_dev->lock);
    }
  else
    {
      const size_t mapnum = 1;

      /* goacc_map_vars takes the lock itself, so release it first.  */
      gomp_mutex_unlock (&acc_dev->lock);

      goacc_aq aq = get_goacc_asyncqueue (async);

      struct target_mem_desc *tgt
	= goacc_map_vars (acc_dev, aq, mapnum, hostaddrs, NULL, sizes,
			  kinds, true, GOMP_MAP_VARS_ENTER_DATA);
      assert (tgt);
      assert (tgt->list_count == 1);
      n = tgt->list[0].key;
      assert (n);
      assert (n->refcount == 1);
      assert (n->dynamic_refcount == 0);
      n->dynamic_refcount++;

      d = (void *) tgt->tgt_start;
    }

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }

  return d;
}
595 
/* Allocate (without copy-in) device memory mirroring host [H,+S]; return the
   device pointer.  */
void *
acc_create (void *h, size_t s)
{
  unsigned short kinds[1] = { GOMP_MAP_ALLOC };
  return goacc_enter_datum (&h, &s, &kinds, acc_async_sync);
}
602 
/* Asynchronous variant of acc_create; the device pointer is not returned.  */
void
acc_create_async (void *h, size_t s, int async)
{
  unsigned short kinds[1] = { GOMP_MAP_ALLOC };
  goacc_enter_datum (&h, &s, &kinds, async);
}
609 
/* acc_present_or_create used to be what acc_create is now.  */
/* acc_pcreate is acc_present_or_create by a different name.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_create, acc_present_or_create)
strong_alias (acc_create, acc_pcreate)
#else
/* Fallback wrappers for toolchains without symbol aliasing.  */
void *
acc_present_or_create (void *h, size_t s)
{
  return acc_create (h, s);
}

void *
acc_pcreate (void *h, size_t s)
{
  return acc_create (h, s);
}
#endif
628 
/* Map host [H,+S] to the device and copy the data in; return the device
   pointer.  */
void *
acc_copyin (void *h, size_t s)
{
  unsigned short kinds[1] = { GOMP_MAP_TO };
  return goacc_enter_datum (&h, &s, &kinds, acc_async_sync);
}
635 
/* Asynchronous variant of acc_copyin; the device pointer is not returned.  */
void
acc_copyin_async (void *h, size_t s, int async)
{
  unsigned short kinds[1] = { GOMP_MAP_TO };
  goacc_enter_datum (&h, &s, &kinds, async);
}
642 
/* acc_present_or_copyin used to be what acc_copyin is now.  */
/* acc_pcopyin is acc_present_or_copyin by a different name.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_copyin, acc_present_or_copyin)
strong_alias (acc_copyin, acc_pcopyin)
#else
/* Fallback wrappers for toolchains without symbol aliasing.  */
void *
acc_present_or_copyin (void *h, size_t s)
{
  return acc_copyin (h, s);
}

void *
acc_pcopyin (void *h, size_t s)
{
  return acc_copyin (h, s);
}
#endif
661 
662 
/* Helper function to unmap a single data item.  Device lock should be held on
   entry, and remains locked on exit.  KIND selects release/delete/copyout
   semantics; N is the mapping covering host range [H,H+S); AQ is the async
   queue, or NULL for synchronous operation.  */

static void
goacc_exit_datum_1 (struct gomp_device_descr *acc_dev, void *h, size_t s,
		    unsigned short kind, splay_tree_key n, goacc_aq aq)
{
  /* Detach operations are handled elsewhere.  */
  assert (kind != GOMP_MAP_DETACH
	  && kind != GOMP_MAP_FORCE_DETACH);

  if ((uintptr_t) h < n->host_start || (uintptr_t) h + s > n->host_end)
    {
      size_t host_size = n->host_end - n->host_start;
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,+%d] outside mapped block [%p,+%d]",
		  (void *) h, (int) s, (void *) n->host_start, (int) host_size);
    }

  /* "Finalize" forms drop all dynamic references at once.  */
  bool finalize = (kind == GOMP_MAP_FORCE_FROM
		   || kind == GOMP_MAP_DELETE);

  /* The structural refcount must never drop below the dynamic one.  */
  assert (n->refcount != REFCOUNT_LINK);
  if (n->refcount != REFCOUNT_INFINITY
      && n->refcount < n->dynamic_refcount)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("Dynamic reference counting assert fail\n");
    }

  if (finalize)
    {
      if (n->refcount != REFCOUNT_INFINITY)
	n->refcount -= n->dynamic_refcount;
      n->dynamic_refcount = 0;
    }
  else if (n->dynamic_refcount)
    {
      if (n->refcount != REFCOUNT_INFINITY)
	n->refcount--;
      n->dynamic_refcount--;
    }

  /* Last reference gone: optionally copy data back, then remove the
     mapping.  */
  if (n->refcount == 0)
    {
      bool copyout = (kind == GOMP_MAP_FROM
		      || kind == GOMP_MAP_FORCE_FROM);
      if (copyout)
	{
	  void *d = (void *) (n->tgt->tgt_start + n->tgt_offset
			      + (uintptr_t) h - n->host_start);
	  gomp_copy_dev2host (acc_dev, aq, h, d, s);
	}

      if (aq)
	/* TODO We can't do the 'is_tgt_unmapped' checking -- see the
	   'gomp_unref_tgt' comment in
	   <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>;
	   PR92881.  */
	gomp_remove_var_async (acc_dev, n, aq);
      else
	{
	  size_t num_mappings = 0;
	  /* If the target_mem_desc represents a single data mapping, we can
	     check that it is freed when this splay tree key's refcount reaches
	     zero.  Otherwise (e.g. for a 'GOMP_MAP_STRUCT' mapping with
	     multiple members), fall back to skipping the test.  */
	  for (size_t l_i = 0; l_i < n->tgt->list_count; ++l_i)
	    if (n->tgt->list[l_i].key
		&& !n->tgt->list[l_i].is_attach)
	      ++num_mappings;
	  bool is_tgt_unmapped = gomp_remove_var (acc_dev, n);
	  assert (is_tgt_unmapped || num_mappings > 1);
	}
    }
}
738 
739 
/* Exit a dynamic mapping for a single variable.  */

static void
goacc_exit_datum (void *h, size_t s, unsigned short kind, int async)
{
  /* No need to call lazy open, as the data must already have been
     mapped.  */

  /* Strip modifier bits; only the base mapping kind matters here.  */
  kind &= 0xff;

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  gomp_mutex_lock (&acc_dev->lock);

  splay_tree_key n = lookup_host (acc_dev, h, s);
  /* Non-present data is a no-op: PR92726, RP92970, PR92984.  */
  if (n)
    {
      goacc_aq aq = get_goacc_asyncqueue (async);
      goacc_exit_datum_1 (acc_dev, h, s, kind, n, aq);
    }

  gomp_mutex_unlock (&acc_dev->lock);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}
783 
/* Release one dynamic reference on host [H,+S]; no copy back.  */
void
acc_delete (void *h , size_t s)
{
  goacc_exit_datum (h, s, GOMP_MAP_RELEASE, acc_async_sync);
}
789 
/* Asynchronous variant of acc_delete.  */
void
acc_delete_async (void *h , size_t s, int async)
{
  goacc_exit_datum (h, s, GOMP_MAP_RELEASE, async);
}
795 
/* Drop ALL dynamic references on host [H,+S]; no copy back.  */
void
acc_delete_finalize (void *h , size_t s)
{
  goacc_exit_datum (h, s, GOMP_MAP_DELETE, acc_async_sync);
}
801 
/* Asynchronous variant of acc_delete_finalize.  */
void
acc_delete_finalize_async (void *h , size_t s, int async)
{
  goacc_exit_datum (h, s, GOMP_MAP_DELETE, async);
}
807 
/* Release one dynamic reference on host [H,+S]; copy back on last ref.  */
void
acc_copyout (void *h, size_t s)
{
  goacc_exit_datum (h, s, GOMP_MAP_FROM, acc_async_sync);
}
813 
/* Asynchronous variant of acc_copyout.  */
void
acc_copyout_async (void *h, size_t s, int async)
{
  goacc_exit_datum (h, s, GOMP_MAP_FROM, async);
}
819 
/* Drop ALL dynamic references on host [H,+S], copying the data back.  */
void
acc_copyout_finalize (void *h, size_t s)
{
  goacc_exit_datum (h, s, GOMP_MAP_FORCE_FROM, acc_async_sync);
}
825 
/* Asynchronous variant of acc_copyout_finalize.  */
void
acc_copyout_finalize_async (void *h, size_t s, int async)
{
  goacc_exit_datum (h, s, GOMP_MAP_FORCE_FROM, async);
}
831 
/* Common implementation for acc_update_{device,self}[_async]: refresh S
   bytes at host address H in the direction selected by IS_DEV (non-zero =
   host-to-device).  Fatal error if [H,+S] is not mapped.  */
static void
update_dev_host (int is_dev, void *h, size_t s, int async)
{
  splay_tree_key n;
  void *d;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* Shared memory: host and device data are the same object; nothing to
     update.  */
  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  /* Fortran optional arguments that are non-present result in a
     NULL host address here.  This can safely be ignored as it is
     not possible to 'update' a non-present optional argument.  */
  if (h == NULL)
    return;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
    }

  /* Translate H to its device-side counterpart within the mapping.  */
  d = (void *) (n->tgt->tgt_start + n->tgt_offset
		+ (uintptr_t) h - n->host_start);

  goacc_aq aq = get_goacc_asyncqueue (async);

  if (is_dev)
    gomp_copy_host2dev (acc_dev, aq, d, h, s, false, /* TODO: cbuf? */ NULL);
  else
    gomp_copy_dev2host (acc_dev, aq, h, d, s);

  gomp_mutex_unlock (&acc_dev->lock);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}
889 
/* Synchronously copy host [H,+S] to its device counterpart.  */
void
acc_update_device (void *h, size_t s)
{
  update_dev_host (1, h, s, acc_async_sync);
}
895 
/* Asynchronous variant of acc_update_device.  */
void
acc_update_device_async (void *h, size_t s, int async)
{
  update_dev_host (1, h, s, async);
}
901 
/* Synchronously copy the device counterpart of [H,+S] back to the host.  */
void
acc_update_self (void *h, size_t s)
{
  update_dev_host (0, h, s, acc_async_sync);
}
907 
/* Asynchronous variant of acc_update_self.  */
void
acc_update_self_async (void *h, size_t s, int async)
{
  update_dev_host (0, h, s, async);
}
913 
/* Attach the pointer at host address HOSTADDR: make its device copy point at
   the device counterpart of its target.  Fatal error if HOSTADDR itself is
   not part of a mapped structure.  */
void
acc_attach_async (void **hostaddr, int async)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;
  goacc_aq aq = get_goacc_asyncqueue (async);

  struct splay_tree_key_s cur_node;
  splay_tree_key n;

  /* Attachment is meaningless when host and device share memory.  */
  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_mutex_lock (&acc_dev->lock);

  /* Look up the mapping that contains the pointer itself.  */
  cur_node.host_start = (uintptr_t) hostaddr;
  cur_node.host_end = cur_node.host_start + sizeof (void *);
  n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);

  if (n == NULL)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("struct not mapped for acc_attach");
    }

  gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n, (uintptr_t) hostaddr,
		       0, NULL);

  gomp_mutex_unlock (&acc_dev->lock);
}
944 
/* Synchronous variant of acc_attach_async.  */
void
acc_attach (void **hostaddr)
{
  acc_attach_async (hostaddr, acc_async_sync);
}
950 
/* Common implementation for the acc_detach* family: undo an attachment of
   the pointer at host address HOSTADDR.  FINALIZE forces the detach
   regardless of the attachment count.  */
static void
goacc_detach_internal (void **hostaddr, int async, bool finalize)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;
  struct splay_tree_key_s cur_node;
  splay_tree_key n;
  struct goacc_asyncqueue *aq = get_goacc_asyncqueue (async);

  /* Detachment is meaningless when host and device share memory.  */
  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_mutex_lock (&acc_dev->lock);

  /* Look up the mapping that contains the pointer itself.  */
  cur_node.host_start = (uintptr_t) hostaddr;
  cur_node.host_end = cur_node.host_start + sizeof (void *);
  n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);

  if (n == NULL)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("struct not mapped for acc_detach");
    }

  gomp_detach_pointer (acc_dev, aq, n, (uintptr_t) hostaddr, finalize, NULL);

  gomp_mutex_unlock (&acc_dev->lock);
}
979 
/* Synchronously detach the pointer at HOSTADDR.  */
void
acc_detach (void **hostaddr)
{
  goacc_detach_internal (hostaddr, acc_async_sync, false);
}
985 
/* Asynchronous variant of acc_detach.  */
void
acc_detach_async (void **hostaddr, int async)
{
  goacc_detach_internal (hostaddr, async, false);
}
991 
/* Synchronously force-detach the pointer at HOSTADDR.  */
void
acc_detach_finalize (void **hostaddr)
{
  goacc_detach_internal (hostaddr, acc_async_sync, true);
}
997 
/* Asynchronous variant of acc_detach_finalize.  */
void
acc_detach_finalize_async (void **hostaddr, int async)
{
  goacc_detach_internal (hostaddr, async, true);
}
1003 
/* Some types of (pointer) variables use several consecutive mappings, which
   must be treated as a group for enter/exit data directives.  This function
   returns the last mapping in such a group (inclusive), or POS for singleton
   mappings.  */

static int
find_group_last (int pos, size_t mapnum, size_t *sizes, unsigned short *kinds)
{
  unsigned char kind0 = kinds[pos] & 0xff;
  int first_pos = pos;

  switch (kind0)
    {
    case GOMP_MAP_TO_PSET:
      /* A pointer set may be followed by a single attach...  */
      if (pos + 1 < mapnum
	  && (kinds[pos + 1] & 0xff) == GOMP_MAP_ATTACH)
	return pos + 1;

      /* ...otherwise consume the run of trailing pointer mappings.  */
      while (pos + 1 < mapnum
	     && (kinds[pos + 1] & 0xff) == GOMP_MAP_POINTER)
	pos++;
      /* We expect at least one GOMP_MAP_POINTER (if not a single
	 GOMP_MAP_ATTACH) after a GOMP_MAP_TO_PSET.  */
      assert (pos > first_pos);
      break;

    case GOMP_MAP_STRUCT:
      /* For a struct mapping, sizes[pos] holds the member count.  */
      pos += sizes[pos];
      break;

    case GOMP_MAP_POINTER:
    case GOMP_MAP_ALWAYS_POINTER:
      /* These mappings are only expected after some other mapping.  If we
	 see one by itself, something has gone wrong.  */
      gomp_fatal ("unexpected mapping");
      break;

    case GOMP_MAP_ATTACH:
      break;

    default:
      /* GOMP_MAP_ALWAYS_POINTER can only appear directly after some other
	 mapping.  */
      if (pos + 1 < mapnum)
	{
	  unsigned char kind1 = kinds[pos + 1] & 0xff;
	  if (kind1 == GOMP_MAP_ALWAYS_POINTER)
	    return pos + 1;
	}

      /* We can have a single GOMP_MAP_ATTACH mapping after a to/from
	 mapping.  */
      if (pos + 1 < mapnum
	  && (kinds[pos + 1] & 0xff) == GOMP_MAP_ATTACH)
	return pos + 1;

      /* We can have zero or more GOMP_MAP_POINTER mappings after a to/from
	 (etc.) mapping.  */
      while (pos + 1 < mapnum
	     && (kinds[pos + 1] & 0xff) == GOMP_MAP_POINTER)
	pos++;
    }

  return pos;
}
1069 
1070 /* Map variables for OpenACC "enter data".  We can't just call
1071    goacc_map_vars once, because individual mapped variables might have
1072    "exit data" called for them at different times.  */
1073 
static void
goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
			   void **hostaddrs, size_t *sizes,
			   unsigned short *kinds, goacc_aq aq)
{
  gomp_mutex_lock (&acc_dev->lock);

  /* Walk the mappings group by group (see find_group_last); 'i' is advanced
     to 'group_last' at the bottom of the loop.  */
  for (size_t i = 0; i < mapnum; i++)
    {
      splay_tree_key n;
      size_t group_last = find_group_last (i, mapnum, sizes, kinds);
      bool struct_p = false;
      size_t size, groupnum = (group_last - i) + 1;

      /* Determine the host address range to look up for the group's first
	 mapping.  */
      switch (kinds[i] & 0xff)
	{
	case GOMP_MAP_STRUCT:
	  {
	    /* The struct spans from the first member's host address to the
	       end of the last member.  */
	    size = (uintptr_t) hostaddrs[group_last] + sizes[group_last]
		   - (uintptr_t) hostaddrs[i];
	    struct_p = true;
	  }
	  break;

	case GOMP_MAP_ATTACH:
	  size = sizeof (void *);
	  break;

	default:
	  size = sizes[i];
	}

      n = lookup_host (acc_dev, hostaddrs[i], size);

      if (n && struct_p)
	{
	  /* Struct already present: verify every member of this group is
	     mapped in the same target_mem_desc, at the device offset
	     matching its host offset.  */
	  for (size_t j = i + 1; j <= group_last; j++)
	    {
	      struct splay_tree_key_s cur_node;
	      cur_node.host_start = (uintptr_t) hostaddrs[j];
	      cur_node.host_end = cur_node.host_start + sizes[j];
	      splay_tree_key n2
		= splay_tree_lookup (&acc_dev->mem_map, &cur_node);
	      if (!n2
		  || n2->tgt != n->tgt
		  || n2->host_start - n->host_start
		     != n2->tgt_offset - n->tgt_offset)
		{
		  gomp_mutex_unlock (&acc_dev->lock);
		  gomp_fatal ("Trying to map into device [%p..%p) structure "
			      "element when other mapped elements from the "
			      "same structure weren't mapped together with "
			      "it", (void *) cur_node.host_start,
			      (void *) cur_node.host_end);
		}
	    }
	  /* This is a special case because we must increment the refcount by
	     the number of mapped struct elements, rather than by one.  */
	  if (n->refcount != REFCOUNT_INFINITY)
	    n->refcount += groupnum - 1;
	  n->dynamic_refcount += groupnum - 1;
	}
      else if (n && groupnum == 1)
	{
	  /* A singleton mapping that is already present.  */
	  void *h = hostaddrs[i];
	  size_t s = sizes[i];

	  if ((kinds[i] & 0xff) == GOMP_MAP_ATTACH)
	    {
	      gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n,
				   (uintptr_t) h, s, NULL);
	      /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic
		 reference counts ('n->refcount', 'n->dynamic_refcount').  */
	    }
	  else
	    goacc_map_var_existing (acc_dev, h, s, n);
	}
      else if (n && groupnum > 1)
	{
	  /* A multi-mapping group whose leader is already present.  */
	  assert (n->refcount != REFCOUNT_INFINITY
		  && n->refcount != REFCOUNT_LINK);

	  /* First perform any attach operations in the group.  */
	  for (size_t j = i + 1; j <= group_last; j++)
	    if ((kinds[j] & 0xff) == GOMP_MAP_ATTACH)
	      {
		splay_tree_key m
		  = lookup_host (acc_dev, hostaddrs[j], sizeof (void *));
		gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, m,
				     (uintptr_t) hostaddrs[j], sizes[j], NULL);
	      }

	  bool processed = false;

	  struct target_mem_desc *tgt = n->tgt;
	  for (size_t j = 0; j < tgt->list_count; j++)
	    if (tgt->list[j].key == n)
	      {
		/* We are processing a group of mappings (e.g.
		   [GOMP_MAP_TO, GOMP_MAP_TO_PSET, GOMP_MAP_POINTER]).
		   Find the right group in the target_mem_desc's variable
		   list, and increment the refcounts for each item in that
		   group.  */
		for (size_t k = 0; k < groupnum; k++)
		  if (j + k < tgt->list_count
		      && tgt->list[j + k].key
		      && !tgt->list[j + k].is_attach)
		    {
		      tgt->list[j + k].key->refcount++;
		      tgt->list[j + k].key->dynamic_refcount++;
		    }
		processed = true;
		break;
	      }

	  if (!processed)
	    {
	      gomp_mutex_unlock (&acc_dev->lock);
	      gomp_fatal ("dynamic refcount incrementing failed for "
			  "pointer/pset");
	    }
	}
      else if (hostaddrs[i])
	{
	  /* The data is not mapped already.  Map it now, unless the first
	     member in the group has a NULL pointer (e.g. a non-present
	     optional parameter).  */
	  gomp_mutex_unlock (&acc_dev->lock);

	  /* goacc_map_vars takes the device lock itself, so it must be
	     dropped around the call.  */
	  struct target_mem_desc *tgt
	    = goacc_map_vars (acc_dev, aq, groupnum, &hostaddrs[i], NULL,
			      &sizes[i], &kinds[i], true,
			      GOMP_MAP_VARS_ENTER_DATA);
	  assert (tgt);

	  gomp_mutex_lock (&acc_dev->lock);

	  /* Give each newly mapped non-attach entry a dynamic reference.  */
	  for (size_t j = 0; j < tgt->list_count; j++)
	    {
	      n = tgt->list[j].key;
	      if (n && !tgt->list[j].is_attach)
		n->dynamic_refcount++;
	    }
	}

      /* Skip over the rest of this group.  */
      i = group_last;
    }

  gomp_mutex_unlock (&acc_dev->lock);
}
1223 
1224 /* Unmap variables for OpenACC "exit data".  */
1225 
1226 static void
goacc_exit_data_internal(struct gomp_device_descr * acc_dev,size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned short * kinds,goacc_aq aq)1227 goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
1228 			  void **hostaddrs, size_t *sizes,
1229 			  unsigned short *kinds, goacc_aq aq)
1230 {
1231   gomp_mutex_lock (&acc_dev->lock);
1232 
1233   /* Handle "detach" before copyback/deletion of mapped data.  */
1234   for (size_t i = 0; i < mapnum; ++i)
1235     {
1236       unsigned char kind = kinds[i] & 0xff;
1237       bool finalize = false;
1238       switch (kind)
1239 	{
1240 	case GOMP_MAP_FORCE_DETACH:
1241 	  finalize = true;
1242 	  /* Fallthrough.  */
1243 
1244 	case GOMP_MAP_DETACH:
1245 	  {
1246 	    struct splay_tree_key_s cur_node;
1247 	    uintptr_t hostaddr = (uintptr_t) hostaddrs[i];
1248 	    cur_node.host_start = hostaddr;
1249 	    cur_node.host_end = cur_node.host_start + sizeof (void *);
1250 	    splay_tree_key n
1251 	      = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
1252 
1253 	    if (n == NULL)
1254 	      {
1255 		gomp_mutex_unlock (&acc_dev->lock);
1256 		gomp_fatal ("struct not mapped for detach operation");
1257 	      }
1258 
1259 	    gomp_detach_pointer (acc_dev, aq, n, hostaddr, finalize, NULL);
1260 	  }
1261 	  break;
1262 	default:
1263 	  ;
1264 	}
1265     }
1266 
1267   for (size_t i = 0; i < mapnum; ++i)
1268     {
1269       unsigned char kind = kinds[i] & 0xff;
1270 
1271       switch (kind)
1272 	{
1273 	case GOMP_MAP_FROM:
1274 	case GOMP_MAP_FORCE_FROM:
1275 	case GOMP_MAP_TO_PSET:
1276 	case GOMP_MAP_POINTER:
1277 	case GOMP_MAP_DELETE:
1278 	case GOMP_MAP_RELEASE:
1279 	  {
1280 	    struct splay_tree_key_s cur_node;
1281 	    size_t size;
1282 	    if (kind == GOMP_MAP_POINTER)
1283 	      size = sizeof (void *);
1284 	    else
1285 	      size = sizes[i];
1286 	    cur_node.host_start = (uintptr_t) hostaddrs[i];
1287 	    cur_node.host_end = cur_node.host_start + size;
1288 	    splay_tree_key n
1289 	      = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
1290 
1291 	    if (n == NULL)
1292 	      continue;
1293 
1294 	    goacc_exit_datum_1 (acc_dev, hostaddrs[i], size, kind, n, aq);
1295 	  }
1296 	  break;
1297 
1298 	case GOMP_MAP_STRUCT:
1299 	  /* Skip the 'GOMP_MAP_STRUCT' itself, and use the regular processing
1300 	     for all its entries.  This special handling exists for GCC 10.1
1301 	     compatibility; afterwards, we're not generating these no-op
1302 	     'GOMP_MAP_STRUCT's anymore.  */
1303 	  break;
1304 
1305 	case GOMP_MAP_DETACH:
1306 	case GOMP_MAP_FORCE_DETACH:
1307 	  /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic
1308 	     reference counts ('n->refcount', 'n->dynamic_refcount').  */
1309 	  break;
1310 
1311 	default:
1312 	  gomp_fatal (">>>> goacc_exit_data_internal UNHANDLED kind 0x%.2x",
1313 			  kind);
1314 	}
1315     }
1316 
1317   gomp_mutex_unlock (&acc_dev->lock);
1318 }
1319 
/* Common implementation of OpenACC "enter data" and "exit data" (selected by
   DATA_ENTER): set up profiling-interface dispatch, handle shared-memory
   devices and host fallback, perform any waits, then hand off to
   goacc_enter_data_internal or goacc_exit_data_internal.  */

static void
goacc_enter_exit_data_internal (int flags_m, size_t mapnum, void **hostaddrs,
				size_t *sizes, unsigned short *kinds,
				bool data_enter, int async, int num_waits,
				va_list *ap)
{
  int flags = GOACC_FLAGS_UNMARSHAL (flags_m);

  struct goacc_thread *thr;
  struct gomp_device_descr *acc_dev;

  goacc_lazy_initialize ();

  thr = goacc_thread ();
  acc_dev = thr->dev;

  bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);

  /* Only fill in the profiling records when a tool is attached.  */
  acc_prof_info prof_info;
  if (profiling_p)
    {
      thr->prof_info = &prof_info;

      prof_info.event_type
	= data_enter ? acc_ev_enter_data_start : acc_ev_exit_data_start;
      prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
      prof_info.version = _ACC_PROF_INFO_VERSION;
      prof_info.device_type = acc_device_type (acc_dev->type);
      prof_info.device_number = acc_dev->target_id;
      prof_info.thread_id = -1;
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
      prof_info.src_file = NULL;
      prof_info.func_name = NULL;
      prof_info.line_no = -1;
      prof_info.end_line_no = -1;
      prof_info.func_line_no = -1;
      prof_info.func_end_line_no = -1;
    }
  acc_event_info enter_exit_data_event_info;
  if (profiling_p)
    {
      enter_exit_data_event_info.other_event.event_type
	= prof_info.event_type;
      enter_exit_data_event_info.other_event.valid_bytes
	= _ACC_OTHER_EVENT_INFO_VALID_BYTES;
      enter_exit_data_event_info.other_event.parent_construct
	= data_enter ? acc_construct_enter_data : acc_construct_exit_data;
      enter_exit_data_event_info.other_event.implicit = 0;
      enter_exit_data_event_info.other_event.tool_info = NULL;
    }
  acc_api_info api_info;
  if (profiling_p)
    {
      thr->api_info = &api_info;

      api_info.device_api = acc_device_api_none;
      api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
      api_info.device_type = prof_info.device_type;
      api_info.vendor = -1;
      api_info.device_handle = NULL;
      api_info.context_handle = NULL;
      api_info.async_handle = NULL;
    }

  /* Dispatch the "start" event.  */
  if (profiling_p)
    goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
			      &api_info);

  /* Nothing to map on shared-memory devices or under host fallback; still
     emit the profiling "end" event.  */
  if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
      || (flags & GOACC_FLAG_HOST_FALLBACK))
    {
      /* NOTE(review): these stores are not guarded by profiling_p; when
	 profiling is disabled the structs are uninitialized, but they are
	 also never read afterwards — confirm this is intentional.  */
      prof_info.device_type = acc_device_host;
      api_info.device_type = prof_info.device_type;

      goto out_prof;
    }

  if (num_waits)
    goacc_wait (async, num_waits, ap);

  goacc_aq aq = get_goacc_asyncqueue (async);

  if (data_enter)
    goacc_enter_data_internal (acc_dev, mapnum, hostaddrs, sizes, kinds, aq);
  else
    goacc_exit_data_internal (acc_dev, mapnum, hostaddrs, sizes, kinds, aq);

 out_prof:
  /* Dispatch the matching "end" event and clear the per-thread profiling
     pointers installed above.  */
  if (profiling_p)
    {
      prof_info.event_type
	= data_enter ? acc_ev_enter_data_end : acc_ev_exit_data_end;
      enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
      goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
				&api_info);

      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}
1421 
1422 /* Legacy entry point (GCC 11 and earlier).  */
1423 
1424 void
GOACC_enter_exit_data(int flags_m,size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned short * kinds,int async,int num_waits,...)1425 GOACC_enter_exit_data (int flags_m, size_t mapnum, void **hostaddrs,
1426 		       size_t *sizes, unsigned short *kinds, int async,
1427 		       int num_waits, ...)
1428 {
1429   /* Determine if this is an OpenACC "enter data".  */
1430   bool data_enter = false;
1431   for (size_t i = 0; i < mapnum; ++i)
1432     {
1433       unsigned char kind = kinds[i] & 0xff;
1434 
1435       if (kind == GOMP_MAP_POINTER
1436 	  || kind == GOMP_MAP_TO_PSET
1437 	  || kind == GOMP_MAP_STRUCT)
1438 	continue;
1439 
1440       if (kind == GOMP_MAP_FORCE_ALLOC
1441 	  || kind == GOMP_MAP_FORCE_PRESENT
1442 	  || kind == GOMP_MAP_ATTACH
1443 	  || kind == GOMP_MAP_FORCE_TO
1444 	  || kind == GOMP_MAP_TO
1445 	  || kind == GOMP_MAP_ALLOC)
1446 	{
1447 	  data_enter = true;
1448 	  break;
1449 	}
1450 
1451       if (kind == GOMP_MAP_RELEASE
1452 	  || kind == GOMP_MAP_DELETE
1453 	  || kind == GOMP_MAP_DETACH
1454 	  || kind == GOMP_MAP_FORCE_DETACH
1455 	  || kind == GOMP_MAP_FROM
1456 	  || kind == GOMP_MAP_FORCE_FROM)
1457 	break;
1458 
1459       gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
1460 		      kind);
1461     }
1462 
1463   va_list ap;
1464   va_start (ap, num_waits);
1465   goacc_enter_exit_data_internal (flags_m, mapnum, hostaddrs, sizes, kinds,
1466 				  data_enter, async, num_waits, &ap);
1467   va_end (ap);
1468 }
1469 
/* OpenACC "enter data" entry point: forward to the common implementation
   with data_enter set.  */

void
GOACC_enter_data (int flags_m, size_t mapnum, void **hostaddrs,
		  size_t *sizes, unsigned short *kinds, int async,
		  int num_waits, ...)
{
  va_list ap;
  va_start (ap, num_waits);
  goacc_enter_exit_data_internal (flags_m, mapnum, hostaddrs, sizes, kinds,
				  true, async, num_waits, &ap);
  va_end (ap);
}
1481 
/* OpenACC "exit data" entry point: forward to the common implementation
   with data_enter cleared.  */

void
GOACC_exit_data (int flags_m, size_t mapnum, void **hostaddrs,
		 size_t *sizes, unsigned short *kinds, int async,
		 int num_waits, ...)
{
  va_list ap;
  va_start (ap, num_waits);
  goacc_enter_exit_data_internal (flags_m, mapnum, hostaddrs, sizes, kinds,
				  false, async, num_waits, &ap);
  va_end (ap);
}
1493 
1494 void
GOACC_declare(int flags_m,size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned short * kinds)1495 GOACC_declare (int flags_m, size_t mapnum,
1496 	       void **hostaddrs, size_t *sizes, unsigned short *kinds)
1497 {
1498   for (size_t i = 0; i < mapnum; i++)
1499     {
1500       unsigned char kind = kinds[i] & 0xff;
1501 
1502       if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
1503 	continue;
1504 
1505       switch (kind)
1506 	{
1507 	case GOMP_MAP_ALLOC:
1508 	  if (acc_is_present (hostaddrs[i], sizes[i]))
1509 	    continue;
1510 	  /* FALLTHRU */
1511 	case GOMP_MAP_FORCE_ALLOC:
1512 	case GOMP_MAP_TO:
1513 	case GOMP_MAP_FORCE_TO:
1514 	  goacc_enter_exit_data_internal (flags_m, 1, &hostaddrs[i], &sizes[i],
1515 					  &kinds[i], true, GOMP_ASYNC_SYNC, 0, NULL);
1516 	  break;
1517 
1518 	case GOMP_MAP_FROM:
1519 	case GOMP_MAP_FORCE_FROM:
1520 	case GOMP_MAP_RELEASE:
1521 	case GOMP_MAP_DELETE:
1522 	  goacc_enter_exit_data_internal (flags_m, 1, &hostaddrs[i], &sizes[i],
1523 					  &kinds[i], false, GOMP_ASYNC_SYNC, 0, NULL);
1524 	  break;
1525 
1526 	case GOMP_MAP_FORCE_DEVICEPTR:
1527 	  break;
1528 
1529 	case GOMP_MAP_FORCE_PRESENT:
1530 	  if (!acc_is_present (hostaddrs[i], sizes[i]))
1531 	    gomp_fatal ("[%p,%ld] is not mapped", hostaddrs[i],
1532 			(unsigned long) sizes[i]);
1533 	  break;
1534 
1535 	default:
1536 	  assert (0);
1537 	  break;
1538 	}
1539     }
1540 }
1541