/* OpenACC Runtime initialization routines

   Copyright (C) 2013-2019 Free Software Foundation, Inc.

   Contributed by Mentor Embedded.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#include "openacc.h"
#include "config.h"
#include "libgomp.h"
#include "gomp-constants.h"
#include "oacc-int.h"
#include <stdint.h>
#include <string.h>
#include <assert.h>

/* Return the block containing [H,H+S), or NULL if it is not contained.
   The device lock for DEV must be locked on entry, and remains locked on
   exit.  */

static splay_tree_key
lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
{
  struct splay_tree_key_s node;
  splay_tree_key key;

  node.host_start = (uintptr_t) h;
  node.host_end = (uintptr_t) h + s;

  key = splay_tree_lookup (&dev->mem_map, &node);

  return key;
}

/* Return the block containing [D,D+S), or NULL if it is not contained.
   The descriptor chain isn't ordered by device address, so we have to
   iterate over the whole chain.  This is not expected to be a common
   operation.  The device lock associated with TGT must be locked on
   entry, and remains locked on exit.  */

static splay_tree_key
lookup_dev (struct target_mem_desc *tgt, void *d, size_t s)
{
  int i;
  struct target_mem_desc *t;

  if (!tgt)
    return NULL;

  for (t = tgt; t != NULL; t = t->prev)
    {
      if (t->tgt_start <= (uintptr_t) d && t->tgt_end >= (uintptr_t) d + s)
        break;
    }

  if (!t)
    return NULL;

  for (i = 0; i < t->list_count; i++)
    {
      void *offset;

      splay_tree_key k = &t->array[i].key;
      offset = d - t->tgt_start + k->tgt_offset;

      if (k->host_start + offset <= (void *) k->host_end)
        return k;
    }

  return NULL;
}
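
/* For instance (illustrative numbers only, in the simplest case of a
   single object mapped at the start of a target block, so tgt_offset
   is 0): with host_start = 0x1000 and tgt_start = 0x9000, a device
   pointer d = 0x9100 falls inside [tgt_start, tgt_end) and corresponds
   to host address host_start + (d - tgt_start) = 0x1100.  */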

/* OpenACC is silent on how memory exhaustion is indicated.  We return
   NULL.  */

void *
acc_malloc (size_t s)
{
  if (!s)
    return NULL;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();

  assert (thr->dev);

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return malloc (s);

  return thr->dev->alloc_func (thr->dev->target_id, s);
}
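
/* Illustrative usage (a sketch, not part of the library): the NULL
   result on exhaustion must be checked by the caller.

     size_t bytes = 1024 * sizeof (float);
     void *d = acc_malloc (bytes);
     if (d == NULL)
       abort ();
     ...
     acc_free (d);
*/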

/* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event
   the device address is mapped.  We choose to check whether it is
   mapped, and if it is, to unmap it.  */

void
acc_free (void *d)
{
  splay_tree_key k;

  if (!d)
    return;

  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return free (d);

  gomp_mutex_lock (&acc_dev->lock);

  /* We don't have to call lazy open here, as the pointer value must have
     been returned by acc_malloc.  It's not permitted to pass NULL in
     (unless you got that NULL from acc_malloc).  */
  if ((k = lookup_dev (acc_dev->openacc.data_environ, d, 1)))
    {
      void *offset;

      offset = d - k->tgt->tgt_start + k->tgt_offset;

      gomp_mutex_unlock (&acc_dev->lock);

      acc_unmap_data ((void *)(k->host_start + offset));
    }
  else
    gomp_mutex_unlock (&acc_dev->lock);

  if (!acc_dev->free_func (acc_dev->target_id, d))
    gomp_fatal ("error in freeing device memory in %s", __FUNCTION__);
}

static void
memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async,
		      const char *libfnname)
{
  /* No need to call lazy open here, as the device pointer must have
     been obtained from a routine that did that.  */
  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (from)
	memmove (h, d, s);
      else
	memmove (d, h, s);
      return;
    }

  if (async > acc_async_sync)
    thr->dev->openacc.async_set_async_func (async);

  bool ret = (from
	      ? thr->dev->dev2host_func (thr->dev->target_id, h, d, s)
	      : thr->dev->host2dev_func (thr->dev->target_id, d, h, s));

  if (async > acc_async_sync)
    thr->dev->openacc.async_set_async_func (acc_async_sync);

  if (!ret)
    gomp_fatal ("error in %s", libfnname);
}

void
acc_memcpy_to_device (void *d, void *h, size_t s)
{
  memcpy_tofrom_device (false, d, h, s, acc_async_sync, __FUNCTION__);
}

void
acc_memcpy_to_device_async (void *d, void *h, size_t s, int async)
{
  memcpy_tofrom_device (false, d, h, s, async, __FUNCTION__);
}

void
acc_memcpy_from_device (void *h, void *d, size_t s)
{
  memcpy_tofrom_device (true, d, h, s, acc_async_sync, __FUNCTION__);
}

void
acc_memcpy_from_device_async (void *h, void *d, size_t s, int async)
{
  memcpy_tofrom_device (true, d, h, s, async, __FUNCTION__);
}
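
/* Illustrative usage (a sketch): copy an array to the device and back
   synchronously; the _async variants additionally take an async queue
   argument.

     float a[256];
     void *d = acc_malloc (sizeof a);
     acc_memcpy_to_device (d, a, sizeof a);
     ... operate on d on the device ...
     acc_memcpy_from_device (a, d, sizeof a);
     acc_free (d);
*/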

/* Return the device pointer that corresponds to host data H, or NULL
   if there is no mapping.  */

void *
acc_deviceptr (void *h)
{
  splay_tree_key n;
  void *d;
  void *offset;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *dev = thr->dev;

  if (dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return h;

  gomp_mutex_lock (&dev->lock);

  n = lookup_host (dev, h, 1);

  if (!n)
    {
      gomp_mutex_unlock (&dev->lock);
      return NULL;
    }

  offset = h - n->host_start;

  d = n->tgt->tgt_start + n->tgt_offset + offset;

  gomp_mutex_unlock (&dev->lock);

  return d;
}

/* Return the host pointer that corresponds to device data D, or NULL
   if there is no mapping.  */

void *
acc_hostptr (void *d)
{
  splay_tree_key n;
  void *h;
  void *offset;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return d;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_dev (acc_dev->openacc.data_environ, d, 1);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      return NULL;
    }

  offset = d - n->tgt->tgt_start + n->tgt_offset;

  h = n->host_start + offset;

  gomp_mutex_unlock (&acc_dev->lock);

  return h;
}
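
/* Illustrative round trip (a sketch): for host data H mapped via the
   functions in this file or an OpenACC data construct, the two
   translations are inverses of one another; on shared-memory devices
   each simply returns its argument.

     void *d = acc_deviceptr (h);
     void *h2 = acc_hostptr (d);
     assert (h2 == h);
*/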

/* Return 1 if host data [H,+S] is present on the device.  */

int
acc_is_present (void *h, size_t s)
{
  splay_tree_key n;

  if (!s || !h)
    return 0;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return h != NULL;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  if (n && ((uintptr_t)h < n->host_start
	    || (uintptr_t)h + s > n->host_end
	    || s > n->host_end - n->host_start))
    n = NULL;

  gomp_mutex_unlock (&acc_dev->lock);

  return n != NULL;
}
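
/* Illustrative usage (a sketch): guard an explicit mapping with a
   presence test to avoid double-mapping.  Note that on shared-memory
   devices any non-NULL host pointer counts as present.

     if (!acc_is_present (h, s))
       acc_copyin (h, s);
*/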

/* Create a mapping for host [H,+S] -> device [D,+S].  */

void
acc_map_data (void *h, void *d, size_t s)
{
  struct target_mem_desc *tgt = NULL;
  size_t mapnum = 1;
  void *hostaddrs = h;
  void *devaddrs = d;
  size_t sizes = s;
  unsigned short kinds = GOMP_MAP_ALLOC;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (d != h)
	gomp_fatal ("cannot map data on shared-memory system");

      /* Nothing to record on a shared-memory system; return early rather
	 than dereference the NULL descriptor below.  */
      return;
    }

  if (!d || !h || !s)
    gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
		(void *)h, (int)s, (void *)d, (int)s);

  gomp_mutex_lock (&acc_dev->lock);

  if (lookup_host (acc_dev, h, s))
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
		  (int)s);
    }

  if (lookup_dev (acc_dev->openacc.data_environ, d, s))
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
		  (int)s);
    }

  gomp_mutex_unlock (&acc_dev->lock);

  tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
		       &kinds, true, GOMP_MAP_VARS_OPENACC);
  tgt->list[0].key->refcount = REFCOUNT_INFINITY;

  gomp_mutex_lock (&acc_dev->lock);
  tgt->prev = acc_dev->openacc.data_environ;
  acc_dev->openacc.data_environ = tgt;
  gomp_mutex_unlock (&acc_dev->lock);
}

void
acc_unmap_data (void *h)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* No need to call lazy open, as the address must have been mapped.  */

  /* This is a no-op on shared-memory targets.  */
  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  size_t host_size;

  gomp_mutex_lock (&acc_dev->lock);

  splay_tree_key n = lookup_host (acc_dev, h, 1);
  struct target_mem_desc *t;

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  host_size = n->host_end - n->host_start;

  if (n->host_start != (uintptr_t) h)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds %p",
		  (void *) n->host_start, (int) host_size, (void *) h);
    }

  /* Mark for removal.  */
  n->refcount = 1;

  t = n->tgt;

  if (t->refcount == 2)
    {
      struct target_mem_desc *tp;

      /* This is the last reference, so pull the descriptor off the
	 chain.  This prevents gomp_unmap_vars (called via gomp_unmap_tgt)
	 from freeing the device memory.  */
      t->tgt_end = 0;
      t->to_free = 0;

      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
	   tp = t, t = t->prev)
	if (n->tgt == t)
	  {
	    if (tp)
	      tp->prev = t->prev;
	    else
	      acc_dev->openacc.data_environ = t->prev;

	    break;
	  }
    }

  gomp_mutex_unlock (&acc_dev->lock);

  gomp_unmap_vars (t, true);
}
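
/* Illustrative pairing (a sketch): acc_map_data registers device memory
   that the application allocated itself, and acc_unmap_data dissolves
   the mapping without freeing the device memory.

     void *d = acc_malloc (s);
     acc_map_data (h, d, s);
     ... h is now present; OpenACC regions may use it ...
     acc_unmap_data (h);
     acc_free (d);
*/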

#define FLAG_PRESENT (1 << 0)
#define FLAG_CREATE (1 << 1)
#define FLAG_COPY (1 << 2)

static void *
present_create_copy (unsigned f, void *h, size_t s, int async)
{
  void *d;
  splay_tree_key n;

  if (!h || !s)
    gomp_fatal ("[%p,+%d] is a bad range", (void *)h, (int)s);

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return h;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);
  if (n)
    {
      /* Present.  */
      d = (void *) (n->tgt->tgt_start + n->tgt_offset);

      if (!(f & FLAG_PRESENT))
	{
	  gomp_mutex_unlock (&acc_dev->lock);
	  gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
		      (void *)h, (int)s, (void *)d, (int)s);
	}
      if ((h + s) > (void *)n->host_end)
	{
	  gomp_mutex_unlock (&acc_dev->lock);
	  gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
	}

      if (n->refcount != REFCOUNT_INFINITY)
	{
	  n->refcount++;
	  n->dynamic_refcount++;
	}
      gomp_mutex_unlock (&acc_dev->lock);
    }
  else if (!(f & FLAG_CREATE))
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
    }
  else
    {
      struct target_mem_desc *tgt;
      size_t mapnum = 1;
      unsigned short kinds;
      void *hostaddrs = h;

      if (f & FLAG_COPY)
	kinds = GOMP_MAP_TO;
      else
	kinds = GOMP_MAP_ALLOC;

      gomp_mutex_unlock (&acc_dev->lock);

      if (async > acc_async_sync)
	acc_dev->openacc.async_set_async_func (async);

      tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true,
			   GOMP_MAP_VARS_OPENACC);
      /* Initialize dynamic refcount.  */
      tgt->list[0].key->dynamic_refcount = 1;

      if (async > acc_async_sync)
	acc_dev->openacc.async_set_async_func (acc_async_sync);

      gomp_mutex_lock (&acc_dev->lock);

      d = tgt->to_free;
      tgt->prev = acc_dev->openacc.data_environ;
      acc_dev->openacc.data_environ = tgt;

      gomp_mutex_unlock (&acc_dev->lock);
    }

  return d;
}
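
/* The entry points below select flag combinations as follows
   (FLAG_PRESENT accepts an existing mapping, FLAG_CREATE permits
   creating a new one, FLAG_COPY additionally copies the host data to
   the device):

     acc_create, acc_create_async:  FLAG_PRESENT | FLAG_CREATE
     acc_copyin, acc_copyin_async:  FLAG_PRESENT | FLAG_CREATE | FLAG_COPY
*/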

void *
acc_create (void *h, size_t s)
{
  return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s, acc_async_sync);
}

void
acc_create_async (void *h, size_t s, int async)
{
  present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s, async);
}

/* acc_present_or_create used to be what acc_create is now.  */
/* acc_pcreate is acc_present_or_create by a different name.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_create, acc_present_or_create)
strong_alias (acc_create, acc_pcreate)
#else
void *
acc_present_or_create (void *h, size_t s)
{
  return acc_create (h, s);
}

void *
acc_pcreate (void *h, size_t s)
{
  return acc_create (h, s);
}
#endif

void *
acc_copyin (void *h, size_t s)
{
  return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s,
			      acc_async_sync);
}

void
acc_copyin_async (void *h, size_t s, int async)
{
  present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s, async);
}

/* acc_present_or_copyin used to be what acc_copyin is now.  */
/* acc_pcopyin is acc_present_or_copyin by a different name.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_copyin, acc_present_or_copyin)
strong_alias (acc_copyin, acc_pcopyin)
#else
void *
acc_present_or_copyin (void *h, size_t s)
{
  return acc_copyin (h, s);
}

void *
acc_pcopyin (void *h, size_t s)
{
  return acc_copyin (h, s);
}
#endif
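
/* Illustrative lifecycle (a sketch): acc_copyin maps A and copies the
   host contents to the device; acc_copyout (defined below) copies the
   data back and releases the mapping once the last dynamic reference
   is dropped.

     acc_copyin (a, sizeof a);
     ... compute on the device ...
     acc_copyout (a, sizeof a);
*/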

#define FLAG_COPYOUT  (1 << 0)
#define FLAG_FINALIZE (1 << 1)

static void
delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname)
{
  size_t host_size;
  splay_tree_key n;
  void *d;
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_mutex_lock (&acc_dev->lock);

  /* No need to call lazy open, as the data must already have been
     mapped.  */
  n = lookup_host (acc_dev, h, s);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);
    }

  d = (void *) (n->tgt->tgt_start + n->tgt_offset
		+ (uintptr_t) h - n->host_start);

  host_size = n->host_end - n->host_start;

  if (n->host_start != (uintptr_t) h || host_size != s)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
		  (void *) n->host_start, (int) host_size, (void *) h, (int) s);
    }

  if (n->refcount == REFCOUNT_INFINITY)
    {
      n->refcount = 0;
      n->dynamic_refcount = 0;
    }
  if (n->refcount < n->dynamic_refcount)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("Dynamic reference counting assert fail\n");
    }

  if (f & FLAG_FINALIZE)
    {
      n->refcount -= n->dynamic_refcount;
      n->dynamic_refcount = 0;
    }
  else if (n->dynamic_refcount)
    {
      n->dynamic_refcount--;
      n->refcount--;
    }

  if (n->refcount == 0)
    {
      if (n->tgt->refcount == 2)
	{
	  struct target_mem_desc *tp, *t;
	  for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
	       tp = t, t = t->prev)
	    if (n->tgt == t)
	      {
		if (tp)
		  tp->prev = t->prev;
		else
		  acc_dev->openacc.data_environ = t->prev;
		break;
	      }
	}

      if (f & FLAG_COPYOUT)
	{
	  if (async > acc_async_sync)
	    acc_dev->openacc.async_set_async_func (async);
	  acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
	  if (async > acc_async_sync)
	    acc_dev->openacc.async_set_async_func (acc_async_sync);
	}

      gomp_remove_var (acc_dev, n);
    }

  gomp_mutex_unlock (&acc_dev->lock);
}

void
acc_delete (void *h, size_t s)
{
  delete_copyout (0, h, s, acc_async_sync, __FUNCTION__);
}

void
acc_delete_async (void *h, size_t s, int async)
{
  delete_copyout (0, h, s, async, __FUNCTION__);
}

void
acc_delete_finalize (void *h, size_t s)
{
  delete_copyout (FLAG_FINALIZE, h, s, acc_async_sync, __FUNCTION__);
}

void
acc_delete_finalize_async (void *h, size_t s, int async)
{
  delete_copyout (FLAG_FINALIZE, h, s, async, __FUNCTION__);
}

void
acc_copyout (void *h, size_t s)
{
  delete_copyout (FLAG_COPYOUT, h, s, acc_async_sync, __FUNCTION__);
}

void
acc_copyout_async (void *h, size_t s, int async)
{
  delete_copyout (FLAG_COPYOUT, h, s, async, __FUNCTION__);
}

void
acc_copyout_finalize (void *h, size_t s)
{
  delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, acc_async_sync,
		  __FUNCTION__);
}

void
acc_copyout_finalize_async (void *h, size_t s, int async)
{
  delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, async, __FUNCTION__);
}
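
/* Illustrative dynamic reference counting (a sketch): each copyin or
   create on present data increments the dynamic refcount, each delete
   or copyout decrements it, and the _finalize variants drop all
   dynamic references at once.

     acc_copyin (a, s);
     acc_copyin (a, s);            still mapped, two dynamic references
     acc_delete (a, s);            still mapped, one dynamic reference
     acc_delete_finalize (a, s);   unmapped, all references released
*/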

static void
update_dev_host (int is_dev, void *h, size_t s, int async)
{
  splay_tree_key n;
  void *d;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
    }

  d = (void *) (n->tgt->tgt_start + n->tgt_offset
		+ (uintptr_t) h - n->host_start);

  if (async > acc_async_sync)
    acc_dev->openacc.async_set_async_func (async);

  if (is_dev)
    acc_dev->host2dev_func (acc_dev->target_id, d, h, s);
  else
    acc_dev->dev2host_func (acc_dev->target_id, h, d, s);

  if (async > acc_async_sync)
    acc_dev->openacc.async_set_async_func (acc_async_sync);

  gomp_mutex_unlock (&acc_dev->lock);
}

void
acc_update_device (void *h, size_t s)
{
  update_dev_host (1, h, s, acc_async_sync);
}

void
acc_update_device_async (void *h, size_t s, int async)
{
  update_dev_host (1, h, s, async);
}

void
acc_update_self (void *h, size_t s)
{
  update_dev_host (0, h, s, acc_async_sync);
}

void
acc_update_self_async (void *h, size_t s, int async)
{
  update_dev_host (0, h, s, async);
}
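
/* Illustrative usage (a sketch): refresh one side of an existing
   mapping without remapping anything.

     acc_copyin (a, s);
     ... the host modifies a ...
     acc_update_device (a, s);     device copy now matches the host
     ... the device modifies a ...
     acc_update_self (a, s);       host copy now matches the device
     acc_copyout (a, s);
*/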

void
gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
			 void *kinds)
{
  struct target_mem_desc *tgt;
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_is_present (*hostaddrs, *sizes))
    {
      splay_tree_key n;
      gomp_mutex_lock (&acc_dev->lock);
      n = lookup_host (acc_dev, *hostaddrs, *sizes);
      gomp_mutex_unlock (&acc_dev->lock);

      tgt = n->tgt;
      for (size_t i = 0; i < tgt->list_count; i++)
	if (tgt->list[i].key == n)
	  {
	    for (size_t j = 0; j < mapnum; j++)
	      if (i + j < tgt->list_count && tgt->list[i + j].key)
		{
		  tgt->list[i + j].key->refcount++;
		  tgt->list[i + j].key->dynamic_refcount++;
		}
	    return;
	  }
      /* Should not reach here.  */
      gomp_fatal ("Dynamic refcount incrementing failed for pointer/pset");
    }

  gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
  tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs,
		       NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC);
  gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);

  /* Initialize dynamic refcount.  */
  tgt->list[0].key->dynamic_refcount = 1;

  gomp_mutex_lock (&acc_dev->lock);
  tgt->prev = acc_dev->openacc.data_environ;
  acc_dev->openacc.data_environ = tgt;
  gomp_mutex_unlock (&acc_dev->lock);
}

void
gomp_acc_remove_pointer (void *h, size_t s, bool force_copyfrom, int async,
			 int finalize, int mapnum)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;
  splay_tree_key n;
  struct target_mem_desc *t;
  int minrefs = (mapnum == 1) ? 2 : 3;

  if (!acc_is_present (h, s))
    return;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, 1);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);

  t = n->tgt;

  if (n->refcount < n->dynamic_refcount)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("Dynamic reference counting assert fail\n");
    }

  if (finalize)
    {
      n->refcount -= n->dynamic_refcount;
      n->dynamic_refcount = 0;
    }
  else if (n->dynamic_refcount)
    {
      n->dynamic_refcount--;
      n->refcount--;
    }

  if (n->refcount == 0)
    {
      if (t->refcount == minrefs)
	{
	  /* This is the last reference, so pull the descriptor off the
	     chain.  This prevents gomp_unmap_vars (called via
	     gomp_unmap_tgt) from freeing the device memory.  */
	  struct target_mem_desc *tp;
	  for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
	       tp = t, t = t->prev)
	    {
	      if (n->tgt == t)
		{
		  if (tp)
		    tp->prev = t->prev;
		  else
		    acc_dev->openacc.data_environ = t->prev;
		  break;
		}
	    }
	}

      /* Set refcount to 1 to allow gomp_unmap_vars to unmap it.  */
      n->refcount = 1;
      t->refcount = minrefs;
      for (size_t i = 0; i < t->list_count; i++)
	if (t->list[i].key == n)
	  {
	    t->list[i].copy_from = force_copyfrom ? 1 : 0;
	    break;
	  }

      /* gomp_unmap_vars takes the device lock itself, so release it
	 before unmapping.  */
      gomp_mutex_unlock (&acc_dev->lock);

      /* If running synchronously, unmap immediately.  */
      if (async < acc_async_noval)
	gomp_unmap_vars (t, true);
      else
	t->device_descr->openacc.register_async_cleanup_func (t, async);
    }
  else
    gomp_mutex_unlock (&acc_dev->lock);

  gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
}