1 /* OpenACC Runtime initialization routines
2 
3    Copyright (C) 2013-2018 Free Software Foundation, Inc.
4 
5    Contributed by Mentor Embedded.
6 
7    This file is part of the GNU Offloading and Multi Processing Library
8    (libgomp).
9 
10    Libgomp is free software; you can redistribute it and/or modify it
11    under the terms of the GNU General Public License as published by
12    the Free Software Foundation; either version 3, or (at your option)
13    any later version.
14 
15    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
18    more details.
19 
20    Under Section 7 of GPL version 3, you are granted additional
21    permissions described in the GCC Runtime Library Exception, version
22    3.1, as published by the Free Software Foundation.
23 
24    You should have received a copy of the GNU General Public License and
25    a copy of the GCC Runtime Library Exception along with this program;
26    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
27    <http://www.gnu.org/licenses/>.  */
28 
29 #include "openacc.h"
30 #include "config.h"
31 #include "libgomp.h"
32 #include "gomp-constants.h"
33 #include "oacc-int.h"
34 #include <stdint.h>
35 #include <string.h>
36 #include <assert.h>
37 
38 /* Return block containing [H->S), or NULL if not contained.  The device lock
39    for DEV must be locked on entry, and remains locked on exit.  */
40 
41 static splay_tree_key
42 lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
43 {
44   struct splay_tree_key_s node;
45   splay_tree_key key;
46 
47   node.host_start = (uintptr_t) h;
48   node.host_end = (uintptr_t) h + s;
49 
50   key = splay_tree_lookup (&dev->mem_map, &node);
51 
52   return key;
53 }
54 
/* Return block containing [D->S), or NULL if not contained.
   The list isn't ordered by device address, so we have to iterate
   over the whole array.  This is not expected to be a common
   operation.  The device lock associated with TGT must be locked on entry, and
   remains locked on exit.  */

static splay_tree_key
lookup_dev (struct target_mem_desc *tgt, void *d, size_t s)
{
  int i;
  struct target_mem_desc *t;

  if (!tgt)
    return NULL;

  /* Walk the chain of descriptors to find the one whose device
     allocation [tgt_start, tgt_end) covers all of [D, D+S).  */
  for (t = tgt; t != NULL; t = t->prev)
    {
      if (t->tgt_start <= (uintptr_t) d && t->tgt_end >= (uintptr_t) d + s)
        break;
    }

  if (!t)
    return NULL;

  /* Scan that descriptor's keys for one matching D.
     NOTE(review): the test below only bounds the upper end
     (host_start + offset <= host_end, using GNU void* arithmetic); it
     does not check that D lies at or after this key's own device
     start, so the first key passing the upper-bound test wins —
     confirm this matches the intended lookup semantics.  */
  for (i = 0; i < t->list_count; i++)
    {
      void * offset;

      splay_tree_key k = &t->array[i].key;
      offset = d - t->tgt_start + k->tgt_offset;

      if (k->host_start + offset <= (void *) k->host_end)
        return k;
    }

  return NULL;
}
92 
93 /* OpenACC is silent on how memory exhaustion is indicated.  We return
94    NULL.  */
95 
96 void *
97 acc_malloc (size_t s)
98 {
99   if (!s)
100     return NULL;
101 
102   goacc_lazy_initialize ();
103 
104   struct goacc_thread *thr = goacc_thread ();
105 
106   assert (thr->dev);
107 
108   if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
109     return malloc (s);
110 
111   return thr->dev->alloc_func (thr->dev->target_id, s);
112 }
113 
/* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event
   the device address is mapped. We choose to check if it mapped,
   and if it is, to unmap it. */
void
acc_free (void *d)
{
  splay_tree_key k;

  /* free (NULL) semantics: a no-op.  */
  if (!d)
    return;

  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  struct gomp_device_descr *acc_dev = thr->dev;

  /* Shared-memory targets allocated via plain malloc in acc_malloc.  */
  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return free (d);

  gomp_mutex_lock (&acc_dev->lock);

  /* We don't have to call lazy open here, as the ptr value must have
     been returned by acc_malloc.  It's not permitted to pass NULL in
     (unless you got that null from acc_malloc).  */
  if ((k = lookup_dev (acc_dev->openacc.data_environ, d, 1)))
    {
      void *offset;

      /* Translate the device address back to the mapped host address
	 (GNU void* arithmetic, mirroring lookup_dev's computation).  */
      offset = d - k->tgt->tgt_start + k->tgt_offset;

      /* Drop the lock first: acc_unmap_data re-acquires it.  */
      gomp_mutex_unlock (&acc_dev->lock);

      acc_unmap_data ((void *)(k->host_start + offset));
    }
  else
    gomp_mutex_unlock (&acc_dev->lock);

  /* Release the device memory itself, mapped or not.  */
  if (!acc_dev->free_func (acc_dev->target_id, d))
    gomp_fatal ("error in freeing device memory in %s", __FUNCTION__);
}
155 
156 void
157 acc_memcpy_to_device (void *d, void *h, size_t s)
158 {
159   /* No need to call lazy open here, as the device pointer must have
160      been obtained from a routine that did that.  */
161   struct goacc_thread *thr = goacc_thread ();
162 
163   assert (thr && thr->dev);
164 
165   if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
166     {
167       memmove (d, h, s);
168       return;
169     }
170 
171   if (!thr->dev->host2dev_func (thr->dev->target_id, d, h, s))
172     gomp_fatal ("error in %s", __FUNCTION__);
173 }
174 
175 void
176 acc_memcpy_from_device (void *h, void *d, size_t s)
177 {
178   /* No need to call lazy open here, as the device pointer must have
179      been obtained from a routine that did that.  */
180   struct goacc_thread *thr = goacc_thread ();
181 
182   assert (thr && thr->dev);
183 
184   if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
185     {
186       memmove (h, d, s);
187       return;
188     }
189 
190   if (!thr->dev->dev2host_func (thr->dev->target_id, h, d, s))
191     gomp_fatal ("error in %s", __FUNCTION__);
192 }
193 
194 /* Return the device pointer that corresponds to host data H.  Or NULL
195    if no mapping.  */
196 
197 void *
198 acc_deviceptr (void *h)
199 {
200   splay_tree_key n;
201   void *d;
202   void *offset;
203 
204   goacc_lazy_initialize ();
205 
206   struct goacc_thread *thr = goacc_thread ();
207   struct gomp_device_descr *dev = thr->dev;
208 
209   if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
210     return h;
211 
212   gomp_mutex_lock (&dev->lock);
213 
214   n = lookup_host (dev, h, 1);
215 
216   if (!n)
217     {
218       gomp_mutex_unlock (&dev->lock);
219       return NULL;
220     }
221 
222   offset = h - n->host_start;
223 
224   d = n->tgt->tgt_start + n->tgt_offset + offset;
225 
226   gomp_mutex_unlock (&dev->lock);
227 
228   return d;
229 }
230 
231 /* Return the host pointer that corresponds to device data D.  Or NULL
232    if no mapping.  */
233 
234 void *
235 acc_hostptr (void *d)
236 {
237   splay_tree_key n;
238   void *h;
239   void *offset;
240 
241   goacc_lazy_initialize ();
242 
243   struct goacc_thread *thr = goacc_thread ();
244   struct gomp_device_descr *acc_dev = thr->dev;
245 
246   if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
247     return d;
248 
249   gomp_mutex_lock (&acc_dev->lock);
250 
251   n = lookup_dev (acc_dev->openacc.data_environ, d, 1);
252 
253   if (!n)
254     {
255       gomp_mutex_unlock (&acc_dev->lock);
256       return NULL;
257     }
258 
259   offset = d - n->tgt->tgt_start + n->tgt_offset;
260 
261   h = n->host_start + offset;
262 
263   gomp_mutex_unlock (&acc_dev->lock);
264 
265   return h;
266 }
267 
268 /* Return 1 if host data [H,+S] is present on the device.  */
269 
270 int
271 acc_is_present (void *h, size_t s)
272 {
273   splay_tree_key n;
274 
275   if (!s || !h)
276     return 0;
277 
278   goacc_lazy_initialize ();
279 
280   struct goacc_thread *thr = goacc_thread ();
281   struct gomp_device_descr *acc_dev = thr->dev;
282 
283   if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
284     return h != NULL;
285 
286   gomp_mutex_lock (&acc_dev->lock);
287 
288   n = lookup_host (acc_dev, h, s);
289 
290   if (n && ((uintptr_t)h < n->host_start
291 	    || (uintptr_t)h + s > n->host_end
292 	    || s > n->host_end - n->host_start))
293     n = NULL;
294 
295   gomp_mutex_unlock (&acc_dev->lock);
296 
297   return n != NULL;
298 }
299 
300 /* Create a mapping for host [H,+S] -> device [D,+S] */
301 
302 void
303 acc_map_data (void *h, void *d, size_t s)
304 {
305   struct target_mem_desc *tgt = NULL;
306   size_t mapnum = 1;
307   void *hostaddrs = h;
308   void *devaddrs = d;
309   size_t sizes = s;
310   unsigned short kinds = GOMP_MAP_ALLOC;
311 
312   goacc_lazy_initialize ();
313 
314   struct goacc_thread *thr = goacc_thread ();
315   struct gomp_device_descr *acc_dev = thr->dev;
316 
317   if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
318     {
319       if (d != h)
320         gomp_fatal ("cannot map data on shared-memory system");
321     }
322   else
323     {
324       struct goacc_thread *thr = goacc_thread ();
325 
326       if (!d || !h || !s)
327 	gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
328                     (void *)h, (int)s, (void *)d, (int)s);
329 
330       gomp_mutex_lock (&acc_dev->lock);
331 
332       if (lookup_host (acc_dev, h, s))
333         {
334 	  gomp_mutex_unlock (&acc_dev->lock);
335 	  gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
336 		      (int)s);
337 	}
338 
339       if (lookup_dev (thr->dev->openacc.data_environ, d, s))
340         {
341 	  gomp_mutex_unlock (&acc_dev->lock);
342 	  gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
343 		      (int)s);
344 	}
345 
346       gomp_mutex_unlock (&acc_dev->lock);
347 
348       tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
349 			   &kinds, true, GOMP_MAP_VARS_OPENACC);
350     }
351 
352   gomp_mutex_lock (&acc_dev->lock);
353   tgt->prev = acc_dev->openacc.data_environ;
354   acc_dev->openacc.data_environ = tgt;
355   gomp_mutex_unlock (&acc_dev->lock);
356 }
357 
/* Remove the mapping whose host range starts exactly at H, without
   freeing the underlying device memory.  Fatal if H is unmapped or
   does not point at the start of a mapped block.  */
void
acc_unmap_data (void *h)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* No need to call lazy open, as the address must have been mapped.  */

  /* This is a no-op on shared-memory targets.  */
  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  size_t host_size;

  gomp_mutex_lock (&acc_dev->lock);

  splay_tree_key n = lookup_host (acc_dev, h, 1);
  struct target_mem_desc *t;

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  host_size = n->host_end - n->host_start;

  /* Partial unmaps are rejected: H must be the block's exact start.  */
  if (n->host_start != (uintptr_t) h)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds %p",
		  (void *) n->host_start, (int) host_size, (void *) h);
    }

  t = n->tgt;

  /* NOTE(review): refcount == 2 is treated as "last reference" here
     (one for the mapping, presumably one for the data environment
     chain) — confirm against the refcounting scheme in target.c.  */
  if (t->refcount == 2)
    {
      struct target_mem_desc *tp;

      /* This is the last reference, so pull the descriptor off the
         chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
         freeing the device memory. */
      t->tgt_end = 0;
      t->to_free = 0;

      /* Unlink N's descriptor from the singly-linked data_environ
	 chain (TP trails T by one node).  */
      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
	   tp = t, t = t->prev)
	if (n->tgt == t)
	  {
	    if (tp)
	      tp->prev = t->prev;
	    else
	      acc_dev->openacc.data_environ = t->prev;

	    break;
	  }
    }

  gomp_mutex_unlock (&acc_dev->lock);

  /* gomp_unmap_vars acquires the device lock itself.  */
  gomp_unmap_vars (t, true);
}
421 
/* Flag bits selecting the behavior of present_create_copy.  */
#define FLAG_PRESENT (1 << 0)   /* Tolerate an existing mapping.  */
#define FLAG_CREATE (1 << 1)    /* Create the mapping if absent.  */
#define FLAG_COPY (1 << 2)      /* Copy host data in on creation.  */

/* Shared implementation of acc_create / acc_copyin and their
   present_or_ variants: return the device address for host block
   [H,+S], creating (and optionally copying) per the flags F.
   Fatal on a NULL/empty range or on a flag/state mismatch.  */
static void *
present_create_copy (unsigned f, void *h, size_t s)
{
  void *d;
  splay_tree_key n;

  if (!h || !s)
    gomp_fatal ("[%p,+%d] is a bad range", (void *)h, (int)s);

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* Shared memory: the host pointer already is the device pointer.  */
  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return h;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);
  if (n)
    {
      /* Present. */
      d = (void *) (n->tgt->tgt_start + n->tgt_offset);

      /* Plain acc_create/acc_copyin on an already-mapped range is an
	 error; only the present_or_ variants set FLAG_PRESENT.  */
      if (!(f & FLAG_PRESENT))
        {
	  gomp_mutex_unlock (&acc_dev->lock);
          gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
        	      (void *)h, (int)s, (void *)d, (int)s);
	}
      /* The existing mapping must cover all of [H, H+S).  */
      if ((h + s) > (void *)n->host_end)
	{
	  gomp_mutex_unlock (&acc_dev->lock);
	  gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
	}

      gomp_mutex_unlock (&acc_dev->lock);
    }
  else if (!(f & FLAG_CREATE))
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
    }
  else
    {
      struct target_mem_desc *tgt;
      size_t mapnum = 1;
      unsigned short kinds;
      void *hostaddrs = h;

      /* GOMP_MAP_TO also transfers the host contents; GOMP_MAP_ALLOC
	 only reserves device memory.  */
      if (f & FLAG_COPY)
	kinds = GOMP_MAP_TO;
      else
	kinds = GOMP_MAP_ALLOC;

      /* gomp_map_vars takes the device lock itself.  */
      gomp_mutex_unlock (&acc_dev->lock);

      tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true,
			   GOMP_MAP_VARS_OPENACC);

      gomp_mutex_lock (&acc_dev->lock);

      /* Record the new descriptor on the data environment chain.  */
      d = tgt->to_free;
      tgt->prev = acc_dev->openacc.data_environ;
      acc_dev->openacc.data_environ = tgt;

      gomp_mutex_unlock (&acc_dev->lock);
    }

  return d;
}
498 
499 void *
500 acc_create (void *h, size_t s)
501 {
502   return present_create_copy (FLAG_CREATE, h, s);
503 }
504 
505 void *
506 acc_copyin (void *h, size_t s)
507 {
508   return present_create_copy (FLAG_CREATE | FLAG_COPY, h, s);
509 }
510 
511 void *
512 acc_present_or_create (void *h, size_t s)
513 {
514   return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s);
515 }
516 
/* acc_pcreate is acc_present_or_create by a different name.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_present_or_create, acc_pcreate)
#else
/* Fallback when symbol aliasing is unavailable: a real wrapper with
   identical semantics (costs one extra call).  */
void *
acc_pcreate (void *h, size_t s)
{
  return acc_present_or_create (h, s);
}
#endif
527 
528 void *
529 acc_present_or_copyin (void *h, size_t s)
530 {
531   return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s);
532 }
533 
/* acc_pcopyin is acc_present_or_copyin by a different name.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_present_or_copyin, acc_pcopyin)
#else
/* Fallback when symbol aliasing is unavailable: a real wrapper with
   identical semantics (costs one extra call).  */
void *
acc_pcopyin (void *h, size_t s)
{
  return acc_present_or_copyin (h, s);
}
#endif
544 
545 #define FLAG_COPYOUT (1 << 0)
546 
547 static void
548 delete_copyout (unsigned f, void *h, size_t s, const char *libfnname)
549 {
550   size_t host_size;
551   splay_tree_key n;
552   void *d;
553   struct goacc_thread *thr = goacc_thread ();
554   struct gomp_device_descr *acc_dev = thr->dev;
555 
556   if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
557     return;
558 
559   gomp_mutex_lock (&acc_dev->lock);
560 
561   n = lookup_host (acc_dev, h, s);
562 
563   /* No need to call lazy open, as the data must already have been
564      mapped.  */
565 
566   if (!n)
567     {
568       gomp_mutex_unlock (&acc_dev->lock);
569       gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);
570     }
571 
572   d = (void *) (n->tgt->tgt_start + n->tgt_offset
573 		+ (uintptr_t) h - n->host_start);
574 
575   host_size = n->host_end - n->host_start;
576 
577   if (n->host_start != (uintptr_t) h || host_size != s)
578     {
579       gomp_mutex_unlock (&acc_dev->lock);
580       gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
581 		  (void *) n->host_start, (int) host_size, (void *) h, (int) s);
582     }
583 
584   gomp_mutex_unlock (&acc_dev->lock);
585 
586   if (f & FLAG_COPYOUT)
587     acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
588 
589   acc_unmap_data (h);
590 
591   if (!acc_dev->free_func (acc_dev->target_id, d))
592     gomp_fatal ("error in freeing device memory in %s", libfnname);
593 }
594 
/* Unmap host block [H,+S] and free the device copy, discarding the
   device contents.  */
void
acc_delete (void *h, size_t s)
{
  delete_copyout (0, h, s, __FUNCTION__);
}
600 
601 void
602 acc_copyout (void *h, size_t s)
603 {
604   delete_copyout (FLAG_COPYOUT, h, s, __FUNCTION__);
605 }
606 
607 static void
608 update_dev_host (int is_dev, void *h, size_t s)
609 {
610   splay_tree_key n;
611   void *d;
612 
613   goacc_lazy_initialize ();
614 
615   struct goacc_thread *thr = goacc_thread ();
616   struct gomp_device_descr *acc_dev = thr->dev;
617 
618   if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
619     return;
620 
621   gomp_mutex_lock (&acc_dev->lock);
622 
623   n = lookup_host (acc_dev, h, s);
624 
625   if (!n)
626     {
627       gomp_mutex_unlock (&acc_dev->lock);
628       gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
629     }
630 
631   d = (void *) (n->tgt->tgt_start + n->tgt_offset
632 		+ (uintptr_t) h - n->host_start);
633 
634   if (is_dev)
635     acc_dev->host2dev_func (acc_dev->target_id, d, h, s);
636   else
637     acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
638 
639   gomp_mutex_unlock (&acc_dev->lock);
640 }
641 
/* Refresh the device copy of host block [H,+S] from host memory.  */
void
acc_update_device (void *h, size_t s)
{
  update_dev_host (/* is_dev= */ 1, h, s);
}
647 
/* Refresh host block [H,+S] from its device copy.  */
void
acc_update_self (void *h, size_t s)
{
  update_dev_host (/* is_dev= */ 0, h, s);
}
653 
654 void
655 gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
656 			 void *kinds)
657 {
658   struct target_mem_desc *tgt;
659   struct goacc_thread *thr = goacc_thread ();
660   struct gomp_device_descr *acc_dev = thr->dev;
661 
662   gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
663   tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs,
664 		       NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC);
665   gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);
666 
667   gomp_mutex_lock (&acc_dev->lock);
668   tgt->prev = acc_dev->openacc.data_environ;
669   acc_dev->openacc.data_environ = tgt;
670   gomp_mutex_unlock (&acc_dev->lock);
671 }
672 
/* Undo a gomp_acc_insert_pointer for the mapping containing host
   address H.  FORCE_COPYFROM forces the data to be copied back to the
   host on unmap.  ASYNC selects synchronous unmapping versus deferral
   to an async cleanup callback.  MAPNUM is the number of entries the
   insert created (affects the expected refcount).  */
void
gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;
  splay_tree_key n;
  struct target_mem_desc *t;
  /* NOTE(review): expected "last reference" count — 2 for a single
     mapping, 3 when a pointer-set was mapped (extra entry); confirm
     against the refcounting in gomp_acc_insert_pointer/target.c.  */
  int minrefs = (mapnum == 1) ? 2 : 3;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, 1);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);

  t = n->tgt;

  struct target_mem_desc *tp;

  if (t->refcount == minrefs)
    {
      /* This is the last reference, so pull the descriptor off the
	 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
	 freeing the device memory. */
      t->tgt_end = 0;
      t->to_free = 0;

      /* Unlink N's descriptor from the data_environ chain (TP trails
	 T by one node); on success T is left equal to n->tgt.  */
      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
	   tp = t, t = t->prev)
	{
	  if (n->tgt == t)
	    {
	      if (tp)
		tp->prev = t->prev;
	      else
		acc_dev->openacc.data_environ = t->prev;
	      break;
	    }
	}
    }

  if (force_copyfrom)
    t->list[0].copy_from = 1;

  gomp_mutex_unlock (&acc_dev->lock);

  /* If running synchronously, unmap immediately.  */
  if (async < acc_async_noval)
    gomp_unmap_vars (t, true);
  else
    t->device_descr->openacc.register_async_cleanup_func (t, async);

  gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
}
733