/* OpenACC Runtime initialization routines

   Copyright (C) 2013-2016 Free Software Foundation, Inc.

   Contributed by Mentor Embedded.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#include "openacc.h"
#include "config.h"
#include "libgomp.h"
#include "gomp-constants.h"
#include "oacc-int.h"
#include <stdint.h>
#include <assert.h>

/* Return the block containing [H,H+S), or NULL if it is not contained in
   any block.  The device lock for DEV must be held on entry, and remains
   held on exit.  */

static splay_tree_key
lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
{
  struct splay_tree_key_s node;
  splay_tree_key key;

  node.host_start = (uintptr_t) h;
  node.host_end = (uintptr_t) h + s;

  key = splay_tree_lookup (&dev->mem_map, &node);

  return key;
}

/* Return the block containing [D,D+S), or NULL if it is not contained in
   any block.  The list isn't ordered by device address, so we have to
   iterate over the whole array.  This is not expected to be a common
   operation.  The device lock associated with TGT must be held on entry,
   and remains held on exit.  */

static splay_tree_key
lookup_dev (struct target_mem_desc *tgt, void *d, size_t s)
{
  int i;
  struct target_mem_desc *t;

  if (!tgt)
    return NULL;

  for (t = tgt; t != NULL; t = t->prev)
    {
      if (t->tgt_start <= (uintptr_t) d && t->tgt_end >= (uintptr_t) d + s)
        break;
    }

  if (!t)
    return NULL;

  for (i = 0; i < t->list_count; i++)
    {
      void * offset;

      splay_tree_key k = &t->array[i].key;
      offset = d - t->tgt_start + k->tgt_offset;

      if (k->host_start + offset <= (void *) k->host_end)
        return k;
    }

  return NULL;
}

/* OpenACC is silent on how memory exhaustion is indicated.  We return
   NULL.  */

void *
acc_malloc (size_t s)
{
  if (!s)
    return NULL;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();

  assert (thr->dev);

  return thr->dev->alloc_func (thr->dev->target_id, s);
}

/* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event the device
   address is mapped.  We choose to check whether it is mapped, and if it is,
   to unmap it.  */
void
acc_free (void *d)
{
  splay_tree_key k;

  if (!d)
    return;

  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  struct gomp_device_descr *acc_dev = thr->dev;

  gomp_mutex_lock (&acc_dev->lock);

  /* We don't have to call lazy open here, as the ptr value must have
     been returned by acc_malloc.  It's not permitted to pass NULL in
     (unless you got that null from acc_malloc).  */
  if ((k = lookup_dev (acc_dev->openacc.data_environ, d, 1)))
    {
      void *offset;

      offset = d - k->tgt->tgt_start + k->tgt_offset;

      gomp_mutex_unlock (&acc_dev->lock);

      acc_unmap_data ((void *)(k->host_start + offset));
    }
  else
    gomp_mutex_unlock (&acc_dev->lock);

  acc_dev->free_func (acc_dev->target_id, d);
}

void
acc_memcpy_to_device (void *d, void *h, size_t s)
{
  /* No need to call lazy open here, as the device pointer must have
     been obtained from a routine that did that.  */
  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  thr->dev->host2dev_func (thr->dev->target_id, d, h, s);
}

void
acc_memcpy_from_device (void *h, void *d, size_t s)
{
  /* No need to call lazy open here, as the device pointer must have
     been obtained from a routine that did that.  */
  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  thr->dev->dev2host_func (thr->dev->target_id, h, d, s);
}
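
/* Illustrative usage only (not part of libgomp): a caller would typically
   pair acc_malloc with acc_memcpy_to_device / acc_memcpy_from_device and
   release the buffer with acc_free, e.g.

     size_t bytes = n * sizeof (float);
     float *dev_buf = (float *) acc_malloc (bytes);
     acc_memcpy_to_device (dev_buf, host_buf, bytes);
     ...
     acc_memcpy_from_device (host_buf, dev_buf, bytes);
     acc_free (dev_buf);

   where host_buf and n are hypothetical names assumed to be defined by the
   caller.  */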

/* Return the device pointer that corresponds to host data H, or NULL if
   there is no mapping.  */

void *
acc_deviceptr (void *h)
{
  splay_tree_key n;
  void *d;
  void *offset;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *dev = thr->dev;

  gomp_mutex_lock (&dev->lock);

  n = lookup_host (dev, h, 1);

  if (!n)
    {
      gomp_mutex_unlock (&dev->lock);
      return NULL;
    }

  offset = h - n->host_start;

  d = n->tgt->tgt_start + n->tgt_offset + offset;

  gomp_mutex_unlock (&dev->lock);

  return d;
}

/* Return the host pointer that corresponds to device data D, or NULL if
   there is no mapping.  */

void *
acc_hostptr (void *d)
{
  splay_tree_key n;
  void *h;
  void *offset;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_dev (acc_dev->openacc.data_environ, d, 1);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      return NULL;
    }

  offset = d - n->tgt->tgt_start + n->tgt_offset;

  h = n->host_start + offset;

  gomp_mutex_unlock (&acc_dev->lock);

  return h;
}

/* Return 1 if host data [H,+S] is present on the device.  */

int
acc_is_present (void *h, size_t s)
{
  splay_tree_key n;

  if (!s || !h)
    return 0;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  if (n && ((uintptr_t)h < n->host_start
	    || (uintptr_t)h + s > n->host_end
	    || s > n->host_end - n->host_start))
    n = NULL;

  gomp_mutex_unlock (&acc_dev->lock);

  return n != NULL;
}
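
/* Illustrative usage only (not part of libgomp): acc_is_present and
   acc_deviceptr can be used to query an existing mapping, e.g.

     if (acc_is_present (host_buf, bytes))
       {
         void *dev_ptr = acc_deviceptr (host_buf);
         ... use dev_ptr, which addresses the device copy of host_buf ...
       }

   with host_buf and bytes assumed to be provided by the caller.  */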

/* Create a mapping for host [H,+S] -> device [D,+S].  */

void
acc_map_data (void *h, void *d, size_t s)
{
  struct target_mem_desc *tgt;
  size_t mapnum = 1;
  void *hostaddrs = h;
  void *devaddrs = d;
  size_t sizes = s;
  unsigned short kinds = GOMP_MAP_ALLOC;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (d != h)
        gomp_fatal ("cannot map data on shared-memory system");

      tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true,
			   GOMP_MAP_VARS_OPENACC);
    }
  else
    {
      struct goacc_thread *thr = goacc_thread ();

      if (!d || !h || !s)
	gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
		    (void *)h, (int)s, (void *)d, (int)s);

      gomp_mutex_lock (&acc_dev->lock);

      if (lookup_host (acc_dev, h, s))
        {
	  gomp_mutex_unlock (&acc_dev->lock);
	  gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
		      (int)s);
	}

      if (lookup_dev (thr->dev->openacc.data_environ, d, s))
        {
	  gomp_mutex_unlock (&acc_dev->lock);
	  gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
		      (int)s);
	}

      gomp_mutex_unlock (&acc_dev->lock);

      tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
			   &kinds, true, GOMP_MAP_VARS_OPENACC);
    }

  gomp_mutex_lock (&acc_dev->lock);
  tgt->prev = acc_dev->openacc.data_environ;
  acc_dev->openacc.data_environ = tgt;
  gomp_mutex_unlock (&acc_dev->lock);
}

void
acc_unmap_data (void *h)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* No need to call lazy open, as the address must have been mapped.  */

  size_t host_size;

  gomp_mutex_lock (&acc_dev->lock);

  splay_tree_key n = lookup_host (acc_dev, h, 1);
  struct target_mem_desc *t;

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  host_size = n->host_end - n->host_start;

  if (n->host_start != (uintptr_t) h)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds %p",
		  (void *) n->host_start, (int) host_size, (void *) h);
    }

  t = n->tgt;

  if (t->refcount == 2)
    {
      struct target_mem_desc *tp;

      /* This is the last reference, so pull the descriptor off the
         chain.  This prevents gomp_unmap_vars (via gomp_unmap_tgt) from
         freeing the device memory.  */
      t->tgt_end = 0;
      t->to_free = 0;

      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
	   tp = t, t = t->prev)
	if (n->tgt == t)
	  {
	    if (tp)
	      tp->prev = t->prev;
	    else
	      acc_dev->openacc.data_environ = t->prev;

	    break;
	  }
    }

  gomp_mutex_unlock (&acc_dev->lock);

  gomp_unmap_vars (t, true);
}
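
/* Illustrative usage only (not part of libgomp): acc_map_data attaches an
   existing device allocation to host memory, and acc_unmap_data detaches it
   again without freeing the device buffer, e.g.

     void *dev_buf = acc_malloc (bytes);
     acc_map_data (host_buf, dev_buf, bytes);
     ... use host_buf in OpenACC regions ...
     acc_unmap_data (host_buf);
     acc_free (dev_buf);

   where host_buf and bytes are hypothetical names assumed to be provided by
   the caller.  */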

#define FLAG_PRESENT (1 << 0)
#define FLAG_CREATE (1 << 1)
#define FLAG_COPY (1 << 2)

static void *
present_create_copy (unsigned f, void *h, size_t s)
{
  void *d;
  splay_tree_key n;

  if (!h || !s)
    gomp_fatal ("[%p,+%d] is a bad range", (void *)h, (int)s);

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);
  if (n)
    {
      /* Present. */
      d = (void *) (n->tgt->tgt_start + n->tgt_offset);

      if (!(f & FLAG_PRESENT))
        {
	  gomp_mutex_unlock (&acc_dev->lock);
	  gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
		      (void *)h, (int)s, (void *)d, (int)s);
	}
      if ((h + s) > (void *)n->host_end)
	{
	  gomp_mutex_unlock (&acc_dev->lock);
	  gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
	}

      gomp_mutex_unlock (&acc_dev->lock);
    }
  else if (!(f & FLAG_CREATE))
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
    }
  else
    {
      struct target_mem_desc *tgt;
      size_t mapnum = 1;
      unsigned short kinds;
      void *hostaddrs = h;

      if (f & FLAG_COPY)
	kinds = GOMP_MAP_TO;
      else
	kinds = GOMP_MAP_ALLOC;

      gomp_mutex_unlock (&acc_dev->lock);

      tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true,
			   GOMP_MAP_VARS_OPENACC);

      gomp_mutex_lock (&acc_dev->lock);

      d = tgt->to_free;
      tgt->prev = acc_dev->openacc.data_environ;
      acc_dev->openacc.data_environ = tgt;

      gomp_mutex_unlock (&acc_dev->lock);
    }

  return d;
}

void *
acc_create (void *h, size_t s)
{
  return present_create_copy (FLAG_CREATE, h, s);
}

void *
acc_copyin (void *h, size_t s)
{
  return present_create_copy (FLAG_CREATE | FLAG_COPY, h, s);
}

void *
acc_present_or_create (void *h, size_t s)
{
  return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s);
}

void *
acc_present_or_copyin (void *h, size_t s)
{
  return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s);
}

#define FLAG_COPYOUT (1 << 0)

static void
delete_copyout (unsigned f, void *h, size_t s)
{
  size_t host_size;
  splay_tree_key n;
  void *d;
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  /* No need to call lazy open, as the data must already have been
     mapped.  */

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);
    }

  d = (void *) (n->tgt->tgt_start + n->tgt_offset
		+ (uintptr_t) h - n->host_start);

  host_size = n->host_end - n->host_start;

  if (n->host_start != (uintptr_t) h || host_size != s)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
		  (void *) n->host_start, (int) host_size, (void *) h, (int) s);
    }

  gomp_mutex_unlock (&acc_dev->lock);

  if (f & FLAG_COPYOUT)
    acc_dev->dev2host_func (acc_dev->target_id, h, d, s);

  acc_unmap_data (h);

  acc_dev->free_func (acc_dev->target_id, d);
}

void
acc_delete (void *h, size_t s)
{
  delete_copyout (0, h, s);
}

void
acc_copyout (void *h, size_t s)
{
  delete_copyout (FLAG_COPYOUT, h, s);
}
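
/* Illustrative usage only (not part of libgomp): the dynamic data-lifetime
   routines above are typically used in matched pairs, e.g.

     acc_copyin (host_buf, bytes);    ... map and copy host -> device ...
     ... compute on the device ...
     acc_copyout (host_buf, bytes);   ... copy device -> host and unmap ...

   or acc_create / acc_delete when no data transfer is wanted; host_buf and
   bytes are hypothetical names assumed to be provided by the caller.  */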

static void
update_dev_host (int is_dev, void *h, size_t s)
{
  splay_tree_key n;
  void *d;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
    }

  d = (void *) (n->tgt->tgt_start + n->tgt_offset
		+ (uintptr_t) h - n->host_start);

  gomp_mutex_unlock (&acc_dev->lock);

  if (is_dev)
    acc_dev->host2dev_func (acc_dev->target_id, d, h, s);
  else
    acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
}

void
acc_update_device (void *h, size_t s)
{
  update_dev_host (1, h, s);
}

void
acc_update_self (void *h, size_t s)
{
  update_dev_host (0, h, s);
}
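
/* Illustrative usage only (not part of libgomp): while data is mapped, the
   update routines refresh one side without remapping, e.g.

     acc_copyin (host_buf, bytes);
     host_buf[0] = 42;                     ... modify on the host ...
     acc_update_device (host_buf, bytes);  ... push the change to the device ...
     ...
     acc_update_self (host_buf, bytes);    ... pull device results back ...
     acc_delete (host_buf, bytes);

   with host_buf and bytes assumed to be defined by the caller.  */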

void
gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
			 void *kinds)
{
  struct target_mem_desc *tgt;
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
  tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs,
		       NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC);
  gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);

  gomp_mutex_lock (&acc_dev->lock);
  tgt->prev = acc_dev->openacc.data_environ;
  acc_dev->openacc.data_environ = tgt;
  gomp_mutex_unlock (&acc_dev->lock);
}

void
gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;
  splay_tree_key n;
  struct target_mem_desc *t;
  int minrefs = (mapnum == 1) ? 2 : 3;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, 1);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);

  t = n->tgt;

  struct target_mem_desc *tp;

  if (t->refcount == minrefs)
    {
      /* This is the last reference, so pull the descriptor off the
	 chain.  This prevents gomp_unmap_vars (via gomp_unmap_tgt) from
	 freeing the device memory.  */
      t->tgt_end = 0;
      t->to_free = 0;

      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
	   tp = t, t = t->prev)
	{
	  if (n->tgt == t)
	    {
	      if (tp)
		tp->prev = t->prev;
	      else
		acc_dev->openacc.data_environ = t->prev;
	      break;
	    }
	}
    }

  if (force_copyfrom)
    t->list[0].copy_from = 1;

  gomp_mutex_unlock (&acc_dev->lock);

  /* If running synchronously, unmap immediately.  */
  if (async < acc_async_noval)
    gomp_unmap_vars (t, true);
  else
    {
      gomp_copy_from_async (t);
      acc_dev->openacc.register_async_cleanup_func (t);
    }

  gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
}