xref: /netbsd/external/gpl3/gcc/dist/libgomp/oacc-mem.c (revision cede1f5b)
1 /* OpenACC Runtime initialization routines
2 
3    Copyright (C) 2013-2015 Free Software Foundation, Inc.
4 
5    Contributed by Mentor Embedded.
6 
7    This file is part of the GNU Offloading and Multi Processing Library
8    (libgomp).
9 
10    Libgomp is free software; you can redistribute it and/or modify it
11    under the terms of the GNU General Public License as published by
12    the Free Software Foundation; either version 3, or (at your option)
13    any later version.
14 
15    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
18    more details.
19 
20    Under Section 7 of GPL version 3, you are granted additional
21    permissions described in the GCC Runtime Library Exception, version
22    3.1, as published by the Free Software Foundation.
23 
24    You should have received a copy of the GNU General Public License and
25    a copy of the GCC Runtime Library Exception along with this program;
26    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
27    <http://www.gnu.org/licenses/>.  */
28 
29 #include "openacc.h"
30 #include "config.h"
31 #include "libgomp.h"
32 #include "gomp-constants.h"
33 #include "oacc-int.h"
34 #include "splay-tree.h"
35 #include <stdint.h>
36 #include <assert.h>
37 
38 /* Return block containing [H->S), or NULL if not contained.  */
39 
40 static splay_tree_key
41 lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
42 {
43   struct splay_tree_key_s node;
44   splay_tree_key key;
45 
46   node.host_start = (uintptr_t) h;
47   node.host_end = (uintptr_t) h + s;
48 
49   gomp_mutex_lock (&dev->lock);
50   key = splay_tree_lookup (&dev->mem_map, &node);
51   gomp_mutex_unlock (&dev->lock);
52 
53   return key;
54 }
55 
56 /* Return block containing [D->S), or NULL if not contained.
57    The list isn't ordered by device address, so we have to iterate
58    over the whole array.  This is not expected to be a common
59    operation.  */
60 
61 static splay_tree_key
62 lookup_dev (struct target_mem_desc *tgt, void *d, size_t s)
63 {
64   int i;
65   struct target_mem_desc *t;
66 
67   if (!tgt)
68     return NULL;
69 
70   gomp_mutex_lock (&tgt->device_descr->lock);
71 
72   for (t = tgt; t != NULL; t = t->prev)
73     {
74       if (t->tgt_start <= (uintptr_t) d && t->tgt_end >= (uintptr_t) d + s)
75         break;
76     }
77 
78   gomp_mutex_unlock (&tgt->device_descr->lock);
79 
80   if (!t)
81     return NULL;
82 
83   for (i = 0; i < t->list_count; i++)
84     {
85       void * offset;
86 
87       splay_tree_key k = &t->array[i].key;
88       offset = d - t->tgt_start + k->tgt_offset;
89 
90       if (k->host_start + offset <= (void *) k->host_end)
91         return k;
92     }
93 
94   return NULL;
95 }
96 
97 /* OpenACC is silent on how memory exhaustion is indicated.  We return
98    NULL.  */
99 
100 void *
101 acc_malloc (size_t s)
102 {
103   if (!s)
104     return NULL;
105 
106   goacc_lazy_initialize ();
107 
108   struct goacc_thread *thr = goacc_thread ();
109 
110   assert (thr->dev);
111 
112   return thr->dev->alloc_func (thr->dev->target_id, s);
113 }
114 
115 /* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event
116    the device address is mapped. We choose to check if it mapped,
117    and if it is, to unmap it. */
118 void
119 acc_free (void *d)
120 {
121   splay_tree_key k;
122   struct goacc_thread *thr = goacc_thread ();
123 
124   if (!d)
125     return;
126 
127   assert (thr && thr->dev);
128 
129   /* We don't have to call lazy open here, as the ptr value must have
130      been returned by acc_malloc.  It's not permitted to pass NULL in
131      (unless you got that null from acc_malloc).  */
132   if ((k = lookup_dev (thr->dev->openacc.data_environ, d, 1)))
133    {
134      void *offset;
135 
136      offset = d - k->tgt->tgt_start + k->tgt_offset;
137 
138      acc_unmap_data ((void *)(k->host_start + offset));
139    }
140 
141   thr->dev->free_func (thr->dev->target_id, d);
142 }
143 
144 void
145 acc_memcpy_to_device (void *d, void *h, size_t s)
146 {
147   /* No need to call lazy open here, as the device pointer must have
148      been obtained from a routine that did that.  */
149   struct goacc_thread *thr = goacc_thread ();
150 
151   assert (thr && thr->dev);
152 
153   thr->dev->host2dev_func (thr->dev->target_id, d, h, s);
154 }
155 
156 void
157 acc_memcpy_from_device (void *h, void *d, size_t s)
158 {
159   /* No need to call lazy open here, as the device pointer must have
160      been obtained from a routine that did that.  */
161   struct goacc_thread *thr = goacc_thread ();
162 
163   assert (thr && thr->dev);
164 
165   thr->dev->dev2host_func (thr->dev->target_id, h, d, s);
166 }
167 
168 /* Return the device pointer that corresponds to host data H.  Or NULL
169    if no mapping.  */
170 
171 void *
172 acc_deviceptr (void *h)
173 {
174   splay_tree_key n;
175   void *d;
176   void *offset;
177 
178   goacc_lazy_initialize ();
179 
180   struct goacc_thread *thr = goacc_thread ();
181 
182   n = lookup_host (thr->dev, h, 1);
183 
184   if (!n)
185     return NULL;
186 
187   offset = h - n->host_start;
188 
189   d = n->tgt->tgt_start + n->tgt_offset + offset;
190 
191   return d;
192 }
193 
194 /* Return the host pointer that corresponds to device data D.  Or NULL
195    if no mapping.  */
196 
197 void *
198 acc_hostptr (void *d)
199 {
200   splay_tree_key n;
201   void *h;
202   void *offset;
203 
204   goacc_lazy_initialize ();
205 
206   struct goacc_thread *thr = goacc_thread ();
207 
208   n = lookup_dev (thr->dev->openacc.data_environ, d, 1);
209 
210   if (!n)
211     return NULL;
212 
213   offset = d - n->tgt->tgt_start + n->tgt_offset;
214 
215   h = n->host_start + offset;
216 
217   return h;
218 }
219 
220 /* Return 1 if host data [H,+S] is present on the device.  */
221 
222 int
223 acc_is_present (void *h, size_t s)
224 {
225   splay_tree_key n;
226 
227   if (!s || !h)
228     return 0;
229 
230   goacc_lazy_initialize ();
231 
232   struct goacc_thread *thr = goacc_thread ();
233   struct gomp_device_descr *acc_dev = thr->dev;
234 
235   n = lookup_host (acc_dev, h, s);
236 
237   if (n && ((uintptr_t)h < n->host_start
238 	    || (uintptr_t)h + s > n->host_end
239 	    || s > n->host_end - n->host_start))
240     n = NULL;
241 
242   return n != NULL;
243 }
244 
245 /* Create a mapping for host [H,+S] -> device [D,+S] */
246 
247 void
248 acc_map_data (void *h, void *d, size_t s)
249 {
250   struct target_mem_desc *tgt;
251   size_t mapnum = 1;
252   void *hostaddrs = h;
253   void *devaddrs = d;
254   size_t sizes = s;
255   unsigned short kinds = GOMP_MAP_ALLOC;
256 
257   goacc_lazy_initialize ();
258 
259   struct goacc_thread *thr = goacc_thread ();
260   struct gomp_device_descr *acc_dev = thr->dev;
261 
262   if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
263     {
264       if (d != h)
265         gomp_fatal ("cannot map data on shared-memory system");
266 
267       tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, false);
268     }
269   else
270     {
271       struct goacc_thread *thr = goacc_thread ();
272 
273       if (!d || !h || !s)
274 	gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
275                     (void *)h, (int)s, (void *)d, (int)s);
276 
277       if (lookup_host (acc_dev, h, s))
278 	gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
279 		    (int)s);
280 
281       if (lookup_dev (thr->dev->openacc.data_environ, d, s))
282 	gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
283 		    (int)s);
284 
285       tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
286 			   &kinds, true, false);
287     }
288 
289   tgt->prev = acc_dev->openacc.data_environ;
290   acc_dev->openacc.data_environ = tgt;
291 }
292 
293 void
294 acc_unmap_data (void *h)
295 {
296   struct goacc_thread *thr = goacc_thread ();
297   struct gomp_device_descr *acc_dev = thr->dev;
298 
299   /* No need to call lazy open, as the address must have been mapped.  */
300 
301   size_t host_size;
302   splay_tree_key n = lookup_host (acc_dev, h, 1);
303   struct target_mem_desc *t;
304 
305   if (!n)
306     gomp_fatal ("%p is not a mapped block", (void *)h);
307 
308   host_size = n->host_end - n->host_start;
309 
310   if (n->host_start != (uintptr_t) h)
311     gomp_fatal ("[%p,%d] surrounds1 %p",
312 		(void *) n->host_start, (int) host_size, (void *) h);
313 
314   t = n->tgt;
315 
316   if (t->refcount == 2)
317     {
318       struct target_mem_desc *tp;
319 
320       /* This is the last reference, so pull the descriptor off the
321          chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
322          freeing the device memory. */
323       t->tgt_end = 0;
324       t->to_free = 0;
325 
326       gomp_mutex_lock (&acc_dev->lock);
327 
328       for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
329 	   tp = t, t = t->prev)
330 	if (n->tgt == t)
331 	  {
332 	    if (tp)
333 	      tp->prev = t->prev;
334 	    else
335 	      acc_dev->openacc.data_environ = t->prev;
336 
337 	    break;
338 	  }
339 
340       gomp_mutex_unlock (&acc_dev->lock);
341     }
342 
343   gomp_unmap_vars (t, true);
344 }
345 
346 #define FLAG_PRESENT (1 << 0)
347 #define FLAG_CREATE (1 << 1)
348 #define FLAG_COPY (1 << 2)
349 
350 static void *
351 present_create_copy (unsigned f, void *h, size_t s)
352 {
353   void *d;
354   splay_tree_key n;
355 
356   if (!h || !s)
357     gomp_fatal ("[%p,+%d] is a bad range", (void *)h, (int)s);
358 
359   goacc_lazy_initialize ();
360 
361   struct goacc_thread *thr = goacc_thread ();
362   struct gomp_device_descr *acc_dev = thr->dev;
363 
364   n = lookup_host (acc_dev, h, s);
365   if (n)
366     {
367       /* Present. */
368       d = (void *) (n->tgt->tgt_start + n->tgt_offset);
369 
370       if (!(f & FLAG_PRESENT))
371         gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
372             (void *)h, (int)s, (void *)d, (int)s);
373       if ((h + s) > (void *)n->host_end)
374         gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
375     }
376   else if (!(f & FLAG_CREATE))
377     {
378       gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
379     }
380   else
381     {
382       struct target_mem_desc *tgt;
383       size_t mapnum = 1;
384       unsigned short kinds;
385       void *hostaddrs = h;
386 
387       if (f & FLAG_COPY)
388 	kinds = GOMP_MAP_TO;
389       else
390 	kinds = GOMP_MAP_ALLOC;
391 
392       tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true,
393 			   false);
394 
395       gomp_mutex_lock (&acc_dev->lock);
396 
397       d = tgt->to_free;
398       tgt->prev = acc_dev->openacc.data_environ;
399       acc_dev->openacc.data_environ = tgt;
400 
401       gomp_mutex_unlock (&acc_dev->lock);
402     }
403 
404   return d;
405 }
406 
407 void *
408 acc_create (void *h, size_t s)
409 {
410   return present_create_copy (FLAG_CREATE, h, s);
411 }
412 
413 void *
414 acc_copyin (void *h, size_t s)
415 {
416   return present_create_copy (FLAG_CREATE | FLAG_COPY, h, s);
417 }
418 
419 void *
420 acc_present_or_create (void *h, size_t s)
421 {
422   return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s);
423 }
424 
425 void *
426 acc_present_or_copyin (void *h, size_t s)
427 {
428   return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s);
429 }
430 
431 #define FLAG_COPYOUT (1 << 0)
432 
433 static void
434 delete_copyout (unsigned f, void *h, size_t s)
435 {
436   size_t host_size;
437   splay_tree_key n;
438   void *d;
439   struct goacc_thread *thr = goacc_thread ();
440   struct gomp_device_descr *acc_dev = thr->dev;
441 
442   n = lookup_host (acc_dev, h, s);
443 
444   /* No need to call lazy open, as the data must already have been
445      mapped.  */
446 
447   if (!n)
448     gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);
449 
450   d = (void *) (n->tgt->tgt_start + n->tgt_offset
451 		+ (uintptr_t) h - n->host_start);
452 
453   host_size = n->host_end - n->host_start;
454 
455   if (n->host_start != (uintptr_t) h || host_size != s)
456     gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
457 		(void *) n->host_start, (int) host_size, (void *) h, (int) s);
458 
459   if (f & FLAG_COPYOUT)
460     acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
461 
462   acc_unmap_data (h);
463 
464   acc_dev->free_func (acc_dev->target_id, d);
465 }
466 
/* Unmap host range [H,+S] and free its device memory without copying
   anything back (delete_copyout with no flags).  */
void
acc_delete (void *h, size_t s)
{
  const unsigned flags = 0;

  delete_copyout (flags, h, s);
}
472 
473 void acc_copyout (void *h, size_t s)
474 {
475   delete_copyout (FLAG_COPYOUT, h, s);
476 }
477 
478 static void
479 update_dev_host (int is_dev, void *h, size_t s)
480 {
481   splay_tree_key n;
482   void *d;
483   struct goacc_thread *thr = goacc_thread ();
484   struct gomp_device_descr *acc_dev = thr->dev;
485 
486   n = lookup_host (acc_dev, h, s);
487 
488   /* No need to call lazy open, as the data must already have been
489      mapped.  */
490 
491   if (!n)
492     gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
493 
494   d = (void *) (n->tgt->tgt_start + n->tgt_offset
495 		+ (uintptr_t) h - n->host_start);
496 
497   if (is_dev)
498     acc_dev->host2dev_func (acc_dev->target_id, d, h, s);
499   else
500     acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
501 }
502 
/* Copy host range [H,+S] to its mapped device memory.  */
void
acc_update_device (void *h, size_t s)
{
  const int to_device = 1;

  update_dev_host (to_device, h, s);
}
508 
/* Copy the device memory mapped for host range [H,+S] back to the
   host.  */
void
acc_update_self (void *h, size_t s)
{
  const int to_device = 0;

  update_dev_host (to_device, h, s);
}
514 
515 void
516 gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
517 			 void *kinds)
518 {
519   struct target_mem_desc *tgt;
520   struct goacc_thread *thr = goacc_thread ();
521   struct gomp_device_descr *acc_dev = thr->dev;
522 
523   gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
524   tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs,
525 		       NULL, sizes, kinds, true, false);
526   gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);
527   tgt->prev = acc_dev->openacc.data_environ;
528   acc_dev->openacc.data_environ = tgt;
529 }
530 
531 void
532 gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
533 {
534   struct goacc_thread *thr = goacc_thread ();
535   struct gomp_device_descr *acc_dev = thr->dev;
536   splay_tree_key n;
537   struct target_mem_desc *t;
538   int minrefs = (mapnum == 1) ? 2 : 3;
539 
540   n = lookup_host (acc_dev, h, 1);
541 
542   if (!n)
543     gomp_fatal ("%p is not a mapped block", (void *)h);
544 
545   gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);
546 
547   t = n->tgt;
548 
549   struct target_mem_desc *tp;
550 
551   gomp_mutex_lock (&acc_dev->lock);
552 
553   if (t->refcount == minrefs)
554     {
555       /* This is the last reference, so pull the descriptor off the
556 	 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
557 	 freeing the device memory. */
558       t->tgt_end = 0;
559       t->to_free = 0;
560 
561       for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
562 	   tp = t, t = t->prev)
563 	{
564 	  if (n->tgt == t)
565 	    {
566 	      if (tp)
567 		tp->prev = t->prev;
568 	      else
569 		acc_dev->openacc.data_environ = t->prev;
570 	      break;
571 	    }
572 	}
573     }
574 
575   if (force_copyfrom)
576     t->list[0]->copy_from = 1;
577 
578   gomp_mutex_unlock (&acc_dev->lock);
579 
580   /* If running synchronously, unmap immediately.  */
581   if (async < acc_async_noval)
582     gomp_unmap_vars (t, true);
583   else
584     {
585       gomp_copy_from_async (t);
586       acc_dev->openacc.register_async_cleanup_func (t);
587     }
588 
589   gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
590 }
591