1 /* OpenACC Runtime initialization routines
2
3 Copyright (C) 2013-2016 Free Software Foundation, Inc.
4
5 Contributed by Mentor Embedded.
6
7 This file is part of the GNU Offloading and Multi Processing Library
8 (libgomp).
9
10 Libgomp is free software; you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
13 any later version.
14
15 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 more details.
19
20 Under Section 7 of GPL version 3, you are granted additional
21 permissions described in the GCC Runtime Library Exception, version
22 3.1, as published by the Free Software Foundation.
23
24 You should have received a copy of the GNU General Public License and
25 a copy of the GCC Runtime Library Exception along with this program;
26 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
27 <http://www.gnu.org/licenses/>. */
28
29 #include "openacc.h"
30 #include "config.h"
31 #include "libgomp.h"
32 #include "gomp-constants.h"
33 #include "oacc-int.h"
34 #include <stdint.h>
35 #include <assert.h>
36
37 /* Return block containing [H->S), or NULL if not contained. The device lock
38 for DEV must be locked on entry, and remains locked on exit. */
39
40 static splay_tree_key
lookup_host(struct gomp_device_descr * dev,void * h,size_t s)41 lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
42 {
43 struct splay_tree_key_s node;
44 splay_tree_key key;
45
46 node.host_start = (uintptr_t) h;
47 node.host_end = (uintptr_t) h + s;
48
49 key = splay_tree_lookup (&dev->mem_map, &node);
50
51 return key;
52 }
53
54 /* Return block containing [D->S), or NULL if not contained.
55 The list isn't ordered by device address, so we have to iterate
56 over the whole array. This is not expected to be a common
57 operation. The device lock associated with TGT must be locked on entry, and
58 remains locked on exit. */
59
static splay_tree_key
lookup_dev (struct target_mem_desc *tgt, void *d, size_t s)
{
  int i;
  struct target_mem_desc *t;

  if (!tgt)
    return NULL;

  /* First find the descriptor whose device range [tgt_start, tgt_end)
     fully contains [D, D+S).  The chain is unordered, so walk it
     linearly (see the function comment above).  */
  for (t = tgt; t != NULL; t = t->prev)
    {
      if (t->tgt_start <= (uintptr_t) d && t->tgt_end >= (uintptr_t) d + s)
	break;
    }

  if (!t)
    return NULL;

  /* Then scan that descriptor's mapping list for an entry covering D.  */
  for (i = 0; i < t->list_count; i++)
    {
      void * offset;

      splay_tree_key k = &t->array[i].key;
      /* D's displacement into this mapping's device data, kept as a
	 pointer (relies on GNU void* arithmetic).  */
      offset = d - t->tgt_start + k->tgt_offset;

      /* NOTE(review): this containment test adds an integer host address
	 to a pointer-typed offset and has no lower-bound check, so it can
	 accept the first entry whose host_end is large enough -- looks
	 suspicious; verify against current upstream libgomp before
	 changing.  */
      if (k->host_start + offset <= (void *) k->host_end)
	return k;
    }

  return NULL;
}
91
92 /* OpenACC is silent on how memory exhaustion is indicated. We return
93 NULL. */
94
95 void *
acc_malloc(size_t s)96 acc_malloc (size_t s)
97 {
98 if (!s)
99 return NULL;
100
101 goacc_lazy_initialize ();
102
103 struct goacc_thread *thr = goacc_thread ();
104
105 assert (thr->dev);
106
107 return thr->dev->alloc_func (thr->dev->target_id, s);
108 }
109
110 /* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event
111 the device address is mapped. We choose to check if it mapped,
112 and if it is, to unmap it. */
void
acc_free (void *d)
{
  splay_tree_key k;

  if (!d)
    return;

  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  struct gomp_device_descr *acc_dev = thr->dev;

  gomp_mutex_lock (&acc_dev->lock);

  /* We don't have to call lazy open here, as the ptr value must have
     been returned by acc_malloc.  It's not permitted to pass NULL in
     (unless you got that null from acc_malloc).  */
  if ((k = lookup_dev (acc_dev->openacc.data_environ, d, 1)))
    {
      void *offset;

      /* D's displacement within the mapped block, used to recover the
	 corresponding host address below.  */
      offset = d - k->tgt->tgt_start + k->tgt_offset;

      /* Release the lock first: acc_unmap_data takes it itself.  */
      gomp_mutex_unlock (&acc_dev->lock);

      acc_unmap_data ((void *)(k->host_start + offset));
    }
  else
    gomp_mutex_unlock (&acc_dev->lock);

  acc_dev->free_func (acc_dev->target_id, d);
}
147
148 void
acc_memcpy_to_device(void * d,void * h,size_t s)149 acc_memcpy_to_device (void *d, void *h, size_t s)
150 {
151 /* No need to call lazy open here, as the device pointer must have
152 been obtained from a routine that did that. */
153 struct goacc_thread *thr = goacc_thread ();
154
155 assert (thr && thr->dev);
156
157 thr->dev->host2dev_func (thr->dev->target_id, d, h, s);
158 }
159
160 void
acc_memcpy_from_device(void * h,void * d,size_t s)161 acc_memcpy_from_device (void *h, void *d, size_t s)
162 {
163 /* No need to call lazy open here, as the device pointer must have
164 been obtained from a routine that did that. */
165 struct goacc_thread *thr = goacc_thread ();
166
167 assert (thr && thr->dev);
168
169 thr->dev->dev2host_func (thr->dev->target_id, h, d, s);
170 }
171
172 /* Return the device pointer that corresponds to host data H. Or NULL
173 if no mapping. */
174
175 void *
acc_deviceptr(void * h)176 acc_deviceptr (void *h)
177 {
178 splay_tree_key n;
179 void *d;
180 void *offset;
181
182 goacc_lazy_initialize ();
183
184 struct goacc_thread *thr = goacc_thread ();
185 struct gomp_device_descr *dev = thr->dev;
186
187 gomp_mutex_lock (&dev->lock);
188
189 n = lookup_host (dev, h, 1);
190
191 if (!n)
192 {
193 gomp_mutex_unlock (&dev->lock);
194 return NULL;
195 }
196
197 offset = h - n->host_start;
198
199 d = n->tgt->tgt_start + n->tgt_offset + offset;
200
201 gomp_mutex_unlock (&dev->lock);
202
203 return d;
204 }
205
206 /* Return the host pointer that corresponds to device data D. Or NULL
207 if no mapping. */
208
209 void *
acc_hostptr(void * d)210 acc_hostptr (void *d)
211 {
212 splay_tree_key n;
213 void *h;
214 void *offset;
215
216 goacc_lazy_initialize ();
217
218 struct goacc_thread *thr = goacc_thread ();
219 struct gomp_device_descr *acc_dev = thr->dev;
220
221 gomp_mutex_lock (&acc_dev->lock);
222
223 n = lookup_dev (acc_dev->openacc.data_environ, d, 1);
224
225 if (!n)
226 {
227 gomp_mutex_unlock (&acc_dev->lock);
228 return NULL;
229 }
230
231 offset = d - n->tgt->tgt_start + n->tgt_offset;
232
233 h = n->host_start + offset;
234
235 gomp_mutex_unlock (&acc_dev->lock);
236
237 return h;
238 }
239
240 /* Return 1 if host data [H,+S] is present on the device. */
241
242 int
acc_is_present(void * h,size_t s)243 acc_is_present (void *h, size_t s)
244 {
245 splay_tree_key n;
246
247 if (!s || !h)
248 return 0;
249
250 goacc_lazy_initialize ();
251
252 struct goacc_thread *thr = goacc_thread ();
253 struct gomp_device_descr *acc_dev = thr->dev;
254
255 gomp_mutex_lock (&acc_dev->lock);
256
257 n = lookup_host (acc_dev, h, s);
258
259 if (n && ((uintptr_t)h < n->host_start
260 || (uintptr_t)h + s > n->host_end
261 || s > n->host_end - n->host_start))
262 n = NULL;
263
264 gomp_mutex_unlock (&acc_dev->lock);
265
266 return n != NULL;
267 }
268
269 /* Create a mapping for host [H,+S] -> device [D,+S] */
270
271 void
acc_map_data(void * h,void * d,size_t s)272 acc_map_data (void *h, void *d, size_t s)
273 {
274 struct target_mem_desc *tgt;
275 size_t mapnum = 1;
276 void *hostaddrs = h;
277 void *devaddrs = d;
278 size_t sizes = s;
279 unsigned short kinds = GOMP_MAP_ALLOC;
280
281 goacc_lazy_initialize ();
282
283 struct goacc_thread *thr = goacc_thread ();
284 struct gomp_device_descr *acc_dev = thr->dev;
285
286 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
287 {
288 if (d != h)
289 gomp_fatal ("cannot map data on shared-memory system");
290
291 tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true,
292 GOMP_MAP_VARS_OPENACC);
293 }
294 else
295 {
296 struct goacc_thread *thr = goacc_thread ();
297
298 if (!d || !h || !s)
299 gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
300 (void *)h, (int)s, (void *)d, (int)s);
301
302 gomp_mutex_lock (&acc_dev->lock);
303
304 if (lookup_host (acc_dev, h, s))
305 {
306 gomp_mutex_unlock (&acc_dev->lock);
307 gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
308 (int)s);
309 }
310
311 if (lookup_dev (thr->dev->openacc.data_environ, d, s))
312 {
313 gomp_mutex_unlock (&acc_dev->lock);
314 gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
315 (int)s);
316 }
317
318 gomp_mutex_unlock (&acc_dev->lock);
319
320 tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
321 &kinds, true, GOMP_MAP_VARS_OPENACC);
322 }
323
324 gomp_mutex_lock (&acc_dev->lock);
325 tgt->prev = acc_dev->openacc.data_environ;
326 acc_dev->openacc.data_environ = tgt;
327 gomp_mutex_unlock (&acc_dev->lock);
328 }
329
void
acc_unmap_data (void *h)
{
  /* Remove the mapping whose host range begins exactly at H.  */
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* No need to call lazy open, as the address must have been mapped.  */

  size_t host_size;

  gomp_mutex_lock (&acc_dev->lock);

  splay_tree_key n = lookup_host (acc_dev, h, 1);
  struct target_mem_desc *t;

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  host_size = n->host_end - n->host_start;

  /* H must be the start of the block, not an interior pointer.  */
  if (n->host_start != (uintptr_t) h)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds %p",
		  (void *) n->host_start, (int) host_size, (void *) h);
    }

  t = n->tgt;

  /* NOTE(review): refcount == 2 appears to mean only the data-environ
     chain and this mapping still refer to T -- confirm against the
     descriptor refcounting scheme elsewhere in libgomp.  */
  if (t->refcount == 2)
    {
      struct target_mem_desc *tp;

      /* This is the last reference, so pull the descriptor off the
	 chain.  This avoids gomp_unmap_vars via gomp_unmap_tgt from
	 freeing the device memory.  */
      t->tgt_end = 0;
      t->to_free = 0;

      /* Unlink T from the data-environ chain; TP trails one node behind.  */
      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
	   tp = t, t = t->prev)
	if (n->tgt == t)
	  {
	    if (tp)
	      tp->prev = t->prev;
	    else
	      acc_dev->openacc.data_environ = t->prev;

	    break;
	  }
    }

  gomp_mutex_unlock (&acc_dev->lock);

  gomp_unmap_vars (t, true);
}
389
390 #define FLAG_PRESENT (1 << 0)
391 #define FLAG_CREATE (1 << 1)
392 #define FLAG_COPY (1 << 2)
393
static void *
present_create_copy (unsigned f, void *h, size_t s)
{
  /* Common implementation of acc_create/acc_copyin and their
     present_or_ variants: return the device address for host range
     [H, H+S), creating the mapping (and optionally copying the data in)
     as the FLAG_* bits in F request.  */
  void *d;
  splay_tree_key n;

  if (!h || !s)
    gomp_fatal ("[%p,+%d] is a bad range", (void *)h, (int)s);

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);
  if (n)
    {
      /* Present. */
      d = (void *) (n->tgt->tgt_start + n->tgt_offset);

      /* An existing mapping is only acceptable for the present_or_
	 variants.  */
      if (!(f & FLAG_PRESENT))
        {
	  gomp_mutex_unlock (&acc_dev->lock);
          gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
        	      (void *)h, (int)s, (void *)d, (int)s);
        }
      /* The requested range must not extend past the mapped block.  */
      if ((h + s) > (void *)n->host_end)
	{
	  gomp_mutex_unlock (&acc_dev->lock);
	  gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
	}

      gomp_mutex_unlock (&acc_dev->lock);
    }
  else if (!(f & FLAG_CREATE))
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
    }
  else
    {
      struct target_mem_desc *tgt;
      size_t mapnum = 1;
      unsigned short kinds;
      void *hostaddrs = h;

      /* Copy the data to the device only when FLAG_COPY is set.  */
      if (f & FLAG_COPY)
	kinds = GOMP_MAP_TO;
      else
	kinds = GOMP_MAP_ALLOC;

      /* Release the device lock around the gomp_map_vars call.  */
      gomp_mutex_unlock (&acc_dev->lock);

      tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true,
			   GOMP_MAP_VARS_OPENACC);

      gomp_mutex_lock (&acc_dev->lock);

      /* Chain the new descriptor onto the device's data environment.  */
      d = tgt->to_free;
      tgt->prev = acc_dev->openacc.data_environ;
      acc_dev->openacc.data_environ = tgt;

      gomp_mutex_unlock (&acc_dev->lock);
    }

  return d;
}
463
void *
acc_create (void *h, size_t s)
{
  /* Map host range [H, H+S) to newly allocated device memory without
     copying; fatal if already mapped.  */
  return present_create_copy (FLAG_CREATE, h, s);
}
469
void *
acc_copyin (void *h, size_t s)
{
  /* Map host range [H, H+S) to the device and copy the host data in;
     fatal if already mapped.  */
  return present_create_copy (FLAG_CREATE | FLAG_COPY, h, s);
}
475
void *
acc_present_or_create (void *h, size_t s)
{
  /* Return the existing device mapping for [H, H+S), or create one
     without copying.  */
  return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s);
}
481
void *
acc_present_or_copyin (void *h, size_t s)
{
  /* Return the existing device mapping for [H, H+S), or create one and
     copy the host data in.  */
  return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s);
}
487
488 #define FLAG_COPYOUT (1 << 0)
489
static void
delete_copyout (unsigned f, void *h, size_t s)
{
  /* Remove the mapping for host range [H, H+S), first copying the
     device data back to the host when FLAG_COPYOUT is set in F.  */
  size_t host_size;
  splay_tree_key n;
  void *d;
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  /* No need to call lazy open, as the data must already have been
     mapped.  */

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);
    }

  /* Device address corresponding to H within the mapped block.  */
  d = (void *) (n->tgt->tgt_start + n->tgt_offset
		+ (uintptr_t) h - n->host_start);

  host_size = n->host_end - n->host_start;

  /* [H, H+S) must be exactly the mapped block, not a sub-range.  */
  if (n->host_start != (uintptr_t) h || host_size != s)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
		  (void *) n->host_start, (int) host_size, (void *) h, (int) s);
    }

  gomp_mutex_unlock (&acc_dev->lock);

  if (f & FLAG_COPYOUT)
    acc_dev->dev2host_func (acc_dev->target_id, h, d, s);

  /* acc_unmap_data takes the device lock itself, and unchains the
     descriptor; the device memory is then freed explicitly below.  */
  acc_unmap_data (h);

  acc_dev->free_func (acc_dev->target_id, d);
}
533
void
acc_delete (void *h, size_t s)
{
  /* Remove the mapping for [H, H+S) without copying data back.  */
  delete_copyout (0, h, s);
}
539
acc_copyout(void * h,size_t s)540 void acc_copyout (void *h, size_t s)
541 {
542 delete_copyout (FLAG_COPYOUT, h, s);
543 }
544
545 static void
update_dev_host(int is_dev,void * h,size_t s)546 update_dev_host (int is_dev, void *h, size_t s)
547 {
548 splay_tree_key n;
549 void *d;
550
551 goacc_lazy_initialize ();
552
553 struct goacc_thread *thr = goacc_thread ();
554 struct gomp_device_descr *acc_dev = thr->dev;
555
556 gomp_mutex_lock (&acc_dev->lock);
557
558 n = lookup_host (acc_dev, h, s);
559
560 if (!n)
561 {
562 gomp_mutex_unlock (&acc_dev->lock);
563 gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
564 }
565
566 d = (void *) (n->tgt->tgt_start + n->tgt_offset
567 + (uintptr_t) h - n->host_start);
568
569 gomp_mutex_unlock (&acc_dev->lock);
570
571 if (is_dev)
572 acc_dev->host2dev_func (acc_dev->target_id, d, h, s);
573 else
574 acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
575 }
576
void
acc_update_device (void *h, size_t s)
{
  /* Copy host range [H, H+S) to its existing device mapping.  */
  update_dev_host (1, h, s);
}
582
void
acc_update_self (void *h, size_t s)
{
  /* Copy the device data mapped for [H, H+S) back to the host.  */
  update_dev_host (0, h, s);
}
588
589 void
gomp_acc_insert_pointer(size_t mapnum,void ** hostaddrs,size_t * sizes,void * kinds)590 gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
591 void *kinds)
592 {
593 struct target_mem_desc *tgt;
594 struct goacc_thread *thr = goacc_thread ();
595 struct gomp_device_descr *acc_dev = thr->dev;
596
597 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
598 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs,
599 NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC);
600 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
601
602 gomp_mutex_lock (&acc_dev->lock);
603 tgt->prev = acc_dev->openacc.data_environ;
604 acc_dev->openacc.data_environ = tgt;
605 gomp_mutex_unlock (&acc_dev->lock);
606 }
607
void
gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
{
  /* Tear down the mapping set created by gomp_acc_insert_pointer for
     host address H.  MAPNUM is the number of mappings in the set, ASYNC
     the async queue (teardown is deferred for async queues), and
     FORCE_COPYFROM forces a device-to-host copy on unmap.  */
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;
  splay_tree_key n;
  struct target_mem_desc *t;
  /* NOTE(review): minimum refcount at which T gets unchained; the 2/3
     split on MAPNUM mirrors acc_unmap_data's refcount == 2 test --
     confirm against the descriptor refcounting scheme.  */
  int minrefs = (mapnum == 1) ? 2 : 3;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, 1);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);

  t = n->tgt;

  struct target_mem_desc *tp;

  if (t->refcount == minrefs)
    {
      /* This is the last reference, so pull the descriptor off the
	 chain.  This avoids gomp_unmap_vars via gomp_unmap_tgt from
	 freeing the device memory.  */
      t->tgt_end = 0;
      t->to_free = 0;

      /* Unlink T from the data-environ chain; TP trails one node
	 behind.  */
      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
	   tp = t, t = t->prev)
	{
	  if (n->tgt == t)
	    {
	      if (tp)
		tp->prev = t->prev;
	      else
		acc_dev->openacc.data_environ = t->prev;
	      break;
	    }
	}
    }

  if (force_copyfrom)
    t->list[0].copy_from = 1;

  gomp_mutex_unlock (&acc_dev->lock);

  /* If running synchronously, unmap immediately. */
  if (async < acc_async_noval)
    gomp_unmap_vars (t, true);
  else
    {
      /* Defer the copy-back and cleanup to the async queue.  */
      gomp_copy_from_async (t);
      acc_dev->openacc.register_async_cleanup_func (t);
    }

  gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
}
671