1 /* OpenACC Runtime initialization routines 2 3 Copyright (C) 2013-2018 Free Software Foundation, Inc. 4 5 Contributed by Mentor Embedded. 6 7 This file is part of the GNU Offloading and Multi Processing Library 8 (libgomp). 9 10 Libgomp is free software; you can redistribute it and/or modify it 11 under the terms of the GNU General Public License as published by 12 the Free Software Foundation; either version 3, or (at your option) 13 any later version. 14 15 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY 16 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 17 FOR A PARTICULAR PURPOSE. See the GNU General Public License for 18 more details. 19 20 Under Section 7 of GPL version 3, you are granted additional 21 permissions described in the GCC Runtime Library Exception, version 22 3.1, as published by the Free Software Foundation. 23 24 You should have received a copy of the GNU General Public License and 25 a copy of the GCC Runtime Library Exception along with this program; 26 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 27 <http://www.gnu.org/licenses/>. */ 28 29 #include "openacc.h" 30 #include "config.h" 31 #include "libgomp.h" 32 #include "gomp-constants.h" 33 #include "oacc-int.h" 34 #include <stdint.h> 35 #include <string.h> 36 #include <assert.h> 37 38 /* Return block containing [H->S), or NULL if not contained. The device lock 39 for DEV must be locked on entry, and remains locked on exit. */ 40 41 static splay_tree_key 42 lookup_host (struct gomp_device_descr *dev, void *h, size_t s) 43 { 44 struct splay_tree_key_s node; 45 splay_tree_key key; 46 47 node.host_start = (uintptr_t) h; 48 node.host_end = (uintptr_t) h + s; 49 50 key = splay_tree_lookup (&dev->mem_map, &node); 51 52 return key; 53 } 54 55 /* Return block containing [D->S), or NULL if not contained. 56 The list isn't ordered by device address, so we have to iterate 57 over the whole array. This is not expected to be a common 58 operation. The device lock associated with TGT must be locked on entry, and 59 remains locked on exit. */ 60 61 static splay_tree_key 62 lookup_dev (struct target_mem_desc *tgt, void *d, size_t s) 63 { 64 int i; 65 struct target_mem_desc *t; 66 67 if (!tgt) 68 return NULL; 69 70 for (t = tgt; t != NULL; t = t->prev) 71 { 72 if (t->tgt_start <= (uintptr_t) d && t->tgt_end >= (uintptr_t) d + s) 73 break; 74 } 75 76 if (!t) 77 return NULL; 78 79 for (i = 0; i < t->list_count; i++) 80 { 81 void * offset; 82 83 splay_tree_key k = &t->array[i].key; 84 offset = d - t->tgt_start + k->tgt_offset; 85 86 if (k->host_start + offset <= (void *) k->host_end) 87 return k; 88 } 89 90 return NULL; 91 } 92 93 /* OpenACC is silent on how memory exhaustion is indicated. We return 94 NULL. */ 95 96 void * 97 acc_malloc (size_t s) 98 { 99 if (!s) 100 return NULL; 101 102 goacc_lazy_initialize (); 103 104 struct goacc_thread *thr = goacc_thread (); 105 106 assert (thr->dev); 107 108 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 109 return malloc (s); 110 111 return thr->dev->alloc_func (thr->dev->target_id, s); 112 } 113 114 /* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event 115 the device address is mapped. We choose to check if it mapped, 116 and if it is, to unmap it. */ 117 void 118 acc_free (void *d) 119 { 120 splay_tree_key k; 121 122 if (!d) 123 return; 124 125 struct goacc_thread *thr = goacc_thread (); 126 127 assert (thr && thr->dev); 128 129 struct gomp_device_descr *acc_dev = thr->dev; 130 131 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 132 return free (d); 133 134 gomp_mutex_lock (&acc_dev->lock); 135 136 /* We don't have to call lazy open here, as the ptr value must have 137 been returned by acc_malloc. It's not permitted to pass NULL in 138 (unless you got that null from acc_malloc). */ 139 if ((k = lookup_dev (acc_dev->openacc.data_environ, d, 1))) 140 { 141 void *offset; 142 143 offset = d - k->tgt->tgt_start + k->tgt_offset; 144 145 gomp_mutex_unlock (&acc_dev->lock); 146 147 acc_unmap_data ((void *)(k->host_start + offset)); 148 } 149 else 150 gomp_mutex_unlock (&acc_dev->lock); 151 152 if (!acc_dev->free_func (acc_dev->target_id, d)) 153 gomp_fatal ("error in freeing device memory in %s", __FUNCTION__); 154 } 155 156 void 157 acc_memcpy_to_device (void *d, void *h, size_t s) 158 { 159 /* No need to call lazy open here, as the device pointer must have 160 been obtained from a routine that did that. */ 161 struct goacc_thread *thr = goacc_thread (); 162 163 assert (thr && thr->dev); 164 165 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 166 { 167 memmove (d, h, s); 168 return; 169 } 170 171 if (!thr->dev->host2dev_func (thr->dev->target_id, d, h, s)) 172 gomp_fatal ("error in %s", __FUNCTION__); 173 } 174 175 void 176 acc_memcpy_from_device (void *h, void *d, size_t s) 177 { 178 /* No need to call lazy open here, as the device pointer must have 179 been obtained from a routine that did that. */ 180 struct goacc_thread *thr = goacc_thread (); 181 182 assert (thr && thr->dev); 183 184 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 185 { 186 memmove (h, d, s); 187 return; 188 } 189 190 if (!thr->dev->dev2host_func (thr->dev->target_id, h, d, s)) 191 gomp_fatal ("error in %s", __FUNCTION__); 192 } 193 194 /* Return the device pointer that corresponds to host data H. Or NULL 195 if no mapping. */ 196 197 void * 198 acc_deviceptr (void *h) 199 { 200 splay_tree_key n; 201 void *d; 202 void *offset; 203 204 goacc_lazy_initialize (); 205 206 struct goacc_thread *thr = goacc_thread (); 207 struct gomp_device_descr *dev = thr->dev; 208 209 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 210 return h; 211 212 gomp_mutex_lock (&dev->lock); 213 214 n = lookup_host (dev, h, 1); 215 216 if (!n) 217 { 218 gomp_mutex_unlock (&dev->lock); 219 return NULL; 220 } 221 222 offset = h - n->host_start; 223 224 d = n->tgt->tgt_start + n->tgt_offset + offset; 225 226 gomp_mutex_unlock (&dev->lock); 227 228 return d; 229 } 230 231 /* Return the host pointer that corresponds to device data D. Or NULL 232 if no mapping. */ 233 234 void * 235 acc_hostptr (void *d) 236 { 237 splay_tree_key n; 238 void *h; 239 void *offset; 240 241 goacc_lazy_initialize (); 242 243 struct goacc_thread *thr = goacc_thread (); 244 struct gomp_device_descr *acc_dev = thr->dev; 245 246 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 247 return d; 248 249 gomp_mutex_lock (&acc_dev->lock); 250 251 n = lookup_dev (acc_dev->openacc.data_environ, d, 1); 252 253 if (!n) 254 { 255 gomp_mutex_unlock (&acc_dev->lock); 256 return NULL; 257 } 258 259 offset = d - n->tgt->tgt_start + n->tgt_offset; 260 261 h = n->host_start + offset; 262 263 gomp_mutex_unlock (&acc_dev->lock); 264 265 return h; 266 } 267 268 /* Return 1 if host data [H,+S] is present on the device. */ 269 270 int 271 acc_is_present (void *h, size_t s) 272 { 273 splay_tree_key n; 274 275 if (!s || !h) 276 return 0; 277 278 goacc_lazy_initialize (); 279 280 struct goacc_thread *thr = goacc_thread (); 281 struct gomp_device_descr *acc_dev = thr->dev; 282 283 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 284 return h != NULL; 285 286 gomp_mutex_lock (&acc_dev->lock); 287 288 n = lookup_host (acc_dev, h, s); 289 290 if (n && ((uintptr_t)h < n->host_start 291 || (uintptr_t)h + s > n->host_end 292 || s > n->host_end - n->host_start)) 293 n = NULL; 294 295 gomp_mutex_unlock (&acc_dev->lock); 296 297 return n != NULL; 298 } 299 300 /* Create a mapping for host [H,+S] -> device [D,+S] */ 301 302 void 303 acc_map_data (void *h, void *d, size_t s) 304 { 305 struct target_mem_desc *tgt = NULL; 306 size_t mapnum = 1; 307 void *hostaddrs = h; 308 void *devaddrs = d; 309 size_t sizes = s; 310 unsigned short kinds = GOMP_MAP_ALLOC; 311 312 goacc_lazy_initialize (); 313 314 struct goacc_thread *thr = goacc_thread (); 315 struct gomp_device_descr *acc_dev = thr->dev; 316 317 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 318 { 319 if (d != h) 320 gomp_fatal ("cannot map data on shared-memory system"); 321 } 322 else 323 { 324 struct goacc_thread *thr = goacc_thread (); 325 326 if (!d || !h || !s) 327 gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map", 328 (void *)h, (int)s, (void *)d, (int)s); 329 330 gomp_mutex_lock (&acc_dev->lock); 331 332 if (lookup_host (acc_dev, h, s)) 333 { 334 gomp_mutex_unlock (&acc_dev->lock); 335 gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h, 336 (int)s); 337 } 338 339 if (lookup_dev (thr->dev->openacc.data_environ, d, s)) 340 { 341 gomp_mutex_unlock (&acc_dev->lock); 342 gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d, 343 (int)s); 344 } 345 346 gomp_mutex_unlock (&acc_dev->lock); 347 348 tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes, 349 &kinds, true, GOMP_MAP_VARS_OPENACC); 350 } 351 352 gomp_mutex_lock (&acc_dev->lock); 353 tgt->prev = acc_dev->openacc.data_environ; 354 acc_dev->openacc.data_environ = tgt; 355 gomp_mutex_unlock (&acc_dev->lock); 356 } 357 358 void 359 acc_unmap_data (void *h) 360 { 361 struct goacc_thread *thr = goacc_thread (); 362 struct gomp_device_descr *acc_dev = thr->dev; 363 364 /* No need to call lazy open, as the address must have been mapped. */ 365 366 /* This is a no-op on shared-memory targets. */ 367 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 368 return; 369 370 size_t host_size; 371 372 gomp_mutex_lock (&acc_dev->lock); 373 374 splay_tree_key n = lookup_host (acc_dev, h, 1); 375 struct target_mem_desc *t; 376 377 if (!n) 378 { 379 gomp_mutex_unlock (&acc_dev->lock); 380 gomp_fatal ("%p is not a mapped block", (void *)h); 381 } 382 383 host_size = n->host_end - n->host_start; 384 385 if (n->host_start != (uintptr_t) h) 386 { 387 gomp_mutex_unlock (&acc_dev->lock); 388 gomp_fatal ("[%p,%d] surrounds %p", 389 (void *) n->host_start, (int) host_size, (void *) h); 390 } 391 392 t = n->tgt; 393 394 if (t->refcount == 2) 395 { 396 struct target_mem_desc *tp; 397 398 /* This is the last reference, so pull the descriptor off the 399 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from 400 freeing the device memory. */ 401 t->tgt_end = 0; 402 t->to_free = 0; 403 404 for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL; 405 tp = t, t = t->prev) 406 if (n->tgt == t) 407 { 408 if (tp) 409 tp->prev = t->prev; 410 else 411 acc_dev->openacc.data_environ = t->prev; 412 413 break; 414 } 415 } 416 417 gomp_mutex_unlock (&acc_dev->lock); 418 419 gomp_unmap_vars (t, true); 420 } 421 422 #define FLAG_PRESENT (1 << 0) 423 #define FLAG_CREATE (1 << 1) 424 #define FLAG_COPY (1 << 2) 425 426 static void * 427 present_create_copy (unsigned f, void *h, size_t s) 428 { 429 void *d; 430 splay_tree_key n; 431 432 if (!h || !s) 433 gomp_fatal ("[%p,+%d] is a bad range", (void *)h, (int)s); 434 435 goacc_lazy_initialize (); 436 437 struct goacc_thread *thr = goacc_thread (); 438 struct gomp_device_descr *acc_dev = thr->dev; 439 440 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 441 return h; 442 443 gomp_mutex_lock (&acc_dev->lock); 444 445 n = lookup_host (acc_dev, h, s); 446 if (n) 447 { 448 /* Present. */ 449 d = (void *) (n->tgt->tgt_start + n->tgt_offset); 450 451 if (!(f & FLAG_PRESENT)) 452 { 453 gomp_mutex_unlock (&acc_dev->lock); 454 gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]", 455 (void *)h, (int)s, (void *)d, (int)s); 456 } 457 if ((h + s) > (void *)n->host_end) 458 { 459 gomp_mutex_unlock (&acc_dev->lock); 460 gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s); 461 } 462 463 gomp_mutex_unlock (&acc_dev->lock); 464 } 465 else if (!(f & FLAG_CREATE)) 466 { 467 gomp_mutex_unlock (&acc_dev->lock); 468 gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s); 469 } 470 else 471 { 472 struct target_mem_desc *tgt; 473 size_t mapnum = 1; 474 unsigned short kinds; 475 void *hostaddrs = h; 476 477 if (f & FLAG_COPY) 478 kinds = GOMP_MAP_TO; 479 else 480 kinds = GOMP_MAP_ALLOC; 481 482 gomp_mutex_unlock (&acc_dev->lock); 483 484 tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true, 485 GOMP_MAP_VARS_OPENACC); 486 487 gomp_mutex_lock (&acc_dev->lock); 488 489 d = tgt->to_free; 490 tgt->prev = acc_dev->openacc.data_environ; 491 acc_dev->openacc.data_environ = tgt; 492 493 gomp_mutex_unlock (&acc_dev->lock); 494 } 495 496 return d; 497 } 498 499 void * 500 acc_create (void *h, size_t s) 501 { 502 return present_create_copy (FLAG_CREATE, h, s); 503 } 504 505 void * 506 acc_copyin (void *h, size_t s) 507 { 508 return present_create_copy (FLAG_CREATE | FLAG_COPY, h, s); 509 } 510 511 void * 512 acc_present_or_create (void *h, size_t s) 513 { 514 return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s); 515 } 516 517 /* acc_pcreate is acc_present_or_create by a different name. */ 518 #ifdef HAVE_ATTRIBUTE_ALIAS 519 strong_alias (acc_present_or_create, acc_pcreate) 520 #else 521 void * 522 acc_pcreate (void *h, size_t s) 523 { 524 return acc_present_or_create (h, s); 525 } 526 #endif 527 528 void * 529 acc_present_or_copyin (void *h, size_t s) 530 { 531 return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s); 532 } 533 534 /* acc_pcopyin is acc_present_or_copyin by a different name. */ 535 #ifdef HAVE_ATTRIBUTE_ALIAS 536 strong_alias (acc_present_or_copyin, acc_pcopyin) 537 #else 538 void * 539 acc_pcopyin (void *h, size_t s) 540 { 541 return acc_present_or_copyin (h, s); 542 } 543 #endif 544 545 #define FLAG_COPYOUT (1 << 0) 546 547 static void 548 delete_copyout (unsigned f, void *h, size_t s, const char *libfnname) 549 { 550 size_t host_size; 551 splay_tree_key n; 552 void *d; 553 struct goacc_thread *thr = goacc_thread (); 554 struct gomp_device_descr *acc_dev = thr->dev; 555 556 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 557 return; 558 559 gomp_mutex_lock (&acc_dev->lock); 560 561 n = lookup_host (acc_dev, h, s); 562 563 /* No need to call lazy open, as the data must already have been 564 mapped. */ 565 566 if (!n) 567 { 568 gomp_mutex_unlock (&acc_dev->lock); 569 gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s); 570 } 571 572 d = (void *) (n->tgt->tgt_start + n->tgt_offset 573 + (uintptr_t) h - n->host_start); 574 575 host_size = n->host_end - n->host_start; 576 577 if (n->host_start != (uintptr_t) h || host_size != s) 578 { 579 gomp_mutex_unlock (&acc_dev->lock); 580 gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]", 581 (void *) n->host_start, (int) host_size, (void *) h, (int) s); 582 } 583 584 gomp_mutex_unlock (&acc_dev->lock); 585 586 if (f & FLAG_COPYOUT) 587 acc_dev->dev2host_func (acc_dev->target_id, h, d, s); 588 589 acc_unmap_data (h); 590 591 if (!acc_dev->free_func (acc_dev->target_id, d)) 592 gomp_fatal ("error in freeing device memory in %s", libfnname); 593 } 594 595 void 596 acc_delete (void *h , size_t s) 597 { 598 delete_copyout (0, h, s, __FUNCTION__); 599 } 600 601 void 602 acc_copyout (void *h, size_t s) 603 { 604 delete_copyout (FLAG_COPYOUT, h, s, __FUNCTION__); 605 } 606 607 static void 608 update_dev_host (int is_dev, void *h, size_t s) 609 { 610 splay_tree_key n; 611 void *d; 612 613 goacc_lazy_initialize (); 614 615 struct goacc_thread *thr = goacc_thread (); 616 struct gomp_device_descr *acc_dev = thr->dev; 617 618 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 619 return; 620 621 gomp_mutex_lock (&acc_dev->lock); 622 623 n = lookup_host (acc_dev, h, s); 624 625 if (!n) 626 { 627 gomp_mutex_unlock (&acc_dev->lock); 628 gomp_fatal ("[%p,%d] is not mapped", h, (int)s); 629 } 630 631 d = (void *) (n->tgt->tgt_start + n->tgt_offset 632 + (uintptr_t) h - n->host_start); 633 634 if (is_dev) 635 acc_dev->host2dev_func (acc_dev->target_id, d, h, s); 636 else 637 acc_dev->dev2host_func (acc_dev->target_id, h, d, s); 638 639 gomp_mutex_unlock (&acc_dev->lock); 640 } 641 642 void 643 acc_update_device (void *h, size_t s) 644 { 645 update_dev_host (1, h, s); 646 } 647 648 void 649 acc_update_self (void *h, size_t s) 650 { 651 update_dev_host (0, h, s); 652 } 653 654 void 655 gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes, 656 void *kinds) 657 { 658 struct target_mem_desc *tgt; 659 struct goacc_thread *thr = goacc_thread (); 660 struct gomp_device_descr *acc_dev = thr->dev; 661 662 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__); 663 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, 664 NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC); 665 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__); 666 667 gomp_mutex_lock (&acc_dev->lock); 668 tgt->prev = acc_dev->openacc.data_environ; 669 acc_dev->openacc.data_environ = tgt; 670 gomp_mutex_unlock (&acc_dev->lock); 671 } 672 673 void 674 gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum) 675 { 676 struct goacc_thread *thr = goacc_thread (); 677 struct gomp_device_descr *acc_dev = thr->dev; 678 splay_tree_key n; 679 struct target_mem_desc *t; 680 int minrefs = (mapnum == 1) ? 2 : 3; 681 682 gomp_mutex_lock (&acc_dev->lock); 683 684 n = lookup_host (acc_dev, h, 1); 685 686 if (!n) 687 { 688 gomp_mutex_unlock (&acc_dev->lock); 689 gomp_fatal ("%p is not a mapped block", (void *)h); 690 } 691 692 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__); 693 694 t = n->tgt; 695 696 struct target_mem_desc *tp; 697 698 if (t->refcount == minrefs) 699 { 700 /* This is the last reference, so pull the descriptor off the 701 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from 702 freeing the device memory. */ 703 t->tgt_end = 0; 704 t->to_free = 0; 705 706 for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL; 707 tp = t, t = t->prev) 708 { 709 if (n->tgt == t) 710 { 711 if (tp) 712 tp->prev = t->prev; 713 else 714 acc_dev->openacc.data_environ = t->prev; 715 break; 716 } 717 } 718 } 719 720 if (force_copyfrom) 721 t->list[0].copy_from = 1; 722 723 gomp_mutex_unlock (&acc_dev->lock); 724 725 /* If running synchronously, unmap immediately. */ 726 if (async < acc_async_noval) 727 gomp_unmap_vars (t, true); 728 else 729 t->device_descr->openacc.register_async_cleanup_func (t, async); 730 731 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__); 732 } 733