1 /* Copyright (C) 2013-2018 Free Software Foundation, Inc. 2 Contributed by Jakub Jelinek <jakub@redhat.com>. 3 4 This file is part of the GNU Offloading and Multi Processing Library 5 (libgomp). 6 7 Libgomp is free software; you can redistribute it and/or modify it 8 under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 3, or (at your option) 10 any later version. 11 12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY 13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for 15 more details. 16 17 Under Section 7 of GPL version 3, you are granted additional 18 permissions described in the GCC Runtime Library Exception, version 19 3.1, as published by the Free Software Foundation. 20 21 You should have received a copy of the GNU General Public License and 22 a copy of the GCC Runtime Library Exception along with this program; 23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 24 <http://www.gnu.org/licenses/>. */ 25 26 /* This file contains the support of offloading. */ 27 28 #include "config.h" 29 #include "libgomp.h" 30 #include "oacc-plugin.h" 31 #include "oacc-int.h" 32 #include "gomp-constants.h" 33 #include <limits.h> 34 #include <stdbool.h> 35 #include <stdlib.h> 36 #ifdef HAVE_INTTYPES_H 37 # include <inttypes.h> /* For PRIu64. */ 38 #endif 39 #include <string.h> 40 #include <assert.h> 41 #include <errno.h> 42 43 #ifdef PLUGIN_SUPPORT 44 #include <dlfcn.h> 45 #include "plugin-suffix.h" 46 #endif 47 48 static void gomp_target_init (void); 49 50 /* The whole initialization code for offloading plugins is only run one. */ 51 static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT; 52 53 /* Mutex for offload image registration. */ 54 static gomp_mutex_t register_lock; 55 56 /* This structure describes an offload image. 
57 It contains type of the target device, pointer to host table descriptor, and 58 pointer to target data. */ 59 struct offload_image_descr { 60 unsigned version; 61 enum offload_target_type type; 62 const void *host_table; 63 const void *target_data; 64 }; 65 66 /* Array of descriptors of offload images. */ 67 static struct offload_image_descr *offload_images; 68 69 /* Total number of offload images. */ 70 static int num_offload_images; 71 72 /* Array of descriptors for all available devices. */ 73 static struct gomp_device_descr *devices; 74 75 /* Total number of available devices. */ 76 static int num_devices; 77 78 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */ 79 static int num_devices_openmp; 80 81 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */ 82 83 static void * 84 gomp_realloc_unlock (void *old, size_t size) 85 { 86 void *ret = realloc (old, size); 87 if (ret == NULL) 88 { 89 gomp_mutex_unlock (®ister_lock); 90 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size); 91 } 92 return ret; 93 } 94 95 attribute_hidden void 96 gomp_init_targets_once (void) 97 { 98 (void) pthread_once (&gomp_is_initialized, gomp_target_init); 99 } 100 101 attribute_hidden int 102 gomp_get_num_devices (void) 103 { 104 gomp_init_targets_once (); 105 return num_devices_openmp; 106 } 107 108 static struct gomp_device_descr * 109 resolve_device (int device_id) 110 { 111 if (device_id == GOMP_DEVICE_ICV) 112 { 113 struct gomp_task_icv *icv = gomp_icv (false); 114 device_id = icv->default_device_var; 115 } 116 117 if (device_id < 0 || device_id >= gomp_get_num_devices ()) 118 return NULL; 119 120 gomp_mutex_lock (&devices[device_id].lock); 121 if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED) 122 gomp_init_device (&devices[device_id]); 123 else if (devices[device_id].state == GOMP_DEVICE_FINALIZED) 124 { 125 gomp_mutex_unlock (&devices[device_id].lock); 126 return NULL; 127 } 128 gomp_mutex_unlock (&devices[device_id].lock); 
129 130 return &devices[device_id]; 131 } 132 133 134 static inline splay_tree_key 135 gomp_map_lookup (splay_tree mem_map, splay_tree_key key) 136 { 137 if (key->host_start != key->host_end) 138 return splay_tree_lookup (mem_map, key); 139 140 key->host_end++; 141 splay_tree_key n = splay_tree_lookup (mem_map, key); 142 key->host_end--; 143 if (n) 144 return n; 145 key->host_start--; 146 n = splay_tree_lookup (mem_map, key); 147 key->host_start++; 148 if (n) 149 return n; 150 return splay_tree_lookup (mem_map, key); 151 } 152 153 static inline splay_tree_key 154 gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key) 155 { 156 if (key->host_start != key->host_end) 157 return splay_tree_lookup (mem_map, key); 158 159 key->host_end++; 160 splay_tree_key n = splay_tree_lookup (mem_map, key); 161 key->host_end--; 162 return n; 163 } 164 165 static inline void 166 gomp_device_copy (struct gomp_device_descr *devicep, 167 bool (*copy_func) (int, void *, const void *, size_t), 168 const char *dst, void *dstaddr, 169 const char *src, const void *srcaddr, 170 size_t size) 171 { 172 if (!copy_func (devicep->target_id, dstaddr, srcaddr, size)) 173 { 174 gomp_mutex_unlock (&devicep->lock); 175 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed", 176 src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size); 177 } 178 } 179 180 /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses) 181 host to device memory transfers. */ 182 183 struct gomp_coalesce_buf 184 { 185 /* Buffer into which gomp_copy_host2dev will memcpy data and from which 186 it will be copied to the device. */ 187 void *buf; 188 struct target_mem_desc *tgt; 189 /* Array with offsets, chunks[2 * i] is the starting offset and 190 chunks[2 * i + 1] ending offset relative to tgt->tgt_start device address 191 of chunks which are to be copied to buf and later copied to device. 
*/ 192 size_t *chunks; 193 /* Number of chunks in chunks array, or -1 if coalesce buffering should not 194 be performed. */ 195 long chunk_cnt; 196 /* During construction of chunks array, how many memory regions are within 197 the last chunk. If there is just one memory region for a chunk, we copy 198 it directly to device rather than going through buf. */ 199 long use_cnt; 200 }; 201 202 /* Maximum size of memory region considered for coalescing. Larger copies 203 are performed directly. */ 204 #define MAX_COALESCE_BUF_SIZE (32 * 1024) 205 206 /* Maximum size of a gap in between regions to consider them being copied 207 within the same chunk. All the device offsets considered are within 208 newly allocated device memory, so it isn't fatal if we copy some padding 209 in between from host to device. The gaps come either from alignment 210 padding or from memory regions which are not supposed to be copied from 211 host to device (e.g. map(alloc:), map(from:) etc.). */ 212 #define MAX_COALESCE_BUF_GAP (4 * 1024) 213 214 /* Add region with device tgt_start relative offset and length to CBUF. */ 215 216 static inline void 217 gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len) 218 { 219 if (len > MAX_COALESCE_BUF_SIZE || len == 0) 220 return; 221 if (cbuf->chunk_cnt) 222 { 223 if (cbuf->chunk_cnt < 0) 224 return; 225 if (start < cbuf->chunks[2 * cbuf->chunk_cnt - 1]) 226 { 227 cbuf->chunk_cnt = -1; 228 return; 229 } 230 if (start < cbuf->chunks[2 * cbuf->chunk_cnt - 1] + MAX_COALESCE_BUF_GAP) 231 { 232 cbuf->chunks[2 * cbuf->chunk_cnt - 1] = start + len; 233 cbuf->use_cnt++; 234 return; 235 } 236 /* If the last chunk is only used by one mapping, discard it, 237 as it will be one host to device copy anyway and 238 memcpying it around will only waste cycles. 
*/ 239 if (cbuf->use_cnt == 1) 240 cbuf->chunk_cnt--; 241 } 242 cbuf->chunks[2 * cbuf->chunk_cnt] = start; 243 cbuf->chunks[2 * cbuf->chunk_cnt + 1] = start + len; 244 cbuf->chunk_cnt++; 245 cbuf->use_cnt = 1; 246 } 247 248 /* Return true for mapping kinds which need to copy data from the 249 host to device for regions that weren't previously mapped. */ 250 251 static inline bool 252 gomp_to_device_kind_p (int kind) 253 { 254 switch (kind) 255 { 256 case GOMP_MAP_ALLOC: 257 case GOMP_MAP_FROM: 258 case GOMP_MAP_FORCE_ALLOC: 259 case GOMP_MAP_ALWAYS_FROM: 260 return false; 261 default: 262 return true; 263 } 264 } 265 266 static void 267 gomp_copy_host2dev (struct gomp_device_descr *devicep, 268 void *d, const void *h, size_t sz, 269 struct gomp_coalesce_buf *cbuf) 270 { 271 if (cbuf) 272 { 273 uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start; 274 if (doff < cbuf->chunks[2 * cbuf->chunk_cnt - 1]) 275 { 276 long first = 0; 277 long last = cbuf->chunk_cnt - 1; 278 while (first <= last) 279 { 280 long middle = (first + last) >> 1; 281 if (cbuf->chunks[2 * middle + 1] <= doff) 282 first = middle + 1; 283 else if (cbuf->chunks[2 * middle] <= doff) 284 { 285 if (doff + sz > cbuf->chunks[2 * middle + 1]) 286 gomp_fatal ("internal libgomp cbuf error"); 287 memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0]), 288 h, sz); 289 return; 290 } 291 else 292 last = middle - 1; 293 } 294 } 295 } 296 gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz); 297 } 298 299 static void 300 gomp_copy_dev2host (struct gomp_device_descr *devicep, 301 void *h, const void *d, size_t sz) 302 { 303 gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz); 304 } 305 306 static void 307 gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr) 308 { 309 if (!devicep->free_func (devicep->target_id, devptr)) 310 { 311 gomp_mutex_unlock (&devicep->lock); 312 gomp_fatal ("error in freeing device memory block at %p", devptr); 313 } 314 } 
315 316 /* Handle the case where gomp_map_lookup, splay_tree_lookup or 317 gomp_map_0len_lookup found oldn for newn. 318 Helper function of gomp_map_vars. */ 319 320 static inline void 321 gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn, 322 splay_tree_key newn, struct target_var_desc *tgt_var, 323 unsigned char kind, struct gomp_coalesce_buf *cbuf) 324 { 325 tgt_var->key = oldn; 326 tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind); 327 tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind); 328 tgt_var->offset = newn->host_start - oldn->host_start; 329 tgt_var->length = newn->host_end - newn->host_start; 330 331 if ((kind & GOMP_MAP_FLAG_FORCE) 332 || oldn->host_start > newn->host_start 333 || oldn->host_end < newn->host_end) 334 { 335 gomp_mutex_unlock (&devicep->lock); 336 gomp_fatal ("Trying to map into device [%p..%p) object when " 337 "[%p..%p) is already mapped", 338 (void *) newn->host_start, (void *) newn->host_end, 339 (void *) oldn->host_start, (void *) oldn->host_end); 340 } 341 342 if (GOMP_MAP_ALWAYS_TO_P (kind)) 343 gomp_copy_host2dev (devicep, 344 (void *) (oldn->tgt->tgt_start + oldn->tgt_offset 345 + newn->host_start - oldn->host_start), 346 (void *) newn->host_start, 347 newn->host_end - newn->host_start, cbuf); 348 349 if (oldn->refcount != REFCOUNT_INFINITY) 350 oldn->refcount++; 351 } 352 353 static int 354 get_kind (bool short_mapkind, void *kinds, int idx) 355 { 356 return short_mapkind ? 
((unsigned short *) kinds)[idx] 357 : ((unsigned char *) kinds)[idx]; 358 } 359 360 static void 361 gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr, 362 uintptr_t target_offset, uintptr_t bias, 363 struct gomp_coalesce_buf *cbuf) 364 { 365 struct gomp_device_descr *devicep = tgt->device_descr; 366 struct splay_tree_s *mem_map = &devicep->mem_map; 367 struct splay_tree_key_s cur_node; 368 369 cur_node.host_start = host_ptr; 370 if (cur_node.host_start == (uintptr_t) NULL) 371 { 372 cur_node.tgt_offset = (uintptr_t) NULL; 373 gomp_copy_host2dev (devicep, 374 (void *) (tgt->tgt_start + target_offset), 375 (void *) &cur_node.tgt_offset, 376 sizeof (void *), cbuf); 377 return; 378 } 379 /* Add bias to the pointer value. */ 380 cur_node.host_start += bias; 381 cur_node.host_end = cur_node.host_start; 382 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); 383 if (n == NULL) 384 { 385 gomp_mutex_unlock (&devicep->lock); 386 gomp_fatal ("Pointer target of array section wasn't mapped"); 387 } 388 cur_node.host_start -= n->host_start; 389 cur_node.tgt_offset 390 = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start; 391 /* At this point tgt_offset is target address of the 392 array section. Now subtract bias to get what we want 393 to initialize the pointer with. */ 394 cur_node.tgt_offset -= bias; 395 gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + target_offset), 396 (void *) &cur_node.tgt_offset, sizeof (void *), cbuf); 397 } 398 399 static void 400 gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n, 401 size_t first, size_t i, void **hostaddrs, 402 size_t *sizes, void *kinds, 403 struct gomp_coalesce_buf *cbuf) 404 { 405 struct gomp_device_descr *devicep = tgt->device_descr; 406 struct splay_tree_s *mem_map = &devicep->mem_map; 407 struct splay_tree_key_s cur_node; 408 int kind; 409 const bool short_mapkind = true; 410 const int typemask = short_mapkind ? 
0xff : 0x7; 411 412 cur_node.host_start = (uintptr_t) hostaddrs[i]; 413 cur_node.host_end = cur_node.host_start + sizes[i]; 414 splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node); 415 kind = get_kind (short_mapkind, kinds, i); 416 if (n2 417 && n2->tgt == n->tgt 418 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset) 419 { 420 gomp_map_vars_existing (devicep, n2, &cur_node, 421 &tgt->list[i], kind & typemask, cbuf); 422 return; 423 } 424 if (sizes[i] == 0) 425 { 426 if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1]) 427 { 428 cur_node.host_start--; 429 n2 = splay_tree_lookup (mem_map, &cur_node); 430 cur_node.host_start++; 431 if (n2 432 && n2->tgt == n->tgt 433 && n2->host_start - n->host_start 434 == n2->tgt_offset - n->tgt_offset) 435 { 436 gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i], 437 kind & typemask, cbuf); 438 return; 439 } 440 } 441 cur_node.host_end++; 442 n2 = splay_tree_lookup (mem_map, &cur_node); 443 cur_node.host_end--; 444 if (n2 445 && n2->tgt == n->tgt 446 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset) 447 { 448 gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i], 449 kind & typemask, cbuf); 450 return; 451 } 452 } 453 gomp_mutex_unlock (&devicep->lock); 454 gomp_fatal ("Trying to map into device [%p..%p) structure element when " 455 "other mapped elements from the same structure weren't mapped " 456 "together with it", (void *) cur_node.host_start, 457 (void *) cur_node.host_end); 458 } 459 460 static inline uintptr_t 461 gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i) 462 { 463 if (tgt->list[i].key != NULL) 464 return tgt->list[i].key->tgt->tgt_start 465 + tgt->list[i].key->tgt_offset 466 + tgt->list[i].offset; 467 if (tgt->list[i].offset == ~(uintptr_t) 0) 468 return (uintptr_t) hostaddrs[i]; 469 if (tgt->list[i].offset == ~(uintptr_t) 1) 470 return 0; 471 if (tgt->list[i].offset == ~(uintptr_t) 2) 472 return tgt->list[i + 
1].key->tgt->tgt_start 473 + tgt->list[i + 1].key->tgt_offset 474 + tgt->list[i + 1].offset 475 + (uintptr_t) hostaddrs[i] 476 - (uintptr_t) hostaddrs[i + 1]; 477 return tgt->tgt_start + tgt->list[i].offset; 478 } 479 480 attribute_hidden struct target_mem_desc * 481 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, 482 void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds, 483 bool short_mapkind, enum gomp_map_vars_kind pragma_kind) 484 { 485 size_t i, tgt_align, tgt_size, not_found_cnt = 0; 486 bool has_firstprivate = false; 487 const int rshift = short_mapkind ? 8 : 3; 488 const int typemask = short_mapkind ? 0xff : 0x7; 489 struct splay_tree_s *mem_map = &devicep->mem_map; 490 struct splay_tree_key_s cur_node; 491 struct target_mem_desc *tgt 492 = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum); 493 tgt->list_count = mapnum; 494 tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1; 495 tgt->device_descr = devicep; 496 struct gomp_coalesce_buf cbuf, *cbufp = NULL; 497 498 if (mapnum == 0) 499 { 500 tgt->tgt_start = 0; 501 tgt->tgt_end = 0; 502 return tgt; 503 } 504 505 tgt_align = sizeof (void *); 506 tgt_size = 0; 507 cbuf.chunks = NULL; 508 cbuf.chunk_cnt = -1; 509 cbuf.use_cnt = 0; 510 cbuf.buf = NULL; 511 if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET) 512 { 513 cbuf.chunks 514 = (size_t *) gomp_alloca ((2 * mapnum + 2) * sizeof (size_t)); 515 cbuf.chunk_cnt = 0; 516 } 517 if (pragma_kind == GOMP_MAP_VARS_TARGET) 518 { 519 size_t align = 4 * sizeof (void *); 520 tgt_align = align; 521 tgt_size = mapnum * sizeof (void *); 522 cbuf.chunk_cnt = 1; 523 cbuf.use_cnt = 1 + (mapnum > 1); 524 cbuf.chunks[0] = 0; 525 cbuf.chunks[1] = tgt_size; 526 } 527 528 gomp_mutex_lock (&devicep->lock); 529 if (devicep->state == GOMP_DEVICE_FINALIZED) 530 { 531 gomp_mutex_unlock (&devicep->lock); 532 free (tgt); 533 return NULL; 534 } 535 536 for (i = 0; i < mapnum; i++) 537 { 538 int kind = get_kind (short_mapkind, kinds, i); 
539 if (hostaddrs[i] == NULL 540 || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT) 541 { 542 tgt->list[i].key = NULL; 543 tgt->list[i].offset = ~(uintptr_t) 0; 544 continue; 545 } 546 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR) 547 { 548 cur_node.host_start = (uintptr_t) hostaddrs[i]; 549 cur_node.host_end = cur_node.host_start; 550 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); 551 if (n == NULL) 552 { 553 gomp_mutex_unlock (&devicep->lock); 554 gomp_fatal ("use_device_ptr pointer wasn't mapped"); 555 } 556 cur_node.host_start -= n->host_start; 557 hostaddrs[i] 558 = (void *) (n->tgt->tgt_start + n->tgt_offset 559 + cur_node.host_start); 560 tgt->list[i].key = NULL; 561 tgt->list[i].offset = ~(uintptr_t) 0; 562 continue; 563 } 564 else if ((kind & typemask) == GOMP_MAP_STRUCT) 565 { 566 size_t first = i + 1; 567 size_t last = i + sizes[i]; 568 cur_node.host_start = (uintptr_t) hostaddrs[i]; 569 cur_node.host_end = (uintptr_t) hostaddrs[last] 570 + sizes[last]; 571 tgt->list[i].key = NULL; 572 tgt->list[i].offset = ~(uintptr_t) 2; 573 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node); 574 if (n == NULL) 575 { 576 size_t align = (size_t) 1 << (kind >> rshift); 577 if (tgt_align < align) 578 tgt_align = align; 579 tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start; 580 tgt_size = (tgt_size + align - 1) & ~(align - 1); 581 tgt_size += cur_node.host_end - cur_node.host_start; 582 not_found_cnt += last - i; 583 for (i = first; i <= last; i++) 584 { 585 tgt->list[i].key = NULL; 586 if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i) 587 & typemask)) 588 gomp_coalesce_buf_add (&cbuf, 589 tgt_size - cur_node.host_end 590 + (uintptr_t) hostaddrs[i], 591 sizes[i]); 592 } 593 i--; 594 continue; 595 } 596 for (i = first; i <= last; i++) 597 gomp_map_fields_existing (tgt, n, first, i, hostaddrs, 598 sizes, kinds, NULL); 599 i--; 600 continue; 601 } 602 else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER) 603 { 604 
tgt->list[i].key = NULL; 605 tgt->list[i].offset = ~(uintptr_t) 1; 606 has_firstprivate = true; 607 continue; 608 } 609 cur_node.host_start = (uintptr_t) hostaddrs[i]; 610 if (!GOMP_MAP_POINTER_P (kind & typemask)) 611 cur_node.host_end = cur_node.host_start + sizes[i]; 612 else 613 cur_node.host_end = cur_node.host_start + sizeof (void *); 614 if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE) 615 { 616 tgt->list[i].key = NULL; 617 618 size_t align = (size_t) 1 << (kind >> rshift); 619 if (tgt_align < align) 620 tgt_align = align; 621 tgt_size = (tgt_size + align - 1) & ~(align - 1); 622 gomp_coalesce_buf_add (&cbuf, tgt_size, 623 cur_node.host_end - cur_node.host_start); 624 tgt_size += cur_node.host_end - cur_node.host_start; 625 has_firstprivate = true; 626 continue; 627 } 628 splay_tree_key n; 629 if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION) 630 { 631 n = gomp_map_0len_lookup (mem_map, &cur_node); 632 if (!n) 633 { 634 tgt->list[i].key = NULL; 635 tgt->list[i].offset = ~(uintptr_t) 1; 636 continue; 637 } 638 } 639 else 640 n = splay_tree_lookup (mem_map, &cur_node); 641 if (n && n->refcount != REFCOUNT_LINK) 642 gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i], 643 kind & typemask, NULL); 644 else 645 { 646 tgt->list[i].key = NULL; 647 648 size_t align = (size_t) 1 << (kind >> rshift); 649 not_found_cnt++; 650 if (tgt_align < align) 651 tgt_align = align; 652 tgt_size = (tgt_size + align - 1) & ~(align - 1); 653 if (gomp_to_device_kind_p (kind & typemask)) 654 gomp_coalesce_buf_add (&cbuf, tgt_size, 655 cur_node.host_end - cur_node.host_start); 656 tgt_size += cur_node.host_end - cur_node.host_start; 657 if ((kind & typemask) == GOMP_MAP_TO_PSET) 658 { 659 size_t j; 660 for (j = i + 1; j < mapnum; j++) 661 if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j) 662 & typemask)) 663 break; 664 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start 665 || ((uintptr_t) hostaddrs[j] + sizeof (void *) 666 > cur_node.host_end)) 667 break; 
668 else 669 { 670 tgt->list[j].key = NULL; 671 i++; 672 } 673 } 674 } 675 } 676 677 if (devaddrs) 678 { 679 if (mapnum != 1) 680 { 681 gomp_mutex_unlock (&devicep->lock); 682 gomp_fatal ("unexpected aggregation"); 683 } 684 tgt->to_free = devaddrs[0]; 685 tgt->tgt_start = (uintptr_t) tgt->to_free; 686 tgt->tgt_end = tgt->tgt_start + sizes[0]; 687 } 688 else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET) 689 { 690 /* Allocate tgt_align aligned tgt_size block of memory. */ 691 /* FIXME: Perhaps change interface to allocate properly aligned 692 memory. */ 693 tgt->to_free = devicep->alloc_func (devicep->target_id, 694 tgt_size + tgt_align - 1); 695 if (!tgt->to_free) 696 { 697 gomp_mutex_unlock (&devicep->lock); 698 gomp_fatal ("device memory allocation fail"); 699 } 700 701 tgt->tgt_start = (uintptr_t) tgt->to_free; 702 tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1); 703 tgt->tgt_end = tgt->tgt_start + tgt_size; 704 705 if (cbuf.use_cnt == 1) 706 cbuf.chunk_cnt--; 707 if (cbuf.chunk_cnt > 0) 708 { 709 cbuf.buf 710 = malloc (cbuf.chunks[2 * cbuf.chunk_cnt - 1] - cbuf.chunks[0]); 711 if (cbuf.buf) 712 { 713 cbuf.tgt = tgt; 714 cbufp = &cbuf; 715 } 716 } 717 } 718 else 719 { 720 tgt->to_free = NULL; 721 tgt->tgt_start = 0; 722 tgt->tgt_end = 0; 723 } 724 725 tgt_size = 0; 726 if (pragma_kind == GOMP_MAP_VARS_TARGET) 727 tgt_size = mapnum * sizeof (void *); 728 729 tgt->array = NULL; 730 if (not_found_cnt || has_firstprivate) 731 { 732 if (not_found_cnt) 733 tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array)); 734 splay_tree_node array = tgt->array; 735 size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0; 736 uintptr_t field_tgt_base = 0; 737 738 for (i = 0; i < mapnum; i++) 739 if (tgt->list[i].key == NULL) 740 { 741 int kind = get_kind (short_mapkind, kinds, i); 742 if (hostaddrs[i] == NULL) 743 continue; 744 switch (kind & typemask) 745 { 746 size_t align, len, first, last; 747 splay_tree_key n; 748 case 
GOMP_MAP_FIRSTPRIVATE: 749 align = (size_t) 1 << (kind >> rshift); 750 tgt_size = (tgt_size + align - 1) & ~(align - 1); 751 tgt->list[i].offset = tgt_size; 752 len = sizes[i]; 753 gomp_copy_host2dev (devicep, 754 (void *) (tgt->tgt_start + tgt_size), 755 (void *) hostaddrs[i], len, cbufp); 756 tgt_size += len; 757 continue; 758 case GOMP_MAP_FIRSTPRIVATE_INT: 759 case GOMP_MAP_USE_DEVICE_PTR: 760 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION: 761 continue; 762 case GOMP_MAP_STRUCT: 763 first = i + 1; 764 last = i + sizes[i]; 765 cur_node.host_start = (uintptr_t) hostaddrs[i]; 766 cur_node.host_end = (uintptr_t) hostaddrs[last] 767 + sizes[last]; 768 if (tgt->list[first].key != NULL) 769 continue; 770 n = splay_tree_lookup (mem_map, &cur_node); 771 if (n == NULL) 772 { 773 size_t align = (size_t) 1 << (kind >> rshift); 774 tgt_size -= (uintptr_t) hostaddrs[first] 775 - (uintptr_t) hostaddrs[i]; 776 tgt_size = (tgt_size + align - 1) & ~(align - 1); 777 tgt_size += (uintptr_t) hostaddrs[first] 778 - (uintptr_t) hostaddrs[i]; 779 field_tgt_base = (uintptr_t) hostaddrs[first]; 780 field_tgt_offset = tgt_size; 781 field_tgt_clear = last; 782 tgt_size += cur_node.host_end 783 - (uintptr_t) hostaddrs[first]; 784 continue; 785 } 786 for (i = first; i <= last; i++) 787 gomp_map_fields_existing (tgt, n, first, i, hostaddrs, 788 sizes, kinds, cbufp); 789 i--; 790 continue; 791 case GOMP_MAP_ALWAYS_POINTER: 792 cur_node.host_start = (uintptr_t) hostaddrs[i]; 793 cur_node.host_end = cur_node.host_start + sizeof (void *); 794 n = splay_tree_lookup (mem_map, &cur_node); 795 if (n == NULL 796 || n->host_start > cur_node.host_start 797 || n->host_end < cur_node.host_end) 798 { 799 gomp_mutex_unlock (&devicep->lock); 800 gomp_fatal ("always pointer not mapped"); 801 } 802 if ((get_kind (short_mapkind, kinds, i - 1) & typemask) 803 != GOMP_MAP_ALWAYS_POINTER) 804 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1); 805 if (cur_node.tgt_offset) 806 cur_node.tgt_offset -= sizes[i]; 807 
gomp_copy_host2dev (devicep, 808 (void *) (n->tgt->tgt_start 809 + n->tgt_offset 810 + cur_node.host_start 811 - n->host_start), 812 (void *) &cur_node.tgt_offset, 813 sizeof (void *), cbufp); 814 cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset 815 + cur_node.host_start - n->host_start; 816 continue; 817 default: 818 break; 819 } 820 splay_tree_key k = &array->key; 821 k->host_start = (uintptr_t) hostaddrs[i]; 822 if (!GOMP_MAP_POINTER_P (kind & typemask)) 823 k->host_end = k->host_start + sizes[i]; 824 else 825 k->host_end = k->host_start + sizeof (void *); 826 splay_tree_key n = splay_tree_lookup (mem_map, k); 827 if (n && n->refcount != REFCOUNT_LINK) 828 gomp_map_vars_existing (devicep, n, k, &tgt->list[i], 829 kind & typemask, cbufp); 830 else 831 { 832 k->link_key = NULL; 833 if (n && n->refcount == REFCOUNT_LINK) 834 { 835 /* Replace target address of the pointer with target address 836 of mapped object in the splay tree. */ 837 splay_tree_remove (mem_map, n); 838 k->link_key = n; 839 } 840 size_t align = (size_t) 1 << (kind >> rshift); 841 tgt->list[i].key = k; 842 k->tgt = tgt; 843 if (field_tgt_clear != ~(size_t) 0) 844 { 845 k->tgt_offset = k->host_start - field_tgt_base 846 + field_tgt_offset; 847 if (i == field_tgt_clear) 848 field_tgt_clear = ~(size_t) 0; 849 } 850 else 851 { 852 tgt_size = (tgt_size + align - 1) & ~(align - 1); 853 k->tgt_offset = tgt_size; 854 tgt_size += k->host_end - k->host_start; 855 } 856 tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask); 857 tgt->list[i].always_copy_from 858 = GOMP_MAP_ALWAYS_FROM_P (kind & typemask); 859 tgt->list[i].offset = 0; 860 tgt->list[i].length = k->host_end - k->host_start; 861 k->refcount = 1; 862 tgt->refcount++; 863 array->left = NULL; 864 array->right = NULL; 865 splay_tree_insert (mem_map, array); 866 switch (kind & typemask) 867 { 868 case GOMP_MAP_ALLOC: 869 case GOMP_MAP_FROM: 870 case GOMP_MAP_FORCE_ALLOC: 871 case GOMP_MAP_FORCE_FROM: 872 case GOMP_MAP_ALWAYS_FROM: 873 
break; 874 case GOMP_MAP_TO: 875 case GOMP_MAP_TOFROM: 876 case GOMP_MAP_FORCE_TO: 877 case GOMP_MAP_FORCE_TOFROM: 878 case GOMP_MAP_ALWAYS_TO: 879 case GOMP_MAP_ALWAYS_TOFROM: 880 gomp_copy_host2dev (devicep, 881 (void *) (tgt->tgt_start 882 + k->tgt_offset), 883 (void *) k->host_start, 884 k->host_end - k->host_start, cbufp); 885 break; 886 case GOMP_MAP_POINTER: 887 gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start, 888 k->tgt_offset, sizes[i], cbufp); 889 break; 890 case GOMP_MAP_TO_PSET: 891 gomp_copy_host2dev (devicep, 892 (void *) (tgt->tgt_start 893 + k->tgt_offset), 894 (void *) k->host_start, 895 k->host_end - k->host_start, cbufp); 896 897 for (j = i + 1; j < mapnum; j++) 898 if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, 899 j) 900 & typemask)) 901 break; 902 else if ((uintptr_t) hostaddrs[j] < k->host_start 903 || ((uintptr_t) hostaddrs[j] + sizeof (void *) 904 > k->host_end)) 905 break; 906 else 907 { 908 tgt->list[j].key = k; 909 tgt->list[j].copy_from = false; 910 tgt->list[j].always_copy_from = false; 911 if (k->refcount != REFCOUNT_INFINITY) 912 k->refcount++; 913 gomp_map_pointer (tgt, 914 (uintptr_t) *(void **) hostaddrs[j], 915 k->tgt_offset 916 + ((uintptr_t) hostaddrs[j] 917 - k->host_start), 918 sizes[j], cbufp); 919 i++; 920 } 921 break; 922 case GOMP_MAP_FORCE_PRESENT: 923 { 924 /* We already looked up the memory region above and it 925 was missing. 
   */
		size_t size = k->host_end - k->host_start;
		gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
		gomp_fatal ("present clause: !acc_is_present (%p, "
			    "%"PRIu64" (0x%"PRIx64"))",
			    (void *) k->host_start,
			    (uint64_t) size, (uint64_t) size);
#else
		gomp_fatal ("present clause: !acc_is_present (%p, "
			    "%lu (0x%lx))", (void *) k->host_start,
			    (unsigned long) size, (unsigned long) size);
#endif
		}
		break;
	      case GOMP_MAP_FORCE_DEVICEPTR:
		/* A device pointer is always exactly pointer-sized.  */
		assert (k->host_end - k->host_start == sizeof (void *));
		gomp_copy_host2dev (devicep,
				    (void *) (tgt->tgt_start
					      + k->tgt_offset),
				    (void *) k->host_start,
				    sizeof (void *), cbufp);
		break;
	      default:
		gomp_mutex_unlock (&devicep->lock);
		gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
			    kind);
	      }

	    if (k->link_key)
	      {
		/* Set link pointer on target to the device address of the
		   mapped object.  */
		void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
		devicep->host2dev_func (devicep->target_id,
					(void *) n->tgt_offset,
					&tgt_addr, sizeof (void *));
	      }
	    array++;
	  }
      }
    }

  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      /* For a target region, write the device address of each mapped var
	 into the pointer array at tgt_start — this array is what the
	 target region receives as its argument block.  */
      for (i = 0; i < mapnum; i++)
	{
	  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
	  gomp_copy_host2dev (devicep,
			      (void *) (tgt->tgt_start + i * sizeof (void *)),
			      (void *) &cur_node.tgt_offset, sizeof (void *),
			      cbufp);
	}
    }

  if (cbufp)
    {
      /* Flush the coalescing buffer: one host-to-device transfer per
	 collected chunk, offsets relative to the first chunk's start.  */
      long c = 0;
      for (c = 0; c < cbuf.chunk_cnt; ++c)
	gomp_copy_host2dev (devicep,
			    (void *) (tgt->tgt_start + cbuf.chunks[2 * c]),
			    (char *) cbuf.buf + (cbuf.chunks[2 * c]
						 - cbuf.chunks[0]),
			    cbuf.chunks[2 * c + 1] - cbuf.chunks[2 * c], NULL);
      free (cbuf.buf);
    }

  /* If the variable from "omp target enter data" map-list was already mapped,
     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
     gomp_exit_data.  */
  if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
    {
      free (tgt);
      tgt = NULL;
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}

/* Release the device memory backing TGT (if any was allocated) and free the
   host-side descriptor and its key array.  Caller must hold the owning
   device's lock.  */

static void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
  if (tgt->tgt_end)
    gomp_free_device_memory (tgt->device_descr, tgt->to_free);

  free (tgt->array);
  free (tgt);
}

/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host: if it is false, it is assumed that this
   has been done already.  */

attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      /* Device already shut down: nothing to copy back or deallocate on
	 the device side, just drop the host bookkeeping.  */
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
	continue;

      /* Drop one reference; the mapping is removed when the last reference
	 goes away.  REFCOUNT_INFINITY entries are never unmapped.  */
      bool do_unmap = false;
      if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
	k->refcount--;
      else if (k->refcount == 1)
	{
	  k->refcount--;
	  do_unmap = true;
	}

      if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
	  || tgt->list[i].always_copy_from)
	gomp_copy_dev2host (devicep,
			    (void *) (k->host_start + tgt->list[i].offset),
			    (void *) (k->tgt->tgt_start + k->tgt_offset
				      + tgt->list[i].offset),
			    tgt->list[i].length);
      if (do_unmap)
	{
	  splay_tree_remove (&devicep->mem_map, k);
	  if (k->link_key)
	    /* Restore the "declare target link" placeholder that this
	       mapping had displaced.  */
	    splay_tree_insert (&devicep->mem_map,
			       (splay_tree_node) k->link_key);
	  if (k->tgt->refcount > 1)
	    k->tgt->refcount--;
	  else
	    gomp_unmap_tgt (k->tgt);
	}
    }

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    gomp_unmap_tgt (tgt);

  gomp_mutex_unlock (&devicep->lock);
}

/* Implement "omp target update": for each of the MAPNUM host ranges
   described by HOSTADDRS/SIZES that is currently mapped on DEVICEP, copy
   data to and/or from the device as selected by KINDS.  SHORT_MAPKIND says
   whether KINDS holds 16-bit (mask 0xff) or 8-bit (mask 0x7) kind values.
   It is a fatal error if a requested range is only partially mapped.  */

static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
	     size_t *sizes, void *kinds, bool short_mapkind)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  const int typemask = short_mapkind ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
	cur_node.host_start = (uintptr_t) hostaddrs[i];
	cur_node.host_end = cur_node.host_start + sizes[i];
	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
	if (n)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    if (n->host_start > cur_node.host_start
		|| n->host_end < cur_node.host_end)
	      {
		gomp_mutex_unlock (&devicep->lock);
		gomp_fatal ("Trying to update [%p..%p) object when "
			    "only [%p..%p) is mapped",
			    (void *) cur_node.host_start,
			    (void *) cur_node.host_end,
			    (void *) n->host_start,
			    (void *) n->host_end);
	      }

	    void *hostaddr = (void *) cur_node.host_start;
	    void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start);
	    size_t size = cur_node.host_end - cur_node.host_start;

	    if (GOMP_MAP_COPY_TO_P (kind & typemask))
	      gomp_copy_host2dev (devicep, devaddr, hostaddr, size, NULL);
	    if (GOMP_MAP_COPY_FROM_P (kind & typemask))
	      gomp_copy_dev2host (devicep, hostaddr, devaddr, size);
	  }
      }
  gomp_mutex_unlock (&devicep->lock);
}

/* Load image pointed by TARGET_DATA to the device,
   specified by DEVICEP.  And insert to splay tree the mapping between
   addresses from HOST_TABLE and from loaded target image.  We rely in the
   host and device compiler emitting variable and functions in the same
   order.  */

static void
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
			   const void *host_table, const void *target_data,
			   bool is_register_lock)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  int i, num_target_entries;

  num_target_entries
    = devicep->load_image_func (devicep->target_id, version,
				target_data, &target_table);

  if (num_target_entries != num_funcs + num_vars)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
	gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Cannot map target functions or variables"
		  " (expected %u, have %u)", num_funcs + num_vars,
		  num_target_entries);
    }

  /* Insert host-target address mapping into splay tree.  All entries of one
     image share a single pseudo-descriptor with infinite refcount, so they
     are never unmapped individually.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  tgt->refcount = REFCOUNT_INFINITY;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;

  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      /* Functions are keyed by a dummy 1-byte range at the host address.  */
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = REFCOUNT_INFINITY;
      k->link_key = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;

  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      uintptr_t target_size = target_var->end - target_var->start;

      if ((uintptr_t) host_var_table[i * 2 + 1] != target_size)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  if (is_register_lock)
	    gomp_mutex_unlock (&register_lock);
	  gomp_fatal ("Cannot map target variables (size mismatch)");
	}

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end
	= k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
      k->link_key = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  free (target_table);
}

/* Unload the mappings described by target_data from device DEVICE_P.
   The device must be locked.  */

static void
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
			       unsigned version,
			       const void *host_table, const void *target_data)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  struct splay_tree_key_s k;
  splay_tree_key node = NULL;

  /* Find mapping at start of node array */
  if (num_funcs || num_vars)
    {
      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
		      : (uintptr_t) host_var_table[0]);
      k.host_end = k.host_start + 1;
      node = splay_tree_lookup (&devicep->mem_map, &k);
    }

  if (!devicep->unload_image_func (devicep->target_id, version, target_data))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("image unload fail");
    }

  /* Remove mappings from splay tree.  */
  int i;
  for (i = 0; i < num_funcs; i++)
    {
      k.host_start = (uintptr_t) host_func_table[i];
      k.host_end = k.host_start + 1;
      splay_tree_remove (&devicep->mem_map, &k);
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
  bool is_tgt_unmapped = false;

  for (i = 0; i < num_vars; i++)
    {
      k.host_start = (uintptr_t) host_var_table[i * 2];
      k.host_end
	= k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);

      if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
	splay_tree_remove (&devicep->mem_map, &k);
      else
	{
	  /* A "declare target link" variable: its dynamic mapping (if any)
	     owns a separate descriptor that may need unmapping too.  */
	  splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
	  splay_tree_remove (&devicep->mem_map, n);
	  if (n->link_key)
	    {
	      if (n->tgt->refcount > 1)
		n->tgt->refcount--;
	      else
		{
		  is_tgt_unmapped = true;
		  gomp_unmap_tgt (n->tgt);
		}
	    }
	}
    }

  if (node && !is_tgt_unmapped)
    {
      free (node->tgt);
      free (node);
    }
}

/* This function should be called from every offload image while loading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by target plugin.  */

void
GOMP_offload_register_ver (unsigned version, const void *host_table,
			   int target_type, const void *target_data)
{
  int i;

  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
    gomp_fatal ("Library too old for offload (version %u < %u)",
		GOMP_VERSION, GOMP_VERSION_LIB (version));

  gomp_mutex_lock (&register_lock);

  /* Load image to all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_load_image_to_device (devicep, version,
				   host_table, target_data, true);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Insert image to array of pending images.  */
  offload_images
    = gomp_realloc_unlock (offload_images,
			   (num_offload_images + 1)
			   * sizeof (struct offload_image_descr));
  offload_images[num_offload_images].version = version;
  offload_images[num_offload_images].type = target_type;
  offload_images[num_offload_images].host_table = host_table;
  offload_images[num_offload_images].target_data = target_data;

  num_offload_images++;
  gomp_mutex_unlock (&register_lock);
}

/* Legacy entry point: register an offload image with version 0.  */

void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}

/* This function should be called from every offload image while unloading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by target plugin.  */

void
GOMP_offload_unregister_ver (unsigned version, const void *host_table,
			     int target_type, const void *target_data)
{
  int i;

  gomp_mutex_lock (&register_lock);

  /* Unload image from all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_unload_image_from_device (devicep, version,
				       host_table, target_data);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Remove image from array of pending images.  Order is not preserved:
     the last entry is moved into the freed slot.  */
  for (i = 0; i < num_offload_images; i++)
    if (offload_images[i].target_data == target_data)
      {
	offload_images[i] = offload_images[--num_offload_images];
	break;
      }

  gomp_mutex_unlock (&register_lock);
}

/* Legacy entry point: unregister an offload image with version 0.  */

void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}

/* This function initializes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
  int i;
  if (!devicep->init_device_func (devicep->target_id))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("device initialization failed");
    }

  /* Load to device all images registered by the moment.  */
  for (i = 0; i < num_offload_images; i++)
    {
      struct offload_image_descr *image = &offload_images[i];
      if (image->type == devicep->type)
	gomp_load_image_to_device (devicep, image->version,
				   image->host_table, image->target_data,
				   false);
    }

  devicep->state = GOMP_DEVICE_INITIALIZED;
}

/* Unload all registered images from DEVICEP, if it was initialized.
   NOTE(review): caller is presumably expected to hold devicep->lock, as
   gomp_unload_image_from_device requires — confirm at call sites.  */

attribute_hidden void
gomp_unload_device (struct gomp_device_descr *devicep)
{
  if (devicep->state == GOMP_DEVICE_INITIALIZED)
    {
      unsigned i;

      /* Unload from device all images registered at the moment.  */
      for (i = 0; i < num_offload_images; i++)
	{
	  struct offload_image_descr *image = &offload_images[i];
	  if (image->type == devicep->type)
	    gomp_unload_image_from_device (devicep, image->version,
					   image->host_table,
					   image->target_data);
	}
    }
}

/* Free address mapping tables.  MM must be locked on entry, and remains locked
   on return.
*/ 1460 1461 attribute_hidden void 1462 gomp_free_memmap (struct splay_tree_s *mem_map) 1463 { 1464 while (mem_map->root) 1465 { 1466 struct target_mem_desc *tgt = mem_map->root->key.tgt; 1467 1468 splay_tree_remove (mem_map, &mem_map->root->key); 1469 free (tgt->array); 1470 free (tgt); 1471 } 1472 } 1473 1474 /* Host fallback for GOMP_target{,_ext} routines. */ 1475 1476 static void 1477 gomp_target_fallback (void (*fn) (void *), void **hostaddrs) 1478 { 1479 struct gomp_thread old_thr, *thr = gomp_thread (); 1480 old_thr = *thr; 1481 memset (thr, '\0', sizeof (*thr)); 1482 if (gomp_places_list) 1483 { 1484 thr->place = old_thr.place; 1485 thr->ts.place_partition_len = gomp_places_list_len; 1486 } 1487 fn (hostaddrs); 1488 gomp_free_thread (thr); 1489 *thr = old_thr; 1490 } 1491 1492 /* Calculate alignment and size requirements of a private copy of data shared 1493 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */ 1494 1495 static inline void 1496 calculate_firstprivate_requirements (size_t mapnum, size_t *sizes, 1497 unsigned short *kinds, size_t *tgt_align, 1498 size_t *tgt_size) 1499 { 1500 size_t i; 1501 for (i = 0; i < mapnum; i++) 1502 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) 1503 { 1504 size_t align = (size_t) 1 << (kinds[i] >> 8); 1505 if (*tgt_align < align) 1506 *tgt_align = align; 1507 *tgt_size = (*tgt_size + align - 1) & ~(align - 1); 1508 *tgt_size += sizes[i]; 1509 } 1510 } 1511 1512 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. 
*/ 1513 1514 static inline void 1515 copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs, 1516 size_t *sizes, unsigned short *kinds, size_t tgt_align, 1517 size_t tgt_size) 1518 { 1519 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1); 1520 if (al) 1521 tgt += tgt_align - al; 1522 tgt_size = 0; 1523 size_t i; 1524 for (i = 0; i < mapnum; i++) 1525 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) 1526 { 1527 size_t align = (size_t) 1 << (kinds[i] >> 8); 1528 tgt_size = (tgt_size + align - 1) & ~(align - 1); 1529 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]); 1530 hostaddrs[i] = tgt + tgt_size; 1531 tgt_size = tgt_size + sizes[i]; 1532 } 1533 } 1534 1535 /* Helper function of GOMP_target{,_ext} routines. */ 1536 1537 static void * 1538 gomp_get_target_fn_addr (struct gomp_device_descr *devicep, 1539 void (*host_fn) (void *)) 1540 { 1541 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC) 1542 return (void *) host_fn; 1543 else 1544 { 1545 gomp_mutex_lock (&devicep->lock); 1546 if (devicep->state == GOMP_DEVICE_FINALIZED) 1547 { 1548 gomp_mutex_unlock (&devicep->lock); 1549 return NULL; 1550 } 1551 1552 struct splay_tree_key_s k; 1553 k.host_start = (uintptr_t) host_fn; 1554 k.host_end = k.host_start + 1; 1555 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k); 1556 gomp_mutex_unlock (&devicep->lock); 1557 if (tgt_fn == NULL) 1558 return NULL; 1559 1560 return (void *) tgt_fn->tgt_offset; 1561 } 1562 } 1563 1564 /* Called when encountering a target directive. If DEVICE 1565 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is 1566 GOMP_DEVICE_HOST_FALLBACK (or any value 1567 larger than last available hw device), use host fallback. 1568 FN is address of host code, UNUSED is part of the current ABI, but 1569 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays 1570 with MAPNUM entries, with addresses of the host objects, 1571 sizes of the host objects (resp. 
for pointer kind pointer bias 1572 and assumed sizeof (void *) size) and kinds. */ 1573 1574 void 1575 GOMP_target (int device, void (*fn) (void *), const void *unused, 1576 size_t mapnum, void **hostaddrs, size_t *sizes, 1577 unsigned char *kinds) 1578 { 1579 struct gomp_device_descr *devicep = resolve_device (device); 1580 1581 void *fn_addr; 1582 if (devicep == NULL 1583 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 1584 /* All shared memory devices should use the GOMP_target_ext function. */ 1585 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM 1586 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))) 1587 return gomp_target_fallback (fn, hostaddrs); 1588 1589 struct target_mem_desc *tgt_vars 1590 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, 1591 GOMP_MAP_VARS_TARGET); 1592 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start, 1593 NULL); 1594 gomp_unmap_vars (tgt_vars, true); 1595 } 1596 1597 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present, 1598 and several arguments have been added: 1599 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h. 1600 DEPEND is array of dependencies, see GOMP_task for details. 1601 1602 ARGS is a pointer to an array consisting of a variable number of both 1603 device-independent and device-specific arguments, which can take one two 1604 elements where the first specifies for which device it is intended, the type 1605 and optionally also the value. If the value is not present in the first 1606 one, the whole second element the actual value. The last element of the 1607 array is a single NULL. Among the device independent can be for example 1608 NUM_TEAMS and THREAD_LIMIT. 
   NUM_TEAMS is positive if GOMP_teams will be called in the body with
   that value, or 1 if teams construct is not present, or 0, if
   teams construct does not have num_teams clause and so the choice is
   implementation defined, and -1 if it can't be determined on the host
   what value will GOMP_teams have on the device.
   THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
   body with that value, or 0, if teams construct does not have thread_limit
   clause or the teams construct is not present, or -1 if it can't be
   determined on the host what value will GOMP_teams have on the device.  */

void
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
		 void **hostaddrs, size_t *sizes, unsigned short *kinds,
		 unsigned int flags, void **depend, void **args)
{
  struct gomp_device_descr *devicep = resolve_device (device);
  size_t tgt_align = 0, tgt_size = 0;
  /* True once firstprivate data has been staged into a host buffer; it
     must happen before any wait that could let the originals change.  */
  bool fpc_done = false;

  if (flags & GOMP_TARGET_FLAG_NOWAIT)
    {
      struct gomp_thread *thr = gomp_thread ();
      /* Create a team if we don't have any around, as nowait
	 target tasks make sense to run asynchronously even when
	 outside of any parallel.  */
      if (__builtin_expect (thr->ts.team == NULL, 0))
	{
	  struct gomp_team *team = gomp_new_team (1);
	  struct gomp_task *task = thr->task;
	  struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
	  team->prev_ts = thr->ts;
	  thr->ts.team = team;
	  thr->ts.team_id = 0;
	  thr->ts.work_share = &team->work_shares[0];
	  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
	  thr->ts.single_count = 0;
#endif
	  thr->ts.static_trip = 0;
	  thr->task = &team->implicit_task[0];
	  gomp_init_task (thr->task, NULL, icv);
	  if (task)
	    {
	      /* Retire the previous (non-implicit) task now that its ICVs
		 have been captured.  */
	      thr->task = task;
	      gomp_end_task ();
	      free (task);
	      thr->task = &team->implicit_task[0];
	    }
	  else
	    pthread_setspecific (gomp_thread_destructor, thr);
	}
      if (thr->ts.team
	  && !thr->task->final_task)
	{
	  /* Defer the whole construct as an asynchronous target task.  */
	  gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
				   sizes, kinds, flags, depend, args,
				   GOMP_TARGET_TASK_BEFORE_MAP);
	  return;
	}
    }

  /* If there are depend clauses, but nowait is not present
     (or we are in a final task), block the parent task until the
     dependencies are resolved and then just continue with the rest
     of the function as if it is a merged task.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  /* If we might need to wait, copy firstprivate now.  */
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	  fpc_done = true;
	  gomp_task_maybe_wait_for_dependencies (depend);
	}
    }

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
      || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
    {
      /* Host fallback; stage firstprivate first if not already done.  */
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      gomp_target_fallback (fn, hostaddrs);
      return;
    }

  struct target_mem_desc *tgt_vars;
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      /* Shared-memory device: no mapping needed, but firstprivate copies
	 still have to be materialized on the host.  */
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      tgt_vars = NULL;
    }
  else
    tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
			      true, GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr,
		     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
		     args);
  if (tgt_vars)
    gomp_unmap_vars (tgt_vars, true);
}

/* Host fallback for GOMP_target_data{,_ext} routines.
*/ 1742 1743 static void 1744 gomp_target_data_fallback (void) 1745 { 1746 struct gomp_task_icv *icv = gomp_icv (false); 1747 if (icv->target_data) 1748 { 1749 /* Even when doing a host fallback, if there are any active 1750 #pragma omp target data constructs, need to remember the 1751 new #pragma omp target data, otherwise GOMP_target_end_data 1752 would get out of sync. */ 1753 struct target_mem_desc *tgt 1754 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, 1755 GOMP_MAP_VARS_DATA); 1756 tgt->prev = icv->target_data; 1757 icv->target_data = tgt; 1758 } 1759 } 1760 1761 void 1762 GOMP_target_data (int device, const void *unused, size_t mapnum, 1763 void **hostaddrs, size_t *sizes, unsigned char *kinds) 1764 { 1765 struct gomp_device_descr *devicep = resolve_device (device); 1766 1767 if (devicep == NULL 1768 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 1769 || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)) 1770 return gomp_target_data_fallback (); 1771 1772 struct target_mem_desc *tgt 1773 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, 1774 GOMP_MAP_VARS_DATA); 1775 struct gomp_task_icv *icv = gomp_icv (true); 1776 tgt->prev = icv->target_data; 1777 icv->target_data = tgt; 1778 } 1779 1780 void 1781 GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs, 1782 size_t *sizes, unsigned short *kinds) 1783 { 1784 struct gomp_device_descr *devicep = resolve_device (device); 1785 1786 if (devicep == NULL 1787 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 1788 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 1789 return gomp_target_data_fallback (); 1790 1791 struct target_mem_desc *tgt 1792 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true, 1793 GOMP_MAP_VARS_DATA); 1794 struct gomp_task_icv *icv = gomp_icv (true); 1795 tgt->prev = icv->target_data; 1796 icv->target_data = tgt; 1797 } 1798 1799 void 1800 GOMP_target_end_data (void) 1801 { 1802 struct gomp_task_icv 
*icv = gomp_icv (false); 1803 if (icv->target_data) 1804 { 1805 struct target_mem_desc *tgt = icv->target_data; 1806 icv->target_data = tgt->prev; 1807 gomp_unmap_vars (tgt, true); 1808 } 1809 } 1810 1811 void 1812 GOMP_target_update (int device, const void *unused, size_t mapnum, 1813 void **hostaddrs, size_t *sizes, unsigned char *kinds) 1814 { 1815 struct gomp_device_descr *devicep = resolve_device (device); 1816 1817 if (devicep == NULL 1818 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 1819 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 1820 return; 1821 1822 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false); 1823 } 1824 1825 void 1826 GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs, 1827 size_t *sizes, unsigned short *kinds, 1828 unsigned int flags, void **depend) 1829 { 1830 struct gomp_device_descr *devicep = resolve_device (device); 1831 1832 /* If there are depend clauses, but nowait is not present, 1833 block the parent task until the dependencies are resolved 1834 and then just continue with the rest of the function as if it 1835 is a merged task. Until we are able to schedule task during 1836 variable mapping or unmapping, ignore nowait if depend clauses 1837 are not present. */ 1838 if (depend != NULL) 1839 { 1840 struct gomp_thread *thr = gomp_thread (); 1841 if (thr->task && thr->task->depend_hash) 1842 { 1843 if ((flags & GOMP_TARGET_FLAG_NOWAIT) 1844 && thr->ts.team 1845 && !thr->task->final_task) 1846 { 1847 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL, 1848 mapnum, hostaddrs, sizes, kinds, 1849 flags | GOMP_TARGET_FLAG_UPDATE, 1850 depend, NULL, GOMP_TARGET_TASK_DATA)) 1851 return; 1852 } 1853 else 1854 { 1855 struct gomp_team *team = thr->ts.team; 1856 /* If parallel or taskgroup has been cancelled, don't start new 1857 tasks. 
*/ 1858 if (team 1859 && (gomp_team_barrier_cancelled (&team->barrier) 1860 || (thr->task->taskgroup 1861 && thr->task->taskgroup->cancelled))) 1862 return; 1863 1864 gomp_task_maybe_wait_for_dependencies (depend); 1865 } 1866 } 1867 } 1868 1869 if (devicep == NULL 1870 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 1871 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 1872 return; 1873 1874 struct gomp_thread *thr = gomp_thread (); 1875 struct gomp_team *team = thr->ts.team; 1876 /* If parallel or taskgroup has been cancelled, don't start new tasks. */ 1877 if (team 1878 && (gomp_team_barrier_cancelled (&team->barrier) 1879 || (thr->task->taskgroup && thr->task->taskgroup->cancelled))) 1880 return; 1881 1882 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true); 1883 } 1884 1885 static void 1886 gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum, 1887 void **hostaddrs, size_t *sizes, unsigned short *kinds) 1888 { 1889 const int typemask = 0xff; 1890 size_t i; 1891 gomp_mutex_lock (&devicep->lock); 1892 if (devicep->state == GOMP_DEVICE_FINALIZED) 1893 { 1894 gomp_mutex_unlock (&devicep->lock); 1895 return; 1896 } 1897 1898 for (i = 0; i < mapnum; i++) 1899 { 1900 struct splay_tree_key_s cur_node; 1901 unsigned char kind = kinds[i] & typemask; 1902 switch (kind) 1903 { 1904 case GOMP_MAP_FROM: 1905 case GOMP_MAP_ALWAYS_FROM: 1906 case GOMP_MAP_DELETE: 1907 case GOMP_MAP_RELEASE: 1908 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION: 1909 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION: 1910 cur_node.host_start = (uintptr_t) hostaddrs[i]; 1911 cur_node.host_end = cur_node.host_start + sizes[i]; 1912 splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION 1913 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION) 1914 ? 
gomp_map_0len_lookup (&devicep->mem_map, &cur_node) 1915 : splay_tree_lookup (&devicep->mem_map, &cur_node); 1916 if (!k) 1917 continue; 1918 1919 if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY) 1920 k->refcount--; 1921 if ((kind == GOMP_MAP_DELETE 1922 || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION) 1923 && k->refcount != REFCOUNT_INFINITY) 1924 k->refcount = 0; 1925 1926 if ((kind == GOMP_MAP_FROM && k->refcount == 0) 1927 || kind == GOMP_MAP_ALWAYS_FROM) 1928 gomp_copy_dev2host (devicep, (void *) cur_node.host_start, 1929 (void *) (k->tgt->tgt_start + k->tgt_offset 1930 + cur_node.host_start 1931 - k->host_start), 1932 cur_node.host_end - cur_node.host_start); 1933 if (k->refcount == 0) 1934 { 1935 splay_tree_remove (&devicep->mem_map, k); 1936 if (k->link_key) 1937 splay_tree_insert (&devicep->mem_map, 1938 (splay_tree_node) k->link_key); 1939 if (k->tgt->refcount > 1) 1940 k->tgt->refcount--; 1941 else 1942 gomp_unmap_tgt (k->tgt); 1943 } 1944 1945 break; 1946 default: 1947 gomp_mutex_unlock (&devicep->lock); 1948 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x", 1949 kind); 1950 } 1951 } 1952 1953 gomp_mutex_unlock (&devicep->lock); 1954 } 1955 1956 void 1957 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs, 1958 size_t *sizes, unsigned short *kinds, 1959 unsigned int flags, void **depend) 1960 { 1961 struct gomp_device_descr *devicep = resolve_device (device); 1962 1963 /* If there are depend clauses, but nowait is not present, 1964 block the parent task until the dependencies are resolved 1965 and then just continue with the rest of the function as if it 1966 is a merged task. Until we are able to schedule task during 1967 variable mapping or unmapping, ignore nowait if depend clauses 1968 are not present. 
*/ 1969 if (depend != NULL) 1970 { 1971 struct gomp_thread *thr = gomp_thread (); 1972 if (thr->task && thr->task->depend_hash) 1973 { 1974 if ((flags & GOMP_TARGET_FLAG_NOWAIT) 1975 && thr->ts.team 1976 && !thr->task->final_task) 1977 { 1978 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL, 1979 mapnum, hostaddrs, sizes, kinds, 1980 flags, depend, NULL, 1981 GOMP_TARGET_TASK_DATA)) 1982 return; 1983 } 1984 else 1985 { 1986 struct gomp_team *team = thr->ts.team; 1987 /* If parallel or taskgroup has been cancelled, don't start new 1988 tasks. */ 1989 if (team 1990 && (gomp_team_barrier_cancelled (&team->barrier) 1991 || (thr->task->taskgroup 1992 && thr->task->taskgroup->cancelled))) 1993 return; 1994 1995 gomp_task_maybe_wait_for_dependencies (depend); 1996 } 1997 } 1998 } 1999 2000 if (devicep == NULL 2001 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 2002 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 2003 return; 2004 2005 struct gomp_thread *thr = gomp_thread (); 2006 struct gomp_team *team = thr->ts.team; 2007 /* If parallel or taskgroup has been cancelled, don't start new tasks. 
   */
  /* If the team's barrier or the current taskgroup has been cancelled,
     perform no mapping work at all.  */
  if (team
      && (gomp_team_barrier_cancelled (&team->barrier)
	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
    return;

  size_t i;
  if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    /* Enter data: map each list item.  A GOMP_MAP_STRUCT entry is mapped
       together with its sizes[i] following member entries, which are then
       skipped by the loop.  */
    for (i = 0; i < mapnum; i++)
      if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	{
	  gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
			 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
	  i += sizes[i];
	}
      else
	gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
		       true, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
}

/* Body of a target task.  DATA is a struct gomp_target_task.  Returns true
   if the task has been dispatched asynchronously to the device and is
   still running (TTASK is handed to the plugin's async_run hook, which is
   expected to signal completion back — see the GOMP_TARGET_TASK_FINISHED
   branch below); returns false when all work completed synchronously
   (including host fallback).  */

bool
gomp_target_task_fn (void *data)
{
  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
  struct gomp_device_descr *devicep = ttask->devicep;

  if (ttask->fn != NULL)
    {
      /* A target region to execute.  */
      void *fn_addr;
      if (devicep == NULL
	  || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
	  || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
	{
	  /* No usable device: run the region on the host instead.  */
	  ttask->state = GOMP_TARGET_TASK_FALLBACK;
	  gomp_target_fallback (ttask->fn, ttask->hostaddrs);
	  return false;
	}

      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
	{
	  /* Second invocation, after the device has finished: only the
	     unmapping of mapped variables remains.  */
	  if (ttask->tgt)
	    gomp_unmap_vars (ttask->tgt, true);
	  return false;
	}

      void *actual_arguments;
      if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	{
	  /* Shared-memory device: host addresses can be used directly,
	     no mapping needed.  */
	  ttask->tgt = NULL;
	  actual_arguments = ttask->hostaddrs;
	}
      else
	{
	  ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
				      NULL, ttask->sizes, ttask->kinds, true,
				      GOMP_MAP_VARS_TARGET);
	  actual_arguments = (void *) ttask->tgt->tgt_start;
	}
      ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;

      /* Asynchronous launch; TTASK is passed through so completion can be
	 reported back to the task machinery.  */
      devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
			       ttask->args, (void *) ttask);
      return true;
    }
  else if (devicep == NULL
	   || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	   || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    /* Data-movement-only task, but there is nothing to move for host
       fallback or shared-memory devices.  */
    return false;

  size_t i;
  if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
    gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
		 ttask->kinds, true);
  else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    /* Enter data: GOMP_MAP_STRUCT entries are mapped together with their
       sizes[i] following member entries (same pattern as above).  */
    for (i = 0; i < ttask->mapnum; i++)
      if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	{
	  gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
			 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
			 GOMP_MAP_VARS_ENTER_DATA);
	  i += ttask->sizes[i];
	}
      else
	gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
		       &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
		    ttask->kinds);
  return false;
}

/* Handle the teams construct on the host: store THREAD_LIMIT (clamped to
   the ICV's range) into the thread_limit_var ICV.  NUM_TEAMS is ignored
   here.  */

void
GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
{
  if (thread_limit)
    {
      struct gomp_task_icv *icv = gomp_icv (true);
      icv->thread_limit_var
	= thread_limit > INT_MAX ?
UINT_MAX : thread_limit;
    }
  (void) num_teams;
}

/* OpenMP 4.5 omp_target_alloc: allocate SIZE bytes on device DEVICE_NUM.
   Host fallback, shared-memory devices and devices without OpenMP
   offloading capability get a plain host malloc.  Returns NULL for an
   invalid or unavailable device, or on allocation failure.  */

void *
omp_target_alloc (size_t size, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return malloc (size);

  if (device_num < 0)
    return NULL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return NULL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return malloc (size);

  gomp_mutex_lock (&devicep->lock);
  void *ret = devicep->alloc_func (devicep->target_id, size);
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}

/* OpenMP 4.5 omp_target_free: release memory obtained from
   omp_target_alloc.  A NULL DEVICE_PTR, a negative DEVICE_NUM or an
   unavailable device is silently ignored.  */

void
omp_target_free (void *device_ptr, int device_num)
{
  if (device_ptr == NULL)
    return;

  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    {
      free (device_ptr);
      return;
    }

  if (device_num < 0)
    return;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      /* Allocated by host malloc above, so free it the same way.  */
      free (device_ptr);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  gomp_free_device_memory (devicep, device_ptr);
  gomp_mutex_unlock (&devicep->lock);
}

/* OpenMP 4.5 omp_target_is_present: return nonzero if PTR is mapped on
   device DEVICE_NUM.  NULL pointers and the host device always count as
   present; so do shared-memory and non-offloading devices.  */

int
omp_target_is_present (void *ptr, int device_num)
{
  if (ptr == NULL)
    return 1;

  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return 1;

  if (device_num < 0)
    return 0;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return 0;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return 1;

  gomp_mutex_lock (&devicep->lock);
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  /* Zero-length lookup (host_start == host_end) — matches any mapping
     containing the address.  */
  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
  int ret = n != NULL;
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}

/* OpenMP 4.5 omp_target_memcpy: copy LENGTH bytes between host and/or
   device memory.  Returns 0 on success, EINVAL on error.  Internally a
   NULL devicep means "host memory" on that side.  */

int
omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset,
		   size_t src_offset, int dst_device_num, int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
  bool ret;

  if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (dst_device_num < 0)
	return EINVAL;

      dst_devicep = resolve_device (dst_device_num);
      if (dst_devicep == NULL)
	return EINVAL;

      /* Shared-memory and non-offloading devices are treated as host.  */
      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	dst_devicep = NULL;
    }
  if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (src_device_num < 0)
	return EINVAL;

      src_devicep = resolve_device (src_device_num);
      if (src_devicep == NULL)
	return EINVAL;

      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	src_devicep = NULL;
    }
  if (src_devicep == NULL && dst_devicep == NULL)
    {
      /* Pure host-to-host copy.  */
      memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
      return 0;
    }
  if (src_devicep == NULL)
    {
      gomp_mutex_lock (&dst_devicep->lock);
      ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					(char *) dst + dst_offset,
					(char *) src + src_offset, length);
      gomp_mutex_unlock (&dst_devicep->lock);
      return (ret ?
0 : EINVAL);
    }
  if (dst_devicep == NULL)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2host_func (src_devicep->target_id,
					(char *) dst + dst_offset,
					(char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (src_devicep == dst_devicep)
    {
      /* Device-to-device copies are only supported within one device.  */
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2dev_func (src_devicep->target_id,
				       (char *) dst + dst_offset,
				       (char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  return EINVAL;
}

/* Recursive worker for omp_target_memcpy_rect: copy a NUM_DIMS-dimensional
   rectangular subvolume of VOLUME elements of ELEMENT_SIZE bytes, offset
   by DST_OFFSETS/SRC_OFFSETS within arrays whose extents are
   DST_DIMENSIONS/SRC_DIMENSIONS.  A NULL devicep means host memory on
   that side.  Returns 0 on success, EINVAL on failure — including size_t
   overflow in the index arithmetic, checked via __builtin_mul_overflow.  */

static int
omp_target_memcpy_rect_worker (void *dst, void *src, size_t element_size,
			       int num_dims, const size_t *volume,
			       const size_t *dst_offsets,
			       const size_t *src_offsets,
			       const size_t *dst_dimensions,
			       const size_t *src_dimensions,
			       struct gomp_device_descr *dst_devicep,
			       struct gomp_device_descr *src_devicep)
{
  size_t dst_slice = element_size;
  size_t src_slice = element_size;
  size_t j, dst_off, src_off, length;
  int i, ret;

  if (num_dims == 1)
    {
      /* Base case: a single contiguous row.  */
      if (__builtin_mul_overflow (element_size, volume[0], &length)
	  || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
	  || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
	return EINVAL;
      if (dst_devicep == NULL && src_devicep == NULL)
	{
	  memcpy ((char *) dst + dst_off, (char *) src + src_off, length);
	  ret = 1;
	}
      else if (src_devicep == NULL)
	ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					  (char *) dst + dst_off,
					  (char *) src + src_off, length);
      else if (dst_devicep == NULL)
	ret = src_devicep->dev2host_func (src_devicep->target_id,
					  (char *) dst + dst_off,
					  (char *) src + src_off, length);
      else if (src_devicep == dst_devicep)
	ret = src_devicep->dev2dev_func (src_devicep->target_id,
					 (char *) dst + dst_off,
					 (char *) src + src_off, length);
      else
	/* Copies between two different devices are unsupported.  */
	ret = 0;
      return ret ? 0 : EINVAL;
    }

  /* FIXME: it would be nice to have some plugin function to handle
     num_dims == 2 and num_dims == 3 more efficiently.  Larger ones can
     be handled in the generic recursion below, and for host-host it
     should be used even for any num_dims >= 2.  */

  /* Compute the byte size of one slice along the outermost dimension.  */
  for (i = 1; i < num_dims; i++)
    if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
	|| __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
      return EINVAL;
  if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
      || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
    return EINVAL;
  /* Recurse over the outermost dimension, advancing by one slice each
     iteration.  */
  for (j = 0; j < volume[0]; j++)
    {
      ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
					   (char *) src + src_off,
					   element_size, num_dims - 1,
					   volume + 1, dst_offsets + 1,
					   src_offsets + 1, dst_dimensions + 1,
					   src_dimensions + 1, dst_devicep,
					   src_devicep);
      if (ret)
	return ret;
      dst_off += dst_slice;
      src_off += src_slice;
    }
  return 0;
}

/* OpenMP 4.5 omp_target_memcpy_rect.  As required by the spec, when both
   DST and SRC are NULL, return the maximum number of dimensions supported
   (here effectively unlimited, so INT_MAX).  */

int
omp_target_memcpy_rect (void *dst, void *src, size_t element_size,
			int num_dims, const size_t *volume,
			const size_t *dst_offsets,
			const size_t *src_offsets,
			const size_t *dst_dimensions,
			const size_t *src_dimensions,
			int dst_device_num, int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;

  if (!dst && !src)
    return INT_MAX;

  if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (dst_device_num < 0)
	return EINVAL;

      dst_devicep = resolve_device (dst_device_num);
      if (dst_devicep == NULL)
	return EINVAL;

      /* Shared-memory and non-offloading devices are treated as host.  */
      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || dst_devicep->capabilities &
GOMP_OFFLOAD_CAP_SHARED_MEM)
	dst_devicep = NULL;
    }
  if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (src_device_num < 0)
	return EINVAL;

      src_devicep = resolve_device (src_device_num);
      if (src_devicep == NULL)
	return EINVAL;

      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	src_devicep = NULL;
    }

  /* Copies between two distinct devices are not supported.  */
  if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
    return EINVAL;

  /* If both are non-NULL here they are equal, so one lock suffices.  */
  if (src_devicep)
    gomp_mutex_lock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_lock (&dst_devicep->lock);
  int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
					   volume, dst_offsets, src_offsets,
					   dst_dimensions, src_dimensions,
					   dst_devicep, src_devicep);
  if (src_devicep)
    gomp_mutex_unlock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_unlock (&dst_devicep->lock);
  return ret;
}

/* OpenMP 4.5 omp_target_associate_ptr: associate the host range
   [HOST_PTR, HOST_PTR + SIZE) with device address DEVICE_PTR +
   DEVICE_OFFSET on device DEVICE_NUM.  Returns 0 on success, EINVAL
   otherwise.  */

int
omp_target_associate_ptr (void *host_ptr, void *device_ptr, size_t size,
			  size_t device_offset, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) host_ptr;
  cur_node.host_end = cur_node.host_start + size;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n)
    {
      /* An existing mapping is acceptable only if it covers the requested
	 host range and translates to the same device address.  */
      if (n->tgt->tgt_start + n->tgt_offset
	  == (uintptr_t) device_ptr + device_offset
	  && n->host_start <= cur_node.host_start
	  && n->host_end >= cur_node.host_end)
	ret = 0;
    }
  else
    {
      /* Create a new mapping entry.  The infinite refcount marks it as
	 user-associated, so normal unmapping never removes it.  */
      struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
      tgt->array = gomp_malloc (sizeof (*tgt->array));
      tgt->refcount = 1;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      tgt->to_free = NULL;
      tgt->prev = NULL;
      tgt->list_count = 0;
      tgt->device_descr = devicep;
      splay_tree_node array = tgt->array;
      splay_tree_key k = &array->key;
      k->host_start = cur_node.host_start;
      k->host_end = cur_node.host_end;
      k->tgt = tgt;
      k->tgt_offset = (uintptr_t) device_ptr + device_offset;
      k->refcount = REFCOUNT_INFINITY;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      ret = 0;
    }
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}

/* OpenMP 4.5 omp_target_disassociate_ptr: undo omp_target_associate_ptr.
   Only entries created by omp_target_associate_ptr (infinite refcount,
   zero tgt_start, nothing to free, no list entries) may be removed.
   Returns 0 on success, EINVAL otherwise.  */

int
omp_target_disassociate_ptr (void *ptr, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n
      && n->host_start == cur_node.host_start
      && n->refcount == REFCOUNT_INFINITY
      && n->tgt->tgt_start == 0
      && n->tgt->to_free == NULL
      && n->tgt->refcount == 1
      && n->tgt->list_count == 0)
    {
      splay_tree_remove (&devicep->mem_map, n);
      gomp_unmap_tgt (n->tgt);
      ret = 0;
    }

  gomp_mutex_unlock (&devicep->lock);
  return ret;
}

#ifdef PLUGIN_SUPPORT

/* This function tries to load a plugin for DEVICE.  Name of plugin is passed
   in PLUGIN_NAME.
   The handles of the found functions are stored in the corresponding fields
   of DEVICE.  The function returns TRUE on success and FALSE otherwise.  */

static bool
gomp_load_plugin_for_device (struct gomp_device_descr *device,
			     const char *plugin_name)
{
  const char *err = NULL, *last_missing = NULL;

  void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
  if (!plugin_handle)
    goto dl_fail;

  /* Check if all required functions are available in the plugin and store
     their handlers.  None of the symbols can legitimately be NULL,
     so we don't need to check dlerror all the time.  */
#define DLSYM(f)							\
  if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f)))	\
    goto dl_fail
  /* Similar, but missing functions are not an error.  Evaluates to false
     if the lookup failed (recording the name in LAST_MISSING), true
     otherwise.  */
#define DLSYM_OPT(f, n)							\
  ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n))	\
   || (last_missing = #n, 0))

  /* Reject plugins built against a different libgomp plugin ABI.  */
  DLSYM (version);
  if (device->version_func () != GOMP_VERSION)
    {
      err = "plugin version mismatch";
      goto fail;
    }

  DLSYM (get_name);
  DLSYM (get_caps);
  DLSYM (get_type);
  DLSYM (get_num_devices);
  DLSYM (init_device);
  DLSYM (fini_device);
  DLSYM (load_image);
  DLSYM (unload_image);
  DLSYM (alloc);
  DLSYM (free);
  DLSYM (dev2host);
  DLSYM (host2dev);
  device->capabilities = device->get_caps_func ();
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
    {
      DLSYM (run);
      DLSYM (async_run);
      DLSYM_OPT (can_run, can_run);
      DLSYM (dev2dev);
    }
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
    {
      if (!DLSYM_OPT (openacc.exec, openacc_exec)
	  || !DLSYM_OPT (openacc.register_async_cleanup,
			 openacc_register_async_cleanup)
	  || !DLSYM_OPT (openacc.async_test, openacc_async_test)
	  || !DLSYM_OPT (openacc.async_test_all, openacc_async_test_all)
	  || !DLSYM_OPT (openacc.async_wait, openacc_async_wait)
	  || !DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async)
	  || !DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all)
	  || !DLSYM_OPT (openacc.async_wait_all_async,
			 openacc_async_wait_all_async)
	  || !DLSYM_OPT (openacc.async_set_async, openacc_async_set_async)
	  || !DLSYM_OPT (openacc.create_thread_data,
			 openacc_create_thread_data)
	  || !DLSYM_OPT (openacc.destroy_thread_data,
			 openacc_destroy_thread_data))
	{
	  /* Require all the OpenACC handlers if we have
	     GOMP_OFFLOAD_CAP_OPENACC_200.  */
	  err = "plugin missing OpenACC handler function";
	  goto fail;
	}

      unsigned cuda = 0;
      cuda += DLSYM_OPT (openacc.cuda.get_current_device,
			 openacc_cuda_get_current_device);
      cuda += DLSYM_OPT (openacc.cuda.get_current_context,
			 openacc_cuda_get_current_context);
      cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
      cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
      if (cuda && cuda != 4)
	{
	  /* Make sure all the CUDA functions are there if any of them are.  */
	  err = "plugin missing OpenACC CUDA handler function";
	  goto fail;
	}
    }
#undef DLSYM
#undef DLSYM_OPT

  return 1;

 dl_fail:
  err = dlerror ();
 fail:
  gomp_error ("while loading %s: %s", plugin_name, err);
  if (last_missing)
    gomp_error ("missing function was %s", last_missing);
  if (plugin_handle)
    dlclose (plugin_handle);

  return 0;
}

/* This function finalizes all initialized devices.  Registered with
   atexit by gomp_target_init.  */

static void
gomp_target_fini (void)
{
  int i;
  for (i = 0; i < num_devices; i++)
    {
      bool ret = true;
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_INITIALIZED)
	{
	  ret = devicep->fini_device_func (devicep->target_id);
	  devicep->state = GOMP_DEVICE_FINALIZED;
	}
      gomp_mutex_unlock (&devicep->lock);
      if (!ret)
	gomp_fatal ("device finalization failed");
    }
}

/* This function initializes the runtime needed for offloading.
   It parses the list of offload targets and tries to load the plugins for
   these targets.  On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
   will be set, and the array DEVICES initialized, containing descriptors for
   corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, followed
   by the others.
*/ 2641 2642 static void 2643 gomp_target_init (void) 2644 { 2645 const char *prefix ="libgomp-plugin-"; 2646 const char *suffix = SONAME_SUFFIX (1); 2647 const char *cur, *next; 2648 char *plugin_name; 2649 int i, new_num_devices; 2650 2651 num_devices = 0; 2652 devices = NULL; 2653 2654 cur = OFFLOAD_TARGETS; 2655 if (*cur) 2656 do 2657 { 2658 struct gomp_device_descr current_device; 2659 size_t prefix_len, suffix_len, cur_len; 2660 2661 next = strchr (cur, ','); 2662 2663 prefix_len = strlen (prefix); 2664 cur_len = next ? next - cur : strlen (cur); 2665 suffix_len = strlen (suffix); 2666 2667 plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1); 2668 if (!plugin_name) 2669 { 2670 num_devices = 0; 2671 break; 2672 } 2673 2674 memcpy (plugin_name, prefix, prefix_len); 2675 memcpy (plugin_name + prefix_len, cur, cur_len); 2676 memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1); 2677 2678 if (gomp_load_plugin_for_device (¤t_device, plugin_name)) 2679 { 2680 new_num_devices = current_device.get_num_devices_func (); 2681 if (new_num_devices >= 1) 2682 { 2683 /* Augment DEVICES and NUM_DEVICES. */ 2684 2685 devices = realloc (devices, (num_devices + new_num_devices) 2686 * sizeof (struct gomp_device_descr)); 2687 if (!devices) 2688 { 2689 num_devices = 0; 2690 free (plugin_name); 2691 break; 2692 } 2693 2694 current_device.name = current_device.get_name_func (); 2695 /* current_device.capabilities has already been set. 
*/ 2696 current_device.type = current_device.get_type_func (); 2697 current_device.mem_map.root = NULL; 2698 current_device.state = GOMP_DEVICE_UNINITIALIZED; 2699 current_device.openacc.data_environ = NULL; 2700 for (i = 0; i < new_num_devices; i++) 2701 { 2702 current_device.target_id = i; 2703 devices[num_devices] = current_device; 2704 gomp_mutex_init (&devices[num_devices].lock); 2705 num_devices++; 2706 } 2707 } 2708 } 2709 2710 free (plugin_name); 2711 cur = next + 1; 2712 } 2713 while (next); 2714 2715 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set 2716 NUM_DEVICES_OPENMP. */ 2717 struct gomp_device_descr *devices_s 2718 = malloc (num_devices * sizeof (struct gomp_device_descr)); 2719 if (!devices_s) 2720 { 2721 num_devices = 0; 2722 free (devices); 2723 devices = NULL; 2724 } 2725 num_devices_openmp = 0; 2726 for (i = 0; i < num_devices; i++) 2727 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 2728 devices_s[num_devices_openmp++] = devices[i]; 2729 int num_devices_after_openmp = num_devices_openmp; 2730 for (i = 0; i < num_devices; i++) 2731 if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) 2732 devices_s[num_devices_after_openmp++] = devices[i]; 2733 free (devices); 2734 devices = devices_s; 2735 2736 for (i = 0; i < num_devices; i++) 2737 { 2738 /* The 'devices' array can be moved (by the realloc call) until we have 2739 found all the plugins, so registering with the OpenACC runtime (which 2740 takes a copy of the pointer argument) must be delayed until now. */ 2741 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200) 2742 goacc_register (&devices[i]); 2743 } 2744 2745 if (atexit (gomp_target_fini) != 0) 2746 gomp_fatal ("atexit failed"); 2747 } 2748 2749 #else /* PLUGIN_SUPPORT */ 2750 /* If dlfcn.h is unavailable we always fallback to host execution. 2751 GOMP_target* routines are just stubs for this case. 
   */
static void
gomp_target_init (void)
{
  /* No plugin support: no devices to discover, nothing to initialize.  */
}
#endif /* PLUGIN_SUPPORT */